/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.mapred;
import org.apache.commons.logging.Log;
import java.util.List;
import org.apache.hadoop.util.RemoteExecution;
import org.apache.hadoop.util.SSHRemoteExecution;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.test.system.JTProtocol;
import org.apache.hadoop.mapreduce.test.system.JobInfo;
import org.apache.hadoop.mapreduce.test.system.MRCluster;
import org.apache.hadoop.mapreduce.test.system.TTClient;
import org.apache.hadoop.mapreduce.test.system.JTClient;
import org.apache.hadoop.examples.RandomWriter;
import org.apache.hadoop.util.ToolRunner;
import org.apache.hadoop.util.Tool;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Assert;
import org.junit.Test;
import java.io.IOException;
import java.util.Hashtable;
/**
 * Submit a job, then corrupt some disks while the job is running;
 * the job should continue running and complete successfully.
 */
public class TestCorruptedDiskJob {
  private static final Log LOG = LogFactory
      .getLog(TestCorruptedDiskJob.class);
  private static MRCluster cluster;
  // HDFS input/output paths for the RandomWriter job.
  private static Path inputDir = new Path("input");
  private static Path outputDir = new Path("output");
  private static Configuration conf = new Configuration();
  // Config file whose "mapred.local.dir" value is overridden for the test.
  private static String confFile = "mapred-site.xml";
  private static FileSystem dfs = null;
  // RandomWriter scale-down: bytes written per map task.
  private static final int RW_BYTES_PER_MAP = 25 * 1024 * 1024;
  // RandomWriter scale-down: map tasks per tasktracker host.
  private static final int RW_MAPS_PER_HOST = 2;
  private static JobClient client = null;
  int count = 0;
  String userName = null;
  JobStatus[] jobStatus = null;
  private static List<TTClient> ttClients = null;

  /**
   * Brings up the cluster with one mapred.local.dir entry made
   * inaccessible (simulating a corrupted disk) and propagates the
   * same value to taskcontroller.cfg on every tasktracker node.
   */
  @BeforeClass
  public static void before() throws Exception {
    cluster = MRCluster.createCluster(conf);
    // Connection failures are expected while the cluster restarts;
    // do not treat them as test errors.
    String [] expExcludeList = {"java.net.ConnectException",
        "java.io.IOException"};
    cluster.setExcludeExpList(expExcludeList);
    cluster.setUp();
    conf.setBoolean("mapreduce.job.complete.cancel.delegation.tokens", false);
    String newConfDir = cluster.
        getConf().get("test.system.hdrc.hadoopnewconfdir");
    LOG.info("newConfDir is :" + newConfDir);
    String newMapredLocalDirPath = conf.get("mapred.local.dir");
    // One of the disks is made corrupted by making the path inaccessible
    // (renaming "1" to a non-existent "11" directory).
    newMapredLocalDirPath = newMapredLocalDirPath.replaceAll("1", "11");
    LOG.info("newMapredLocalDirPath is :" + newMapredLocalDirPath);
    Hashtable<String,String> prop = new Hashtable<String,String>();
    prop.put("mapred.local.dir", newMapredLocalDirPath);
    String userName = System.getProperty("user.name");
    LOG.info("user name is :" + userName);
    // Shell command that rewrites taskcontroller.cfg with the new
    // mapred.local.dir while preserving all of its other entries.
    String replaceTaskControllerCommand = "cat " + newConfDir +
        "/taskcontroller.cfg | grep -v mapred.local.dir > " +
        newConfDir + "/tmp1.cfg;echo mapred.local.dir=" +
        newMapredLocalDirPath + " >> " + newConfDir +
        "/tmp2.cfg;cat " + newConfDir +
        "/tmp2.cfg > " + newConfDir +
        "/taskcontroller.cfg;cat " + newConfDir +
        "/tmp1.cfg >> " + newConfDir + "/taskcontroller.cfg;";
    ttClients = cluster.getTTClients();
    cluster.restartClusterWithNewConfig(prop, confFile);
    UtilsForTests.waitFor(1000);
    // Changing the taskcontroller.cfg file in all tasktracker nodes.
    // This is required as mapred.local.dir should match
    // in both mapred-site.xml and taskcontroller.cfg.
    // This change can be done after cluster is brought up as
    // Linux task controller will access taskcontroller.cfg
    // when a job's task starts.
    for (TTClient ttClient : ttClients) {
      String ttClientHostName = ttClient.getHostName();
      try {
        RemoteExecution rExec = new SSHRemoteExecution();
        rExec.executeCommand(ttClientHostName, userName,
            replaceTaskControllerCommand);
      } catch (Exception e) {
        // Best-effort: log and continue with the remaining nodes
        // rather than silently dumping the stack to stderr.
        LOG.warn("Failed to update taskcontroller.cfg on "
            + ttClientHostName, e);
      }
    }
    conf = cluster.getJTClient().getProxy().getDaemonConf();
    client = cluster.getJTClient().getClient();
    dfs = client.getFs();
    dfs.delete(inputDir, true);
    dfs.delete(outputDir, true);
  }

  /**
   * Restores the original cluster configuration and cleans up the
   * HDFS directories used by the test.
   */
  @AfterClass
  public static void after() throws Exception {
    cluster.tearDown();
    cluster.restart();
    UtilsForTests.waitFor(1000);
    dfs.delete(inputDir, true);
    dfs.delete(outputDir, true);
  }

  /**
   * This tests the corrupted disk. If a disk does not exist, still
   * the job should run successfully.
   */
  @Test
  public void testCorruptedDiskJob() throws
      Exception {
    // Scale down the default settings for RandomWriter for the test-case
    // Generates NUM_HADOOP_SLAVES * RW_MAPS_PER_HOST * RW_BYTES_PER_MAP
    conf.setInt("test.randomwrite.bytes_per_map", RW_BYTES_PER_MAP);
    conf.setInt("test.randomwriter.maps_per_host", RW_MAPS_PER_HOST);
    String[] rwArgs = {inputDir.toString()};
    JTProtocol remoteJTClient = cluster.getJTClient().getProxy();
    JobInfo jInfo = null;
    dfs.delete(inputDir, true);
    // Run RandomWriter; JUnit expects the expected value first.
    Assert.assertEquals("RandomWriter job did not exit cleanly", 0,
        ToolRunner.run(conf, new RandomWriter(), rwArgs));
    jobStatus = client.getAllJobs();
    // Getting the jobId of the just submitted job
    JobID id = jobStatus[0].getJobID();
    LOG.info("jobid is :" + id.toString());
    Assert.assertTrue("Failed to complete the job",
        cluster.getJTClient().isJobStopped(id));
    jInfo = remoteJTClient.getJobInfo(id);
    JobStatus jStatus = jInfo.getStatus();
    if (jStatus != null) {
      Assert.assertEquals("Job has not succeeded...",
          JobStatus.SUCCEEDED, jStatus.getRunState());
    }
  }
}