/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.mapred;

import java.util.Hashtable;
import java.util.List;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.examples.RandomWriter;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.test.system.JTProtocol;
import org.apache.hadoop.mapreduce.test.system.JobInfo;
import org.apache.hadoop.mapreduce.test.system.MRCluster;
import org.apache.hadoop.mapreduce.test.system.TTClient;
import org.apache.hadoop.util.RemoteExecution;
import org.apache.hadoop.util.SSHRemoteExecution;
import org.apache.hadoop.util.ToolRunner;
import org.junit.AfterClass;
import org.junit.Assert;
import org.junit.BeforeClass;
import org.junit.Test;

/**
 * Submit a job and corrupt some disks while it is running. The job
 * should keep running and complete successfully.
 */
public class TestCorruptedDiskJob {

  private static final Log LOG =
      LogFactory.getLog(TestCorruptedDiskJob.class);
  private static MRCluster cluster;
  private static Path inputDir = new Path("input");
  private static Path outputDir = new Path("output");
  private static Configuration conf = new Configuration();
  private static String confFile = "mapred-site.xml";
  private static FileSystem dfs = null;
  private static final int RW_BYTES_PER_MAP = 25 * 1024 * 1024;
  private static final int RW_MAPS_PER_HOST = 2;
  private static JobClient client = null;
  private static List<TTClient> ttClients = null;

  JobStatus[] jobStatus = null;

  @BeforeClass
  public static void before() throws Exception {
    cluster = MRCluster.createCluster(conf);
    String[] expExcludeList = {"java.net.ConnectException",
        "java.io.IOException"};
    cluster.setExcludeExpList(expExcludeList);
    cluster.setUp();
    conf.setBoolean("mapreduce.job.complete.cancel.delegation.tokens",
        false);
    String newConfDir =
        cluster.getConf().get("test.system.hdrc.hadoopnewconfdir");
    LOG.info("newConfDir is :" + newConfDir);
    String newMapredLocalDirPath = conf.get("mapred.local.dir");

    // One of the disks is made to look corrupted by rewriting its
    // mapred.local.dir entry to a path that is inaccessible on the node.
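    // For illustration only (hypothetical value): if mapred.local.dir
    // were "/grid/1/mapred/local", the substitution below would turn it
    // into "/grid/11/mapred/local", a directory that does not exist, so
    // the tasktracker sees it as a failed (corrupted) disk.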
    newMapredLocalDirPath = newMapredLocalDirPath.replaceAll("1", "11");
    LOG.info("newMapredLocalDirPath is :" + newMapredLocalDirPath);
    Hashtable<String, String> prop = new Hashtable<String, String>();
    prop.put("mapred.local.dir", newMapredLocalDirPath);
    String userName = System.getProperty("user.name");
    LOG.info("user name is :" + userName);

    // Build the command that rewrites the mapred.local.dir entry in
    // taskcontroller.cfg: copy every other entry into tmp1.cfg, write
    // the new entry into tmp2.cfg, then concatenate the two back into
    // taskcontroller.cfg.
    String replaceTaskControllerCommand =
        "cat " + newConfDir
        + "/taskcontroller.cfg | grep -v mapred.local.dir > "
        + newConfDir + "/tmp1.cfg;"
        + "echo mapred.local.dir=" + newMapredLocalDirPath + " > "
        + newConfDir + "/tmp2.cfg;"
        + "cat " + newConfDir + "/tmp2.cfg > "
        + newConfDir + "/taskcontroller.cfg;"
        + "cat " + newConfDir + "/tmp1.cfg >> "
        + newConfDir + "/taskcontroller.cfg;";

    ttClients = cluster.getTTClients();
    cluster.restartClusterWithNewConfig(prop, confFile);
    UtilsForTests.waitFor(1000);

    // Change taskcontroller.cfg on all tasktracker nodes. This is
    // required because mapred.local.dir has to match in both
    // mapred-site.xml and taskcontroller.cfg. The change can be made
    // after the cluster is brought up, since the Linux task controller
    // reads taskcontroller.cfg only when a job's task starts.
    for (TTClient ttClient : ttClients) {
      String ttClientHostName = ttClient.getHostName();
      try {
        RemoteExecution rExec = new SSHRemoteExecution();
        rExec.executeCommand(ttClientHostName, userName,
            replaceTaskControllerCommand);
      } catch (Exception e) {
        e.printStackTrace();
      }
    }

    conf = cluster.getJTClient().getProxy().getDaemonConf();
    client = cluster.getJTClient().getClient();
    dfs = client.getFs();
    dfs.delete(inputDir, true);
    dfs.delete(outputDir, true);
  }

  @AfterClass
  public static void after() throws Exception {
    cluster.tearDown();
    cluster.restart();
    UtilsForTests.waitFor(1000);
    dfs.delete(inputDir, true);
    dfs.delete(outputDir, true);
  }

  /**
   * Tests running a job against a corrupted disk. Even though one of
   * the configured local directories does not exist, the job should
   * still run successfully.
   */
  @Test
  public void testCorruptedDiskJob() throws Exception {
    // Scale down the default settings for RandomWriter for the
    // test-case. Generates
    // NUM_HADOOP_SLAVES * RW_MAPS_PER_HOST * RW_BYTES_PER_MAP bytes.
    conf.setInt("test.randomwrite.bytes_per_map", RW_BYTES_PER_MAP);
    conf.setInt("test.randomwriter.maps_per_host", RW_MAPS_PER_HOST);
    String[] rwArgs = {inputDir.toString()};

    JTProtocol remoteJTClient = cluster.getJTClient().getProxy();
    JobInfo jInfo = null;
    dfs.delete(inputDir, true);

    // Run RandomWriter; a zero exit code means the job completed
    // successfully.
    Assert.assertEquals(0,
        ToolRunner.run(conf, new RandomWriter(), rwArgs));

    jobStatus = client.getAllJobs();
    // Get the job id of the job just submitted.
    JobID id = jobStatus[0].getJobID();
    LOG.info("jobid is :" + id.toString());
    Assert.assertTrue("Failed to complete the job",
        cluster.getJTClient().isJobStopped(id));

    jInfo = remoteJTClient.getJobInfo(id);
    JobStatus jStatus = jInfo.getStatus();
    if (jStatus != null) {
      Assert.assertEquals("Job has not succeeded...",
          JobStatus.SUCCEEDED, jStatus.getRunState());
    }
  }
}