/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hdfs;
import java.io.IOException;
import java.net.InetSocketAddress;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
import java.util.List;
import java.util.Random;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
import org.apache.hadoop.hdfs.server.namenode.NameNode;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.SequenceFile.CompressionType;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.DatanodeBenThread;
import org.apache.hadoop.mapred.DatanodeBenThread.RUNNING_TYPE;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.GenMapper;
import org.apache.hadoop.mapred.GenReaderThread;
import org.apache.hadoop.mapred.GenReduce;
import org.apache.hadoop.mapred.GenThread;
import org.apache.hadoop.mapred.GenWriterThread;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.SequenceFileInputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
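/**
 * General DFS benchmark driver. It launches a map-reduce job in which
 * each map task runs a number of GenThread workers that write to or
 * stress HDFS, then prints the aggregated results and verifies the
 * files that were written.
 *
 * Example invocation (the concrete test-type names are defined by
 * GenWriterThread.TEST_TYPE and DatanodeBenThread.TEST_TYPE):
 *
 *   bin/hadoop hadoop-*-benchmark.jar gentest [testtype] -nMaps 10 -nThreads 4
 */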
@SuppressWarnings("deprecation")
public class DFSGeneralTest extends Configured implements Tool, GeneralConstant {
private static Configuration fsConfig;
private long nmaps;
private long nthreads;
private int buffersize = GenThread.DEFAULT_BUFFER_SIZE;
private long datarate = GenThread.DEFAULT_DATA_RATE;
static final String[] testtypes = {GenWriterThread.TEST_TYPE,
DatanodeBenThread.TEST_TYPE};
private static String testtype = null;
private final static String DEFAULT_USAGE =
"USAGE: bin/hadoop hadoop-*-benchmark.jar " +
"gentest %s [-nMaps] [-nThreads] [-buffersize] [-workdir] " +
"[-writerate] [-cleanup] %s\n";
private String dfs_output = null;
private String dfs_input = null;
private String input = null;
private String output = null;
private String workdir = null;
private boolean cleanup = false;
private Random rb = new Random();
private static final DateFormat dateFormat =
new SimpleDateFormat("yyyy-MM-dd-HH-mm-ss-SSS");
private String uniqueId = (dateFormat.format(new Date())) + "."
+ rb.nextInt();
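/**
 * Print the general usage message for all test types and exit.
 */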
public static void printUsage() {
System.err.printf(DEFAULT_USAGE, "testtype", "<args...>");
System.err.print(" testtype could be ");
for (String type: testtypes) {
System.err.print("\"" + type + "\" ");
}
System.err.println();
System.err.println(" -nMaps [number of machines] Default value = " + NMAPS);
System.err.println(" -nThreads [number of threads in one machine] Default "
+ "value = " + NTHREADS);
System.err.println(" -buffersize [X KB buffer] default value = " +
GenThread.DEFAULT_BUFFER_SIZE);
System.err.println(" -workdir [working directory] default value = " +
INPUT + "[testtype]");
System.err.println(" -writerate [X KB data allowed to write per " +
"second] default value = " + GenThread.DEFAULT_DATA_RATE);
System.err.println(" -cleanup :delete all temp data when test is done.");
System.err.println();
for (String type : testtypes) {
System.err.println("Test " + type + ":");
printUsage(type, false);
}
System.exit(1);
}
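/**
 * Print the usage message for a single test type, optionally exiting
 * afterwards.
 */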
public static void printUsage(String testtype, boolean exitAfterPrint) {
if (testtype.equals(GenWriterThread.TEST_TYPE)) {
System.err.printf(DEFAULT_USAGE, testtype, "[-sync] [-roll] "
+ "[-maxtime] ");
System.err.println(" -sync [(sec) sync file once/Xsec] <=0 " +
"means no sync default value = " +
GenWriterThread.DEFAULT_SYNC_INTERVAL_SEC);
System.err.println(" -roll [(sec) roll file once/Xsec] <=0 " +
"means no roll, default value = " +
GenWriterThread.DEFAULT_ROLL_INTERVAL_SEC);
System.err.println(" -maxtime [(sec) max running time] default " +
"value = " + GenWriterThread.DEFAULT_MAX_TIME_SEC);
System.err.println();
} else if (testtype.equals(DatanodeBenThread.TEST_TYPE)) {
System.err.printf(DEFAULT_USAGE, testtype, "{[-prepare]} {[-maxtime] " +
"[-filesize] [-dn] [-pread] [-minfile] [-rep]}");
System.err.println(" -prepare [generate at least X files per " +
"datanode in each namespace] default value = " +
DatanodeBenThread.DEFAULT_MIN_NUMBER_OF_FILES_PER_DATANODE +
" Need to run prepare first before running benchmark");
System.err.println(" -maxtime [(sec) max running time] default " +
"value = " + DatanodeBenThread.DEFAULT_MAX_TIME_SEC);
System.err.println(" -filesize [X MB per file] default value = " +
DatanodeBenThread.DEFAULT_FILE_SIZE);
System.err.println(" -dn [Stress test X datanodes] " +
"default value = " + DatanodeBenThread.DEFAULT_DATANODE_NUMBER);
System.err.println(" -pread [read percent: X read and (1-X) write, " +
"0<=X<=1] default value = " + DatanodeBenThread.DEFAULT_READ_PERCENT);
System.err.println(" -minfile [choose datanodes with at least X files]" +
" default value = " +
DatanodeBenThread.DEFAULT_MIN_NUMBER_OF_FILES_PER_DATANODE);
System.err.println(" -rep [X replicas per file] default value = " +
DatanodeBenThread.DEFAULT_REPLICATION_NUM);
System.err.println();
}
if (exitAfterPrint) {
System.exit(1);
}
}
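/**
 * Write one control file per map task into dfs_input. Each control file
 * holds a single (task name, working directory) record that tells the
 * corresponding map task where to operate.
 */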
public void control(JobConf conf, String fileName)
throws IOException {
FileSystem fs = FileSystem.get(conf);
for (int i = 0; i < nmaps; i++) {
SequenceFile.Writer writer = null;
try {
Path controlFile = new Path(dfs_input, fileName + i);
writer = SequenceFile.createWriter(fs, conf, controlFile,
Text.class, Text.class, CompressionType.NONE);
writer.append(new Text(fileName + i), new Text(workdir));
} finally {
if (writer != null)
writer.close();
}
}
}
/**
 * Parse the general command-line options shared by all test types
 * (-nMaps, -nThreads, -buffersize, -workdir, -writerate, -cleanup)
 * and return the arguments that were not consumed. args[0] is the
 * test type and is skipped.
 */
private String[] initializeGeneralConf(String[] args, JobConf conf)
throws IOException {
nmaps = NMAPS;
nthreads = NTHREADS;
buffersize = GenThread.DEFAULT_BUFFER_SIZE;
datarate = GenThread.DEFAULT_DATA_RATE;
ArrayList<String> newArgs = new ArrayList<String>();
for (int i = 1; i < args.length; i++) {
if (args[i].equals("-nThreads")) nthreads = Long.parseLong(args[++i]);
else if (args[i].equals("-nMaps")) nmaps = Long.parseLong(args[++i]);
else if (args[i].equals("-buffersize")) buffersize = Integer.parseInt(args[++i]);
else if (args[i].equals("-workdir")) workdir = args[++i];
else if (args[i].equals("-writerate")) datarate = Long.parseLong(args[++i]);
else if (args[i].equals("-cleanup")) cleanup = true;
else {
newArgs.add(args[i]);
}
}
return newArgs.toArray(new String[newArgs.size()]);
}
/**
 * Generate control files for the write test and populate the job
 * configuration with the writer settings.
 * @param args remaining command-line arguments
 * @param conf job configuration to populate
 */
private void initializeGenWriterJob(String[] args, JobConf conf) throws IOException {
long sync = GenWriterThread.DEFAULT_SYNC_INTERVAL_SEC;
long roll = GenWriterThread.DEFAULT_ROLL_INTERVAL_SEC;
long maxtime = GenWriterThread.DEFAULT_MAX_TIME_SEC;
for (int i = 0; i < args.length; i++) {
if (args[i].equals("-sync")) sync = Long.parseLong(args[++i]);
else if (args[i].equals("-roll")) roll = Long.parseLong(args[++i]);
else if (args[i].equals("-maxtime")) maxtime = Long.parseLong(args[++i]);
else {
printUsage(testtype, true);
}
}
// Generate the control files that drive the map tasks.
control(conf, "testing-" + testtype);
conf.setLong(GenWriterThread.WRITER_ROLL_INTERVAL_KEY, roll);
conf.setLong(GenWriterThread.WRITER_SYNC_INTERVAL_KEY, sync);
conf.setLong(GenWriterThread.MAX_TIME_SEC_KEY, maxtime);
conf.set(THREAD_CLASS_KEY, "org.apache.hadoop.mapred.GenWriterThread");
conf.setMapperClass(GenMapper.class);
conf.setReducerClass(GenReduce.class);
}
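/**
 * Set up the datanode benchmark job. With -prepare, the job populates
 * each namespace with at least the requested number of files per
 * datanode; otherwise it picks the victim datanodes to stress and
 * records them in the job configuration.
 * @param args remaining command-line arguments
 * @param conf job configuration to populate
 */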
private void initializeDatanodeBenJob(String[] args, JobConf conf)
throws IOException {
if (args[0].equals("-prepare")) {
if (args.length < 2) {
printUsage(testtype, true);
}
long minFile = Long.parseLong(args[1]);
conf.setLong(DatanodeBenThread.MIN_FILE_PER_DATANODE_KEY, minFile);
conf.setInt(DatanodeBenThread.RUNNING_TYPE_KEY, RUNNING_TYPE.PREPARE.ordinal());
conf.setLong(DatanodeBenThread.MAX_TIME_SEC_KEY, 3600L);
conf.setLong(DatanodeBenThread.FILE_SIZE_KEY, 256L);
conf.setLong(DatanodeBenThread.REPLICATION_KEY, 1L);
control(conf, "testing-prepare-" + testtype);
} else {
long maxtime = DatanodeBenThread.DEFAULT_MAX_TIME_SEC;
long filesize = DatanodeBenThread.DEFAULT_FILE_SIZE;
long nDatanode = DatanodeBenThread.DEFAULT_DATANODE_NUMBER;
float pread = DatanodeBenThread.DEFAULT_READ_PERCENT;
long minFile = DatanodeBenThread.DEFAULT_MIN_NUMBER_OF_FILES_PER_DATANODE;
short rep = DatanodeBenThread.DEFAULT_REPLICATION_NUM;
for (int i = 0; i < args.length; i++) {
if (args[i].equals("-maxtime")) maxtime = Long.parseLong(args[++i]);
else if (args[i].equals("-filesize")) filesize =
Long.parseLong(args[++i]);
else if (args[i].equals("-dn")) nDatanode =
Long.parseLong(args[++i]);
else if (args[i].equals("-pread")) pread = Float.parseFloat(args[++i]);
else if (args[i].equals("-minfile")) minFile = Long.parseLong(args[++i]);
else if (args[i].equals("-rep")) rep = Short.parseShort(args[++i]);
else {
printUsage(testtype, true);
}
}
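// Reject pread outside [0, 1], allowing a small epsilon for
// floating-point rounding.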
if (pread + 1e-9 < 0.0 || pread - 1e-9 > 1.0) {
printUsage(testtype, true);
}
conf.setLong(DatanodeBenThread.MAX_TIME_SEC_KEY, maxtime);
conf.setLong(DatanodeBenThread.FILE_SIZE_KEY, filesize);
conf.setFloat(DatanodeBenThread.READ_PERCENT_KEY, pread);
conf.setLong(DatanodeBenThread.REPLICATION_KEY, rep);
List<JobConf> nameNodeConfs = DatanodeBenThread.getNameNodeConfs(conf);
DatanodeBenThread dbt = new DatanodeBenThread(conf);
List<DatanodeInfo> victims = dbt.getTestDatanodes(nameNodeConfs,
workdir, nDatanode, minFile);
System.out.print("We choose " + victims.size() + " victim datanodes: ");
String victimStr = "";
int i = 0;
for (DatanodeInfo victim: victims) {
victimStr += victim.getHostName() + ":" + victim.getPort();
i++;
if (i < victims.size()) {
victimStr += ",";
}
}
System.out.println(victimStr);
conf.set(DatanodeBenThread.VICTIM_DATANODE_KEY, victimStr);
control(conf, "testing-" + testtype);
}
conf.set(THREAD_CLASS_KEY, "org.apache.hadoop.mapred.DatanodeBenThread");
conf.setMapperClass(GenMapper.class);
conf.setReducerClass(GenReduce.class);
}
/*
 * Spawn a map-reduce job, based on the control files generated by
 * the writers, that reads back and verifies the written files.
 */
private void verifyFiles(FileSystem fs)
throws IOException {
Path inputPath = new Path(input, "filelists");
Path outputPath = new Path(dfs_output, "verify_results");
if (!fs.exists(inputPath)) {
System.out.println("Couldn't find " + inputPath + " Skip verification.");
return;
}
System.out.println("-------------------");
System.out.println("VERIFY FILES");
System.out.println("-------------------");
JobConf conf = new JobConf(fsConfig, DFSGeneralTest.class);
conf.set(THREAD_CLASS_KEY, "org.apache.hadoop.mapred.GenReaderThread");
testtype = GenReaderThread.TEST_TYPE;
conf.set(TEST_TYPE_KEY, testtype);
conf.setMapperClass(GenMapper.class);
conf.setReducerClass(GenReduce.class);
conf.setJobName(getUniqueName("gentest-verify-" + testtype));
output = getUniqueName(OUTPUT + testtype);
updateJobConf(conf, inputPath, outputPath);
long startTime = System.currentTimeMillis();
JobClient.runJob(conf);
long endTime = System.currentTimeMillis();
printResult(fs, new Path(output, "results"), startTime, endTime);
}
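/**
 * Print the contents of the result file followed by the elapsed
 * wall-clock time of the job.
 */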
private void printResult(FileSystem fs, Path p,
long startTime, long endTime) throws IOException{
// Print out the result file.
System.out.println("-------------------");
System.out.println("RESULT");
System.out.println("-------------------");
FSDataInputStream out = null;
try {
out = fs.open(p);
String line;
while ((line = out.readLine()) != null) {
System.out.println(line);
}
} finally {
if (out != null)
out.close();
}
System.out.println("------------------");
double execTime = (endTime - startTime) / 1000.0;
String unit = "seconds";
if (execTime > 60) {
execTime /= 60.0;
unit = "mins";
}
if (execTime > 60) {
execTime /= 60.0;
unit = "hours";
}
System.out.println("Time executed :\t" + execTime + " " + unit);
}
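/**
 * Apply the general benchmark settings and the map-reduce input/output
 * layout shared by all test types.
 */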
private void updateJobConf(JobConf conf, Path inputPath, Path outputPath) {
// set specific job config
conf.setLong(NUMBER_OF_MAPS_KEY, nmaps);
conf.setLong(NUMBER_OF_THREADS_KEY, nthreads);
conf.setInt(BUFFER_SIZE_KEY, buffersize);
conf.setLong(WRITER_DATARATE_KEY, datarate);
conf.setLong("mapred.task.timeout", Long.MAX_VALUE);
conf.set(OUTPUT_DIR_KEY, output);
// set the output and input for the map reduce
FileInputFormat.setInputPaths(conf, inputPath);
FileOutputFormat.setOutputPath(conf, outputPath);
conf.setInputFormat(SequenceFileInputFormat.class);
conf.setOutputKeyClass(Text.class);
conf.setOutputValueClass(Text.class);
conf.setNumReduceTasks(1);
conf.setSpeculativeExecution(false);
}
// Clean up all directories in all namespaces
private void cleanUpDirs(Configuration conf) throws IOException {
List<InetSocketAddress> nameNodeAddrs =
DFSUtil.getClientRpcAddresses(conf, null);
for (InetSocketAddress nnAddr : nameNodeAddrs) {
Configuration newConf = new Configuration(conf);
newConf.set(NameNode.DFS_NAMENODE_RPC_ADDRESS_KEY,
nnAddr.getHostName() + ":" + nnAddr.getPort());
NameNode.setupDefaultURI(newConf);
FileSystem fs = FileSystem.get(newConf);
if (fs.exists(new Path(dfs_output)))
fs.delete(new Path(dfs_output), true);
if (fs.exists(new Path(dfs_input)))
fs.delete(new Path(dfs_input), true);
if (fs.exists(new Path(input)))
fs.delete(new Path(input), true);
if (fs.exists(new Path(output)))
fs.delete(new Path(output), true);
}
}
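// Names are made unique across runs by appending the per-run
// timestamp-plus-random-int id.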
private String getUniqueName(String prefix) {
return prefix + "-" + uniqueId;
}
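/**
 * Parse the arguments, run the selected test as a map-reduce job,
 * print the results, verify the written files, and optionally clean up.
 */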
@Override
public int run(String[] args) throws IOException {
if (args.length < 1) {
printUsage();
}
testtype = args[0];
if (!Arrays.asList(testtypes).contains(testtype)) {
System.err.println(testtype + " is not a supported test type");
printUsage();
}
// Run the selected test as a map-reduce job.
fsConfig = new Configuration(getConf());
dfs_output = getUniqueName(DFS_OUTPUT + testtype);
dfs_input = getUniqueName(DFS_INPUT + testtype);
input = getUniqueName(INPUT + testtype);
output = getUniqueName(OUTPUT + testtype);
workdir = input;
cleanUpDirs(fsConfig);
FileSystem fs = FileSystem.get(fsConfig);
JobConf conf = new JobConf(fsConfig, DFSGeneralTest.class);
conf.setJobName(getUniqueName("gentest-" + testtype));
conf.set(TEST_TYPE_KEY, testtype);
String[] newArgs = initializeGeneralConf(args, conf);
if (testtype.equals(GenWriterThread.TEST_TYPE)) {
initializeGenWriterJob(newArgs, conf);
} else if (testtype.equals(DatanodeBenThread.TEST_TYPE)) {
initializeDatanodeBenJob(newArgs, conf);
} else {
printUsage();
}
updateJobConf(conf, new Path(dfs_input), new Path(dfs_output, "results"));
long startTime = System.currentTimeMillis();
JobClient.runJob(conf);
long endTime = System.currentTimeMillis();
printResult(fs, new Path(output, "results"), startTime, endTime);
verifyFiles(fs);
// Delete all generated files if -cleanup was specified.
if (cleanup)
cleanUpDirs(fsConfig);
return 0;
}
public static void main(String[] args) throws Exception {
System.exit(ToolRunner.run(new DFSGeneralTest(), args));
}
}