/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hdfs;

import java.io.IOException;
import java.net.InetSocketAddress;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Random;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
import org.apache.hadoop.hdfs.server.namenode.NameNode;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.SequenceFile.CompressionType;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapred.DatanodeBenThread;
import org.apache.hadoop.mapred.DatanodeBenThread.DatanodeBenRunTimeConstants;
import org.apache.hadoop.mapred.DatanodeBenThread.RUNNING_TYPE;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.GenMapper;
import org.apache.hadoop.mapred.GenReaderThread;
import org.apache.hadoop.mapred.GenReduce;
import org.apache.hadoop.mapred.GenThread;
import org.apache.hadoop.mapred.GenWriterThread;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.SequenceFileInputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

/**
 * General DFS benchmark driver. It generates per-map control files, runs a
 * map-reduce job whose tasks spawn the configured GenThread workers (the
 * writer test or the datanode benchmark), prints the collected results, and
 * optionally verifies the written files and cleans up afterwards.
 */
@SuppressWarnings("deprecation")
public class DFSGeneralTest extends Configured implements Tool, GeneralConstant {
  private static Configuration fsConfig;
  private long nmaps;
  private long nthreads;
  private int buffersize = GenThread.DEFAULT_BUFFER_SIZE;
  private long datarate = GenThread.DEFAULT_DATA_RATE;
  static final String[] testtypes = {GenWriterThread.TEST_TYPE,
      DatanodeBenThread.TEST_TYPE};
  private static String testtype = null;
  private final static String DEFAULT_USAGE =
      "USAGE: bin/hadoop hadoop-*-benchmark.jar "
      + "gentest %s [-nMaps] [-nThreads] [-buffersize] [-workdir] "
      + "[-writerate] [-cleanup] %s\n";
  private String dfs_output = null;
  private String dfs_input = null;
  private String input = null;
  private String output = null;
  private String workdir = null;
  private boolean cleanup = false;
  private Random rb = new Random();
  private static final DateFormat dateFormat =
      new SimpleDateFormat("yyyy-MM-dd-HH-mm-ss-SSS");
  private String uniqueId = dateFormat.format(new Date()) + "." + rb.nextInt();
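
  // Illustrative invocations, following the DEFAULT_USAGE string above. The
  // concrete test-type strings are defined by GenWriterThread.TEST_TYPE and
  // DatanodeBenThread.TEST_TYPE (placeholders used here), and the option
  // values are arbitrary examples rather than recommended settings:
  //
  //   bin/hadoop hadoop-*-benchmark.jar gentest <writer-testtype> \
  //       -nMaps 10 -nThreads 4 -buffersize 64 -sync 5 -roll 60 -maxtime 600
  //   bin/hadoop hadoop-*-benchmark.jar gentest <datanode-testtype> \
  //       -maxtime 600 -filesize 256 -dn 3 -pread 0.5 -rep 3 -cleanup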

  public static void printUsage() {
    System.err.printf(DEFAULT_USAGE, "testtype", "<args...>");
    System.err.print(" testtype could be ");
    for (String type : testtypes) {
      System.err.print("\"" + type + "\" ");
    }
    System.err.println();
    System.err.println(" -nMaps [number of machines] Default value = " + NMAPS);
    System.err.println(" -nThreads [number of threads in one machine] Default "
        + "value = " + NTHREADS);
    System.err.println(" -buffersize [X KB buffer] default value = "
        + GenThread.DEFAULT_BUFFER_SIZE);
    System.err.println(" -workdir [working directory] default value = "
        + INPUT + "[testtype]");
    System.err.println(" -writerate [X KB data allowed to write per "
        + "second] default value = " + GenThread.DEFAULT_DATA_RATE);
    System.err.println(" -cleanup : delete all temp data when the test is done.");
    System.err.println();
    for (String type : testtypes) {
      System.err.println("Test " + type + ":");
      printUsage(type, false);
    }
    System.exit(1);
  }

  public static void printUsage(String testtype, boolean exitAfterPrint) {
    if (testtype.equals(GenWriterThread.TEST_TYPE)) {
      System.err.printf(DEFAULT_USAGE, testtype, "[-sync] [-roll] [-maxtime] ");
      System.err.println(" -sync [(sec) sync file once/Xsec] <=0 "
          + "means no sync, default value = "
          + GenWriterThread.DEFAULT_SYNC_INTERVAL_SEC);
      System.err.println(" -roll [(sec) roll file once/Xsec] <=0 "
          + "means no roll, default value = "
          + GenWriterThread.DEFAULT_ROLL_INTERVAL_SEC);
      System.err.println(" -maxtime [(sec) max running time] default "
          + "value = " + GenWriterThread.DEFAULT_MAX_TIME_SEC);
      System.err.println();
    } else if (testtype.equals(DatanodeBenThread.TEST_TYPE)) {
      System.err.printf(DEFAULT_USAGE, testtype, "{[-prepare]} {[-maxtime] "
          + "[-filesize] [-dn] [-pread] [-minfile] [-rep]}");
      System.err.println(" -prepare [generate at least X files per "
          + "datanode in each namespace] default value = "
          + DatanodeBenThread.DEFAULT_MIN_NUMBER_OF_FILES_PER_DATANODE
          + " Need to run prepare first before running the benchmark");
      System.err.println(" -maxtime [(sec) max running time] default "
          + "value = " + DatanodeBenThread.DEFAULT_MAX_TIME_SEC);
      System.err.println(" -filesize [X MB per file] default value = "
          + DatanodeBenThread.DEFAULT_FILE_SIZE);
      System.err.println(" -dn [Stress test X datanodes] default value = "
          + DatanodeBenThread.DEFAULT_DATANODE_NUMBER);
      System.err.println(" -pread [read percent: X read and (1-X) write, "
          + "0<=X<=1] default value = "
          + DatanodeBenThread.DEFAULT_READ_PERCENT);
      System.err.println(" -minfile [choose datanodes with at least X files]"
          + " default value = "
          + DatanodeBenThread.DEFAULT_MIN_NUMBER_OF_FILES_PER_DATANODE);
      System.err.println(" -rep [X replicas per file] default value = "
          + DatanodeBenThread.DEFAULT_REPLICATION_NUM);
      System.err.println();
    }
    if (exitAfterPrint) {
      System.exit(1);
    }
  }

  /**
   * Create one control file per map task in the dfs_input directory. Each
   * control file is a SequenceFile holding a single (task name, working
   * directory) record that a map task later reads to find its work.
   */
  public void control(JobConf fsConfig, String fileName) throws IOException {
    String name = fileName;
    FileSystem fs = FileSystem.get(fsConfig);
    SequenceFile.Writer writer = null;
    for (int i = 0; i < nmaps; i++) {
      try {
        Path controlFile = new Path(dfs_input, name + i);
        writer = SequenceFile.createWriter(fs, fsConfig, controlFile,
            Text.class, Text.class, CompressionType.NONE);
        writer.append(new Text(name + i), new Text(workdir));
      } finally {
        if (writer != null) {
          writer.close();
        }
        writer = null;
      }
    }
  }

  /**
   * Initialize the general configuration (maps, threads, buffer size, data
   * rate, working directory, cleanup flag) from the command line arguments
   * and return the arguments that were not consumed here.
   */
  private String[] initializeGeneralConf(String[] args, JobConf conf)
      throws IOException {
    nmaps = NMAPS;
    nthreads = NTHREADS;
    buffersize = GenThread.DEFAULT_BUFFER_SIZE;
    datarate = GenThread.DEFAULT_DATA_RATE;
    ArrayList<String> newArgs = new ArrayList<String>();
    for (int i = 1; i < args.length; i++) {
      if (args[i].equals("-nThreads")) {
        nthreads = Long.parseLong(args[++i]);
      } else if (args[i].equals("-nMaps")) {
        nmaps = Long.parseLong(args[++i]);
      } else if (args[i].equals("-buffersize")) {
        buffersize = Integer.parseInt(args[++i]);
      } else if (args[i].equals("-workdir")) {
        workdir = args[++i];
      } else if (args[i].equals("-writerate")) {
        datarate = Long.parseLong(args[++i]);
      } else if (args[i].equals("-cleanup")) {
        cleanup = true;
      } else {
        newArgs.add(args[i]);
      }
    }
    return newArgs.toArray(new String[newArgs.size()]);
  }

  /**
   * Generate the control files for the write test and fill in the job
   * configuration for the writer threads.
   * @param args the test-specific command line arguments
   */
  private void initializeGenWriterJob(String[] args, JobConf conf)
      throws IOException {
    long sync = GenWriterThread.DEFAULT_SYNC_INTERVAL_SEC;
    long roll = GenWriterThread.DEFAULT_ROLL_INTERVAL_SEC;
    long maxtime = GenWriterThread.DEFAULT_MAX_TIME_SEC;
    for (int i = 0; i < args.length; i++) {
      if (args[i].equals("-sync")) {
        sync = Long.parseLong(args[++i]);
      } else if (args[i].equals("-roll")) {
        roll = Long.parseLong(args[++i]);
      } else if (args[i].equals("-maxtime")) {
        maxtime = Long.parseLong(args[++i]);
      } else {
        printUsage(testtype, true);
      }
    }
    // Run control() to generate the control files on the FileSystem.
    control(conf, "testing-" + testtype);
    conf.setLong(GenWriterThread.WRITER_ROLL_INTERVAL_KEY, roll);
    conf.setLong(GenWriterThread.WRITER_SYNC_INTERVAL_KEY, sync);
    conf.setLong(GenWriterThread.MAX_TIME_SEC_KEY, maxtime);
    conf.set(THREAD_CLASS_KEY, "org.apache.hadoop.mapred.GenWriterThread");
    conf.setMapperClass(GenMapper.class);
    conf.setReducerClass(GenReduce.class);
  }
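
  /**
   * Set up the datanode benchmark job. With -prepare it pre-populates each
   * namespace with at least the requested number of files per datanode;
   * otherwise it picks the victim datanodes to stress and configures the
   * read/write benchmark parameters.
   */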
  private void initializeDatanodeBenJob(String[] args, JobConf conf)
      throws IOException {
    if (args[0].equals("-prepare")) {
      if (args.length < 2) {
        printUsage(testtype, true);
      }
      long minFile = Long.parseLong(args[1]);
      conf.setLong(DatanodeBenThread.MIN_FILE_PER_DATANODE_KEY, minFile);
      conf.setInt(DatanodeBenThread.RUNNING_TYPE_KEY,
          RUNNING_TYPE.PREPARE.ordinal());
      conf.setLong(DatanodeBenThread.MAX_TIME_SEC_KEY, 3600L);
      conf.setLong(DatanodeBenThread.FILE_SIZE_KEY, 256L);
      conf.setLong(DatanodeBenThread.REPLICATION_KEY, 1L);
      control(conf, "testing-prepare-" + testtype);
    } else {
      long maxtime = DatanodeBenThread.DEFAULT_MAX_TIME_SEC;
      long filesize = DatanodeBenThread.DEFAULT_FILE_SIZE;
      long nDatanode = DatanodeBenThread.DEFAULT_DATANODE_NUMBER;
      float pread = DatanodeBenThread.DEFAULT_READ_PERCENT;
      long minFile = DatanodeBenThread.DEFAULT_MIN_NUMBER_OF_FILES_PER_DATANODE;
      short rep = DatanodeBenThread.DEFAULT_REPLICATION_NUM;
      for (int i = 0; i < args.length; i++) {
        if (args[i].equals("-maxtime")) {
          maxtime = Long.parseLong(args[++i]);
        } else if (args[i].equals("-filesize")) {
          filesize = Long.parseLong(args[++i]);
        } else if (args[i].equals("-dn")) {
          nDatanode = Long.parseLong(args[++i]);
        } else if (args[i].equals("-pread")) {
          pread = Float.parseFloat(args[++i]);
        } else if (args[i].equals("-minfile")) {
          minFile = Long.parseLong(args[++i]);
        } else if (args[i].equals("-rep")) {
          rep = Short.parseShort(args[++i]);
        } else {
          printUsage(testtype, true);
        }
      }
      if (pread + 1e-9 < 0.0 || pread - 1e-9 > 1.0) {
        printUsage(testtype, true);
      }
      conf.setLong(DatanodeBenThread.MAX_TIME_SEC_KEY, maxtime);
      conf.setLong(DatanodeBenThread.FILE_SIZE_KEY, filesize);
      conf.setFloat(DatanodeBenThread.READ_PERCENT_KEY, pread);
      conf.setLong(DatanodeBenThread.REPLICATION_KEY, rep);
      List<JobConf> nameNodeConfs = DatanodeBenThread.getNameNodeConfs(conf);
      DatanodeBenThread dbt = new DatanodeBenThread(conf);
      List<DatanodeInfo> victims = dbt.getTestDatanodes(nameNodeConfs,
          workdir, nDatanode, minFile);
      System.out.print("We choose " + victims.size() + " victim datanodes: ");
      String victimStr = "";
      int i = 0;
      for (DatanodeInfo victim : victims) {
        victimStr += victim.getHostName() + ":" + victim.getPort();
        i++;
        if (i < victims.size()) {
          victimStr += ",";
        }
      }
      System.out.println(victimStr);
      conf.set(DatanodeBenThread.VICTIM_DATANODE_KEY, victimStr);
      control(conf, "testing-" + testtype);
    }
    conf.set(THREAD_CLASS_KEY, "org.apache.hadoop.mapred.DatanodeBenThread");
    conf.setMapperClass(GenMapper.class);
    conf.setReducerClass(GenReduce.class);
  }

  /**
   * Spawn a map-reduce job that reads back and verifies the files listed in
   * the control files generated by the writers.
   */
  private void verifyFiles(FileSystem fs) throws IOException {
    Path inputPath = new Path(input, "filelists");
    Path outputPath = new Path(dfs_output, "verify_results");
    if (!fs.exists(inputPath)) {
      System.out.println("Couldn't find " + inputPath + ". Skip verification.");
      return;
    }
    System.out.println("-------------------");
    System.out.println("VERIFY FILES");
    System.out.println("-------------------");
    JobConf conf = new JobConf(fsConfig, DFSGeneralTest.class);
    conf.set(THREAD_CLASS_KEY, "org.apache.hadoop.mapred.GenReaderThread");
    testtype = GenReaderThread.TEST_TYPE;
    conf.set(TEST_TYPE_KEY, testtype);
    conf.setMapperClass(GenMapper.class);
    conf.setReducerClass(GenReduce.class);
    conf.setJobName(getUniqueName("gentest-verify-" + testtype));
    output = getUniqueName(OUTPUT + testtype);
    updateJobConf(conf, inputPath, outputPath);
    long startTime = System.currentTimeMillis();
    JobClient.runJob(conf);
    long endTime = System.currentTimeMillis();
    printResult(fs, new Path(output, "results"), startTime, endTime);
  }

  private void printResult(FileSystem fs, Path p, long startTime, long endTime)
      throws IOException {
    // Print the contents of the result file, then the total execution time.
    System.out.println("-------------------");
    System.out.println("RESULT");
    System.out.println("-------------------");
    FSDataInputStream out = null;
    try {
      out = fs.open(p);
      while (true) {
        String temp = out.readLine();
        if (temp == null) {
          break;
        }
        System.out.println(temp);
      }
    } finally {
      if (out != null) {
        out.close();
      }
    }
    System.out.println("------------------");
    double execTime = (endTime - startTime) / 1000.0;
    String unit = "seconds";
    if (execTime > 60) {
      execTime /= 60.0;
      unit = "mins";
    }
    if (execTime > 60) {
      execTime /= 60.0;
      unit = "hours";
    }
    System.out.println("Time executed :\t" + execTime + " " + unit);
  }

  private void updateJobConf(JobConf conf, Path inputPath, Path outputPath) {
    // Set the job-specific configuration.
    conf.setLong(NUMBER_OF_MAPS_KEY, nmaps);
    conf.setLong(NUMBER_OF_THREADS_KEY, nthreads);
    conf.setInt(BUFFER_SIZE_KEY, buffersize);
    conf.setLong(WRITER_DATARATE_KEY, datarate);
    conf.setLong("mapred.task.timeout", Long.MAX_VALUE);
    conf.set(OUTPUT_DIR_KEY, output);
    // Set the input and output paths for the map-reduce job.
    FileInputFormat.setInputPaths(conf, inputPath);
    FileOutputFormat.setOutputPath(conf, outputPath);
    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);
    conf.setNumReduceTasks(1);
    conf.setSpeculativeExecution(false);
  }

  /** Clean up all the test directories in every namespace. */
  private void cleanUpDirs(Configuration conf) throws IOException {
    List<InetSocketAddress> nameNodeAddrs =
        DFSUtil.getClientRpcAddresses(conf, null);
    for (InetSocketAddress nnAddr : nameNodeAddrs) {
      Configuration newConf = new Configuration(conf);
      newConf.set(NameNode.DFS_NAMENODE_RPC_ADDRESS_KEY,
          nnAddr.getHostName() + ":" + nnAddr.getPort());
      NameNode.setupDefaultURI(newConf);
      FileSystem fs = FileSystem.get(newConf);
      if (fs.exists(new Path(dfs_output))) {
        fs.delete(new Path(dfs_output), true);
      }
      if (fs.exists(new Path(dfs_input))) {
        fs.delete(new Path(dfs_input), true);
      }
      if (fs.exists(new Path(input))) {
        fs.delete(new Path(input), true);
      }
      if (fs.exists(new Path(output))) {
        fs.delete(new Path(output), true);
      }
    }
  }

  private String getUniqueName(String prefix) {
    return prefix + "-" + uniqueId;
  }

  @Override
  public int run(String[] args) throws IOException {
    if (args.length < 1) {
      printUsage();
    }
    testtype = args[0];
    if (!Arrays.asList(testtypes).contains(testtype)) {
      System.err.println(testtype + " is not a supported test type");
      printUsage();
    }
    // Set up unique working directories for this run and start clean.
    fsConfig = new Configuration(getConf());
    dfs_output = getUniqueName(DFS_OUTPUT + testtype);
    dfs_input = getUniqueName(DFS_INPUT + testtype);
    input = getUniqueName(INPUT + testtype);
    output = getUniqueName(OUTPUT + testtype);
    workdir = input;
    cleanUpDirs(fsConfig);

    FileSystem fs = FileSystem.get(fsConfig);
    JobConf conf = new JobConf(fsConfig, DFSGeneralTest.class);
    conf.setJobName(getUniqueName("gentest-" + testtype));
    conf.set(TEST_TYPE_KEY, testtype);
    String[] newArgs = initializeGeneralConf(args, conf);
    if (testtype.equals(GenWriterThread.TEST_TYPE)) {
      initializeGenWriterJob(newArgs, conf);
    } else if (testtype.equals(DatanodeBenThread.TEST_TYPE)) {
      initializeDatanodeBenJob(newArgs, conf);
    } else {
      printUsage();
    }
    updateJobConf(conf, new Path(dfs_input), new Path(dfs_output, "results"));

    // Run the test job, print its results, and verify the written files.
    long startTime = System.currentTimeMillis();
    JobClient.runJob(conf);
    long endTime = System.currentTimeMillis();
    printResult(fs, new Path(output, "results"), startTime, endTime);
    verifyFiles(fs);

    // Delete all related files when cleanup is requested.
    if (cleanup) {
      cleanUpDirs(fsConfig);
    }
    return 0;
  }

  public static void main(String[] args) throws Exception {
    System.exit(ToolRunner.run(new DFSGeneralTest(), args));
  }
}