/** * $Id$ * * Author: Thilee Subramaniam * * Copyright 2012 Quantcast Corp. * * Licensed under the Apache License, Version 2.0 (the "License"); you may not * use this file except in compliance with the License. You may obtain a copy * of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the * License for the specific language governing permissions and limitations * under the License. * * This Java client performs filesystem meta opetarions on the Hadoop namenode * using HDFS DFSClient. */ import java.io.BufferedReader; import java.io.DataInputStream; import java.io.FileInputStream; import java.io.IOException; import java.io.InputStreamReader; import java.io.OutputStream; import java.net.InetSocketAddress; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.Date; import java.util.HashMap; import java.util.LinkedList; import java.util.List; import java.util.Map; import java.util.Queue; import java.util.Random; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.hdfs.DFSClient; import org.apache.hadoop.hdfs.DFSInputStream; public class MStress_Client { // all of the files creates in HDFS are stored under TEST_BASE_DIR static final String TEST_BASE_DIR = new String("/mstress"); static DFSClient dfsClient_ = null; static StringBuilder path_ = new StringBuilder(4096); static int pathLen_ = 0; static int totalCreateCount = 0; static final int COUNT_INCR = 500; //From commandline static String dfsServer_ = ""; static int dfsPort_ = 0; static String testName_ = ""; static String prefix_ = "PATH_PREFIX_"; static int prefixLen_ = 0; static String planfilePath_ = ""; static String hostName_ = ""; static String processName_ = ""; //From plan file static String type_ = ""; static int levels_ = 0; static int inodesPerLevel_ = 0; static int pathsToStat_ = 0; /* * record keeping. files_ is populuated by CreateDFSPaths(), and is later * used in createWriteDFSPaths() to write to the list of files created by * this instance of the benchmark */ static private HashMap<String, OutputStream> files_ = new HashMap<String, OutputStream>(); // latency metrics for DFSClient RPCs. private static ArrayList<Double> timingMkdirs_ = new ArrayList<Double>(); private static ArrayList<Double> timingCreate_ = new ArrayList<Double>(); private static ArrayList<Double> timingWrite_ = new ArrayList<Double>(); private static ArrayList<Double> timingStat_ = new ArrayList<Double>(); private static ArrayList<Double> timingListPaths_ = new ArrayList<Double>(); private static ArrayList<Double> timingOpen_ = new ArrayList<Double>(); private static ArrayList<Double> timingDelete_ = new ArrayList<Double>(); private static ArrayList<Double> timingRename_ = new ArrayList<Double>(); // this is the data written to the files created by the create-write benchmark static private String data_ = "SOME GARBAGE DATA"; public static void main(String args[]) { parseOptions(args); int result = 0; try { Configuration conf = new Configuration(true); String confSet = "hdfs://" + dfsServer_ + ":" + dfsPort_; conf.set("fs.default.name", confSet); conf.set("fs.trash.interval", "0"); InetSocketAddress inet = new InetSocketAddress(dfsServer_, dfsPort_); dfsClient_ = new DFSClient(inet, conf); if (parsePlanFile() < 0) { System.exit(-1); } /* * create: creates directory and file inodes * create_write: creates directory and file inodes, and writes data_ to them * stat: getFileInfo() on all the files * readdir: listPaths() on all the directories * read: reads a byte from all the files * rename: renames all the files by appending '_x' to them * delete: delets all the files and directories (assumes they end with '_x') */ if (testName_.equals("create")) { result = createDFSPaths(); } else if (testName_.equals("create_write")) { result = createWriteDFSPaths(); } else if (testName_.equals("stat")) { result = statDFSPaths(); } else if (testName_.equals("readdir")) { result = listDFSPaths(); } else if (testName_.equals("read")) { result = readDFSPaths(); } else if (testName_.equals("rename")) { result = renameDFSPaths(); } else if (testName_.equals("delete")) { result = removeDFSPaths(); } else { System.out.printf("Error: unrecognized test \'%s\'\n", testName_); System.exit(-1); } if (result != 0) { System.err.printf("test %s finished with a return of -1.\n", testName_); } Collections.sort(timingMkdirs_); Collections.sort(timingCreate_); Collections.sort(timingWrite_); Collections.sort(timingStat_); Collections.sort(timingListPaths_); Collections.sort(timingOpen_); Collections.sort(timingDelete_); Collections.sort(timingRename_); System.out.println("[benchmark] mkdirs: " + getTimings(timingMkdirs_)); System.out.println("[benchmark] create: " + getTimings(timingCreate_)); System.out.println("[benchmark] write: " + getTimings(timingWrite_)); System.out.println("[benchmark] getFileInfo: " + getTimings(timingStat_)); System.out.println("[benchmark] listPaths: " + getTimings(timingListPaths_)); System.out.println("[benchmark] open: " + getTimings(timingOpen_)); System.out.println("[benchmark] delete: " + getTimings(timingDelete_)); System.out.println("[benchmark] rename: " + getTimings(timingRename_)); } catch( IOException e) { e.printStackTrace(); System.exit(-1); } if (result != 0) { System.exit(-1); } return; } /** * Parses command line options. * the hostName, processName are used to namespace the files created by this * instance of the benchmark. */ private static void parseOptions(String args[]) { if (!(args.length == 14 || args.length == 12 || args.length == 5)) { usage(); } /* * As described in usage(): * -s dfs-server * -p dfs-port [-t [create|create-write|stat|readdir|read|rename|delete] * -a planfile-path * -c host * -n process-name * -P prefix */ for (int i = 0; i < args.length; i++) { if (args[i].equals("-s") && i+1 < args.length) { dfsServer_ = args[i+1]; System.out.println(args[i+1]); i++; } else if (args[i].equals("-p") && i+1 < args.length) { dfsPort_ = Integer.parseInt(args[i+1]); System.out.println(args[i+1]); i++; } else if (args[i].equals("-t") && i+1 < args.length) { testName_ = args[i+1]; System.out.println(args[i+1]); i++; } else if (args[i].equals("-a") && i+1 < args.length) { planfilePath_ = args[i+1]; System.out.println(args[i+1]); i++; } else if (args[i].equals("-c") && i+1 < args.length) { hostName_ = args[i+1]; System.out.println(args[i+1]); i++; } else if (args[i].equals("-n") && i+1 < args.length) { processName_ = args[i+1]; System.out.println(args[i+1]); i++; } else if (args[i].equals("-P") && i+1 < args.length) { prefix_ = args[i+1]; System.out.println(args[i+1]); i++; } } if (dfsServer_.length() == 0 || testName_.length() == 0 || planfilePath_.length() == 0 || hostName_.length() == 0 || processName_.length() == 0 || dfsPort_ == 0) { usage(); } if (prefix_ == null) { prefix_ = new String("PATH_PREFIX_"); } prefixLen_ = prefix_.length(); } /** * Prints usage information to standard out. */ private static void usage() { String className = MStress_Client.class.getName(); System.out.printf("Usage: java %s -s dfs-server -p dfs-port" + "[-t [create|stat|read|readdir|delete|rename] -a planfile-path -c host -n process-name" + " -P prefix]\n", className); System.out.printf(" -t: this option requires -a, -c, and -n options.\n"); System.out.printf(" -P: default prefix is PATH_.\n"); System.out.printf("eg:\n"); System.out.printf(" java %s -s <metaserver-host> -p <metaserver-port> -t create" + " -a <planfile> -c localhost -n Proc_00\n", className); System.exit(1); } /** * Parses the plan file that contains parameters for the benchmark. */ private static int parsePlanFile() { int ret = -1; try { FileInputStream fis = new FileInputStream(planfilePath_); DataInputStream dis = new DataInputStream(fis); BufferedReader br = new BufferedReader(new InputStreamReader(dis)); if (prefix_.isEmpty()) { prefix_ = "PATH_PREFIX_"; } String line; while ((line = br.readLine()) != null) { if (line.length() == 0 || line.startsWith("#")) { continue; } if (line.startsWith("type=")) { type_ = line.substring(5); continue; } if (line.startsWith("levels=")) { levels_ = Integer.parseInt(line.substring(7)); continue; } if (line.startsWith("inodes=")) { inodesPerLevel_ = Integer.parseInt(line.substring(7)); continue; } if (line.startsWith("nstat=")) { pathsToStat_ = Integer.parseInt(line.substring(6)); continue; } } dis.close(); if (levels_ > 0 && !type_.isEmpty() && inodesPerLevel_ > 0 && pathsToStat_ > 0) { ret = 0; } } catch (Exception e) { System.out.println("Error: " + e.getMessage()); } return ret; } /** * Measure the elapsed time between alpha and zigma. */ private static long timeDiffMilliSec(Date alpha, Date zigma) { return zigma.getTime() - alpha.getTime(); } /** * Recursively creates directories and files. * @param level current level of depth * @param parentPath the prefix to the path we have traversed into so far * @return -1 on error, 0 on success */ private static int CreateDFSPaths(int level, String parentPath) { Boolean isLeaf = false; Boolean isDir = false; if (level + 1 >= levels_) { isLeaf = true; } if (isLeaf) { if (type_.equals("dir")) { isDir = true; } else { isDir = false; } } else { isDir = true; } for (int i = 0; i < inodesPerLevel_; i++) { String path = parentPath + "/" + prefix_ + Integer.toString(i); //System.out.printf("Creating (isdir=%b) [%s]\n", isDir, path.toString()); if (isDir) { try { long startTime = System.nanoTime(); if (dfsClient_.mkdirs(path) == false) { System.out.printf("Error in mkdirs(%s)\n", path); return -1; } timingMkdirs_.add(new Double((System.nanoTime() - startTime)/(1E9))); System.out.printf("Creating dir %s\n", path); totalCreateCount ++; if (totalCreateCount % COUNT_INCR == 0) { System.out.printf("Created paths so far: %d\n", totalCreateCount); } if (!isLeaf) { if (CreateDFSPaths(level+1, path) < 0) { System.out.printf("Error in CreateDFSPaths(%s)\n", path); return -1; } } } catch(IOException e) { e.printStackTrace(); return -1; } } else { try { System.out.printf("Creating file %s\n", path); long startTime = System.nanoTime(); OutputStream os = dfsClient_.create(path, true); timingCreate_.add(new Double((System.nanoTime() - startTime)/(1E9))); files_.put(path, os); totalCreateCount ++; if (totalCreateCount % COUNT_INCR == 0) { System.out.printf("Created paths so far: %d\n", totalCreateCount); } } catch( IOException e) { e.printStackTrace(); return -1; } } } return 0; } /** * Creates directories and files. See CreateDFSPath(), the main recursive * portion of this method. * @return -1 on error, 0 on success */ private static int createDFSPaths() { String basePath = new String(TEST_BASE_DIR) + "/" + hostName_ + "_" + processName_; try { long startTime = System.nanoTime(); Boolean ret = dfsClient_.mkdirs(basePath); timingMkdirs_.add(new Double((System.nanoTime() - startTime)/(1E9))); if (!ret) { System.out.printf("Error: failed to create test base dir [%s]\n", basePath); return -1; } } catch( IOException e) { e.printStackTrace(); throw new RuntimeException(); } Date alpha = new Date(); if (CreateDFSPaths(0, basePath) < 0) { return -1; } Date zigma = new Date(); System.out.printf("Client: %d paths created in %d msec\n", totalCreateCount, timeDiffMilliSec(alpha, zigma)); return 0; } /** * This creates DFS paths and writes data_ to them in one go. * @return -1 on error, 0 on success */ private static int createWriteDFSPaths() { if (createDFSPaths() != 0) { return -1; } try { // write to all the files! for (Map.Entry<String, OutputStream> file : files_.entrySet()) { OutputStream os = file.getValue(); long startTime = System.nanoTime(); os.write(data_.getBytes()); timingWrite_.add(new Double((System.nanoTime() - startTime)/(1E9))); os.close(); } } catch (IOException e) { e.printStackTrace(); return -1; } return 0; } /** * This calls getFileInfo() on all the files under this hostname + process' namespace. * @return -1 on error, 0 on success */ private static int statDFSPaths() { String statPath = new String(TEST_BASE_DIR) + "/" + hostName_ + "_" + processName_; System.out.printf("Stating %s ...\n", statPath); int countLeaf = (int) Math.round(Math.pow(inodesPerLevel_, levels_)); int[] leafIdxRangeForDel = new int[countLeaf]; for(int i=0;i<countLeaf;i++) leafIdxRangeForDel[i] = i; Collections.shuffle(Arrays.asList(leafIdxRangeForDel)); Date alpha = new Date(); try { for(int idx : leafIdxRangeForDel) { String path = ""; for(int lev=0; lev < levels_; lev++) { int delta = idx % inodesPerLevel_; idx /= inodesPerLevel_; if(path.length() > 0) { path = prefix_ + delta + "/" + path; } else { path = prefix_ + delta; } } long startTime = System.nanoTime(); dfsClient_.getFileInfo(statPath + "/" + path); timingStat_.add(new Double((System.nanoTime() - startTime)/(1E9))); } } catch(IOException e) { e.printStackTrace(); return -1; } Date zigma = new Date(); System.out.printf("Client: Stat'd all files in %s. Stat took %d msec\n", statPath, timeDiffMilliSec(alpha, zigma)); return 0; } /** * Renames all the files in this hostName/process' namespace by appending * '_x' to them. * @return -1 on error, 0 on success */ private static int renameDFSPaths() { String renamePath = new String(TEST_BASE_DIR) + "/" + hostName_ + "_" + processName_; System.out.printf("Renaming %s ...\n", renamePath); int countLeaf = (int) Math.round(Math.pow(inodesPerLevel_, levels_)); int[] leafIdxRangeForDel = new int[countLeaf]; for(int i=0;i<countLeaf;i++) leafIdxRangeForDel[i] = i; Collections.shuffle(Arrays.asList(leafIdxRangeForDel)); Date alpha = new Date(); try { for(int idx : leafIdxRangeForDel) { String path = ""; for(int lev=0; lev < levels_; lev++) { int delta = idx % inodesPerLevel_; idx /= inodesPerLevel_; if(path.length() > 0) { path = prefix_ + delta + "/" + path; } else { path = prefix_ + delta; } } long startTime = System.nanoTime(); dfsClient_.rename(renamePath + "/" + path, renamePath + "/" + path + "_x"); timingRename_.add(new Double((System.nanoTime() - startTime)/(1E9))); } } catch(IOException e) { e.printStackTrace(); return -1; } Date zigma = new Date(); System.out.printf("Client: Renamed all files in %s. Rename took %d msec\n", renamePath, timeDiffMilliSec(alpha, zigma)); return 0; } /** * Lists all the directories contents in this hostname/process' namespace. * @return -1 on error, 0 on success */ private static int listDFSPaths() { Date alpha = new Date(); int inodeCount = 0; String basePath = new String(TEST_BASE_DIR) + "/" + hostName_ + "_" + processName_; Queue<String> pending = new LinkedList<String>(); pending.add(basePath); while (!pending.isEmpty()) { String parent = pending.remove(); try { long startTime = System.nanoTime(); FileStatus[] children = dfsClient_.listPaths(parent); timingListPaths_.add(new Double((System.nanoTime() - startTime)/(1E9))); if (children == null || children.length == 0) { continue; } for (int i = 0; i < children.length; i++) { String localName = children[i].getPath().getName(); if (localName.equals(".") || localName.equals("..")) { continue; } inodeCount ++; if (inodeCount % COUNT_INCR == 0) { System.out.printf("Readdir paths so far: %d\n", inodeCount); } if (children[i].isDir()) { pending.add(parent + "/" + localName); } else { files_.put(parent + "/" + localName, null); } } } catch (IOException e) { e.printStackTrace(); return -1; } } Date zigma = new Date(); System.out.printf("Client: Directory walk done over %d inodes in %d msec\n", inodeCount, timeDiffMilliSec(alpha, zigma)); return 0; } /** * lists, and then reads the first byte in all of the files in * this hostname/process' namespace * @return -1 on error, 0 on success */ private static int readDFSPaths() { if (listDFSPaths() != 0) { return -1; } try{ for (Map.Entry<String, OutputStream> file : files_.entrySet()) { long startTime = System.nanoTime(); DFSInputStream os = dfsClient_.open(file.getKey()); timingOpen_.add(new Double((System.nanoTime() - startTime)/(1E9))); os.read(); os.close(); } } catch (IOException e) { e.printStackTrace(); } return 0; } /** * deletes all the files in this hostname/process' namespace. * assumes that they've all been renamed (i.e., the rename benchmark already * ran) * @return -1 on error, 0 on sucess */ private static int removeDFSPaths() { String rmPath = new String(TEST_BASE_DIR) + "/" + hostName_ + "_" + processName_; System.out.printf("Deleting %s ...\n", rmPath); int countLeaf = (int) Math.round(Math.pow(inodesPerLevel_, levels_)); int[] leafIdxRangeForDel = new int[countLeaf]; for(int i=0;i<countLeaf;i++) leafIdxRangeForDel[i] = i; Collections.shuffle(Arrays.asList(leafIdxRangeForDel)); Date alpha = new Date(); try { for(int idx : leafIdxRangeForDel) { String path = ""; for(int lev=0; lev < levels_; lev++) { int delta = idx % inodesPerLevel_; idx /= inodesPerLevel_; if(path.length() > 0) { path = prefix_ + delta + "/" + path; } else { path = prefix_ + delta; } } long startTime = System.nanoTime(); dfsClient_.delete(rmPath + "/" + path + "_x", true); timingDelete_.add(new Double((System.nanoTime() - startTime)/(1E9))); } dfsClient_.delete(rmPath, true); } catch(IOException e) { e.printStackTrace(); return -1; } Date zigma = new Date(); System.out.printf("Client: Deleted %s. Delete took %d msec\n", rmPath, timeDiffMilliSec(alpha, zigma)); return 0; } private static String getTimings(ArrayList<Double> array) { StringBuilder output = new StringBuilder(); for (Double i : array) { output.append(i.toString()); output.append(","); } if (output.length() > 0) { output.deleteCharAt(output.length() - 1); } return output.toString(); } }