package org.apache.hadoop.hdfs; import java.io.BufferedReader; import java.io.File; import java.io.FileReader; import java.io.IOException; import org.apache.hadoop.mapred.CreateFiles; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configured; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapred.FileInputFormat; import org.apache.hadoop.mapred.FileOutputFormat; import org.apache.hadoop.mapred.JobClient; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.TextInputFormat; import org.apache.hadoop.util.Tool; import org.apache.hadoop.util.ToolRunner; /** * This program reads the directory structure and file structure from the input * directory and creates the namespace in the file system specified by the * configuration in the specified root. All the files are filled with 'a'. * * The synopsis of the command is java DataGenerator -inDir <inDir>: input * directory name where directory/file structures are stored. Its default value * is the current directory. -root <root>: the name of the root directory which * the new namespace is going to be placed under. Its default value is * "/testLoadSpace". */ @SuppressWarnings("deprecation") public class DataGenerator extends Configured implements Tool { private File inDir = StructureGenerator.DEFAULT_STRUCTURE_DIRECTORY; private Path root = DEFAULT_ROOT; private int nFiles; private FileSystem fs; private Configuration config; @SuppressWarnings("unused") final static private long BLOCK_SIZE = 10; final static String ROOT = "/root"; final static private String USAGE = "java DataGenerator " + "-inDir <inDir> " + "-root <root>"; /** default name of the root where the test namespace will be placed under */ final static Path DEFAULT_ROOT = new Path("/testLoadSpace"); /** * Main function. It first parses the command line arguments. It then reads * the directory structure from the input directory structure file and * creates directory structure in the file system namespace. Afterwards it * reads the file attributes and creates files in the file. All file content * is filled with 'a'. * * @return */ public int run(String[] args) throws Exception { int exitCode = 0; exitCode = init(args); if (exitCode != 0) { return exitCode; } // genDirStructure(); genFiles(); return exitCode; } /** Parse the command line arguments and initialize the data */ private int init(String[] args) { try { // initialize file system handle fs = FileSystem.get(getConf()); } catch (IOException ioe) { System.err.println("Can not initialize the file system: " + ioe.getLocalizedMessage()); return -1; } for (int i = 0; i < args.length; i++) { // parse command line if (args[i].equals("-root")) { root = new Path(args[++i]); } else if (args[i].equals("-inDir")) { inDir = new File(args[++i]); } else if (args[i].equals("-files")) nFiles = Integer.parseInt(args[++i]); else { System.err.println(USAGE); ToolRunner.printGenericCommandUsage(System.err); System.exit(-1); } } return 0; } /** * Read directory structure file under the input directory. Create each * directory under the specified root. The directory names are relative to * the specified root. */ @SuppressWarnings("unused") private void genDirStructure() throws IOException { BufferedReader in = new BufferedReader(new FileReader(new File(inDir, StructureGenerator.DIR_STRUCTURE_FILE_NAME))); String line; while ((line = in.readLine()) != null) { fs.mkdirs(new Path(root + line)); } } /** * Read file structure file under the input directory. Create each file * under the specified root. The file names are relative to the root. */ private void genFiles() throws IOException { // // BufferedReader in = new BufferedReader(new FileReader(new File(inDir, // StructureGenerator.FILE_STRUCTURE_FILE_NAME))); // String line; // while ((line = in.readLine()) != null) { // String[] tokens = line.split(" "); // if (tokens.length != 2) { // throw new IOException("Expect at most 2 tokens per line: " // + line); // } // String fileName = root + tokens[0]; // long fileSize = (long) (BLOCK_SIZE * Double.parseDouble(tokens[1])); // genFile(new Path(fileName), fileSize); // } config = new Configuration(getConf()); config.setInt("dfs.replication", 3); config.set("dfs.rootdir", root.toString()); JobConf job = new JobConf(config, DataGenerator.class); job.setJobName("data-genarator"); FileOutputFormat.setOutputPath(job, new Path("data-generator-result")); // create the input for the map-reduce job Path inputPath = new Path(ROOT + "load_input"); fs.mkdirs(inputPath); fs.copyFromLocalFile(new Path(inDir + "/" + StructureGenerator.FILE_STRUCTURE_FILE_NAME), inputPath); FileInputFormat.setInputPaths(job, new Path(ROOT + "load_input")); job.setInputFormat(TextInputFormat.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); job.setMapperClass(CreateFiles.class); job.setNumMapTasks(nFiles/10); job.setNumReduceTasks(0); JobClient.runJob(job); } /** * Create a file with the name <code>file</code> and a length of * <code>fileSize</code>. The file is filled with character 'a'. */ @SuppressWarnings("unused") private void genFile(Path file, long fileSize) throws IOException { FSDataOutputStream out = fs.create(file, true, getConf().getInt("io.file.buffer.size", 4096), (short) getConf().getInt("dfs.replication", 3), fs.getDefaultBlockSize()); for (long i = 0; i < fileSize; i++) { out.writeByte('a'); } out.close(); } /** * Main program. * * @param args * Command line arguments * @throws Exception */ public static void main(String[] args) throws Exception { int res = ToolRunner .run(new Configuration(), new DataGenerator(), args); System.exit(res); } }