/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.hadoop.fs.loadGenerator; import java.io.File; import java.io.FileNotFoundException; import java.io.PrintStream; import java.util.ArrayList; import java.util.List; import java.util.Random; import org.apache.hadoop.util.ToolRunner; /** * This program generates a random namespace structure with the following * constraints: * 1. The number of subdirectories is a random number in [minWidth, maxWidth]. * 2. The maximum depth of each subdirectory is a random number * [2*maxDepth/3, maxDepth]. * 3. Files are randomly placed in the empty directories. The size of each * file follows Gaussian distribution. * The generated namespace structure is described by two files in the output * directory. Each line of the first file * contains the full name of a leaf directory. * Each line of the second file contains * the full name of a file and its size, separated by a blank. * * The synopsis of the command is * java StructureGenerator -maxDepth <maxDepth> : maximum depth of the directory tree; default is 5. -minWidth <minWidth> : minimum number of subdirectories per directories; default is 1 -maxWidth <maxWidth> : maximum number of subdirectories per directories; default is 5 -numOfFiles <#OfFiles> : the total number of files; default is 10. -avgFileSize <avgFileSizeInBlocks>: average size of blocks; default is 1. -outDir <outDir>: output directory; default is the current directory. -seed <seed>: random number generator seed; default is the current time. */ public class StructureGenerator { private int maxDepth = 5; private int minWidth = 1; private int maxWidth = 5; private int numOfFiles = 10; private double avgFileSize = 1; private File outDir = DEFAULT_STRUCTURE_DIRECTORY; final static private String USAGE = "java StructureGenerator\n" + "-maxDepth <maxDepth>\n" + "-minWidth <minWidth>\n" + "-maxWidth <maxWidth>\n" + "-numOfFiles <#OfFiles>\n" + "-avgFileSize <avgFileSizeInBlocks>\n" + "-outDir <outDir>\n" + "-seed <seed>"; private Random r = null; /** Default directory for storing file/directory structure */ final static File DEFAULT_STRUCTURE_DIRECTORY = new File("."); /** The name of the file for storing directory structure */ final static String DIR_STRUCTURE_FILE_NAME = "dirStructure"; /** The name of the file for storing file structure */ final static String FILE_STRUCTURE_FILE_NAME = "fileStructure"; /** The name prefix for the files created by this program */ final static String FILE_NAME_PREFIX = "_file_"; /** * The main function first parses the command line arguments, * then generates in-memory directory structure and outputs to a file, * last generates in-memory files and outputs them to a file. */ public int run(String[] args) throws Exception { int exitCode = 0; exitCode = init(args); if (exitCode != 0) { return exitCode; } genDirStructure(); output(new File(outDir, DIR_STRUCTURE_FILE_NAME)); genFileStructure(); outputFiles(new File(outDir, FILE_STRUCTURE_FILE_NAME)); return exitCode; } /** Parse the command line arguments and initialize the data */ private int init(String[] args) { try { for (int i = 0; i < args.length; i++) { // parse command line if (args[i].equals("-maxDepth")) { maxDepth = Integer.parseInt(args[++i]); if (maxDepth<1) { System.err.println("maxDepth must be positive: " + maxDepth); return -1; } } else if (args[i].equals("-minWidth")) { minWidth = Integer.parseInt(args[++i]); if (minWidth<0) { System.err.println("minWidth must be positive: " + minWidth); return -1; } } else if (args[i].equals("-maxWidth")) { maxWidth = Integer.parseInt(args[++i]); } else if (args[i].equals("-numOfFiles")) { numOfFiles = Integer.parseInt(args[++i]); if (numOfFiles<1) { System.err.println("NumOfFiles must be positive: " + numOfFiles); return -1; } } else if (args[i].equals("-avgFileSize")) { avgFileSize = Double.parseDouble(args[++i]); if (avgFileSize<=0) { System.err.println("AvgFileSize must be positive: " + avgFileSize); return -1; } } else if (args[i].equals("-outDir")) { outDir = new File(args[++i]); } else if (args[i].equals("-seed")) { r = new Random(Long.parseLong(args[++i])); } else { System.err.println(USAGE); ToolRunner.printGenericCommandUsage(System.err); return -1; } } } catch (NumberFormatException e) { System.err.println("Illegal parameter: " + e.getLocalizedMessage()); System.err.println(USAGE); return -1; } if (maxWidth < minWidth) { System.err.println( "maxWidth must be bigger than minWidth: " + maxWidth); return -1; } if (r==null) { r = new Random(); } return 0; } /** In memory representation of a directory */ private static class INode { private String name; private List<INode> children = new ArrayList<INode>(); /** Constructor */ private INode(String name) { this.name = name; } /** Add a child (subdir/file) */ private void addChild(INode child) { children.add(child); } /** Output the subtree rooted at the current node. * Only the leaves are printed. */ private void output(PrintStream out, String prefix) { prefix = prefix==null?name:prefix+"/"+name; if (children.isEmpty()) { out.println(prefix); } else { for (INode child : children) { child.output(out, prefix); } } } /** Output the files in the subtree rooted at this node */ protected void outputFiles(PrintStream out, String prefix) { prefix = prefix==null?name:prefix+"/"+name; for (INode child : children) { child.outputFiles(out, prefix); } } /** Add all the leaves in the subtree to the input list */ private void getLeaves(List<INode> leaves) { if (children.isEmpty()) { leaves.add(this); } else { for (INode child : children) { child.getLeaves(leaves); } } } } /** In memory representation of a file */ private static class FileINode extends INode { private double numOfBlocks; /** constructor */ private FileINode(String name, double numOfBlocks) { super(name); this.numOfBlocks = numOfBlocks; } /** Output a file attribute */ protected void outputFiles(PrintStream out, String prefix) { prefix = (prefix == null)?super.name: prefix + "/"+super.name; out.println(prefix + " " + numOfBlocks); } } private INode root; /** Generates a directory tree with a max depth of <code>maxDepth</code> */ private void genDirStructure() { root = genDirStructure("", maxDepth); } /** Generate a directory tree rooted at <code>rootName</code> * The number of subtree is in the range of [minWidth, maxWidth]. * The maximum depth of each subtree is in the range of * [2*maxDepth/3, maxDepth]. */ private INode genDirStructure(String rootName, int maxDepth) { INode root = new INode(rootName); if (maxDepth>0) { maxDepth--; int minDepth = maxDepth*2/3; // Figure out the number of subdirectories to generate int numOfSubDirs = minWidth + r.nextInt(maxWidth-minWidth+1); // Expand the tree for (int i=0; i<numOfSubDirs; i++) { int childDepth = (maxDepth == 0)?0: (r.nextInt(maxDepth-minDepth+1)+minDepth); INode child = genDirStructure("dir"+i, childDepth); root.addChild(child); } } return root; } /** Collects leaf nodes in the tree */ private List<INode> getLeaves() { List<INode> leaveDirs = new ArrayList<INode>(); root.getLeaves(leaveDirs); return leaveDirs; } /** Decides where to place all the files and its length. * It first collects all empty directories in the tree. * For each file, it randomly chooses an empty directory to place the file. * The file's length is generated using Gaussian distribution. */ private void genFileStructure() { List<INode> leaves = getLeaves(); int totalLeaves = leaves.size(); for (int i=0; i<numOfFiles; i++) { int leaveNum = r.nextInt(totalLeaves); double fileSize; do { fileSize = r.nextGaussian()+avgFileSize; } while (fileSize<0); leaves.get(leaveNum).addChild( new FileINode(FILE_NAME_PREFIX+i, fileSize)); } } /** Output directory structure to a file, each line of the file * contains the directory name. Only empty directory names are printed. */ private void output(File outFile) throws FileNotFoundException { System.out.println("Printing to " + outFile.toString()); PrintStream out = new PrintStream(outFile); root.output(out, null); out.close(); } /** Output all files' attributes to a file, each line of the output file * contains a file name and its length. */ private void outputFiles(File outFile) throws FileNotFoundException { System.out.println("Printing to " + outFile.toString()); PrintStream out = new PrintStream(outFile); root.outputFiles(out, null); out.close(); } /** * Main program * @param args Command line arguments * @throws Exception */ public static void main(String[] args) throws Exception { StructureGenerator sg = new StructureGenerator(); System.exit(sg.run(args)); } }