/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.tools;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.URI;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Queue;
import java.util.Set;
import java.util.TreeMap;

import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.OptionBuilder;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.commons.cli.PosixParser;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.HarFileSystem;
import org.apache.hadoop.fs.HarFileSystem.HarFSDataInputStream;
import org.apache.hadoop.fs.HarProperties;
import org.apache.hadoop.fs.HarReader;
import org.apache.hadoop.fs.HarStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.FileSplit;
import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.SequenceFileRecordReader;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.lib.MultipleInputs;
import org.apache.hadoop.mapred.lib.NullOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

/**
 * An archive creation utility.
 * This class provides methods that can be used
 * to create Hadoop archives. For an overview of the
 * Hadoop archive format, see {@link HarFileSystem}.
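 * <p>
 * Example invocation (one possible form, mirroring the USAGE string defined
 * below; the archive name must end in {@code .har}):
 * <pre>
 *   hadoop archive -archiveName foo.har -p /user/hadoop dir1 dir2 /user/outputdir
 * </pre>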
 */
public class HadoopArchives implements Tool {
  public static final int VERSION = 2;
  private static final Log LOG = LogFactory.getLog(HadoopArchives.class);

  private static final String NAME = "har";
  /** name of the file in hdfs that lists the files to be archived **/
  private static final String SRC_LIST_LABEL = NAME + ".src.list";
  /** total size of the files to archive **/
  private static final String TOTAL_SIZE_LABEL = NAME + ".total.size";
  /** path to be used as a parent to archive files **/
  private static final String SRC_PARENT_LABEL = NAME + ".parent.path";
  /** the size of the blocks that will be created when archiving **/
  private static final String HAR_BLOCKSIZE_LABEL = NAME + ".block.size";
  private static final long HAR_BLOCKSIZE_DEFAULT = 512 * 1024 * 1024L;
  /** the size of the part files that will be created when archiving **/
  private static final String HAR_PARTSIZE_LABEL = NAME + ".partfile.size";
  private static final long HAR_PARTSIZE_DEFAULT = 4 * 1024 * 1024 * 1024L;
  private static final String PART_ID_OFFSET = NAME + ".partid.offset";
  /** number of lines in each block of the _index file;
   *  see {@link HarFileSystem} for more details */
  private static final long NUM_LINES_IN_BLOCK_INDEX = 100L;
  private static final String PART_PREFIX = "part-";

  private static final String USAGE = "USAGE: java HadoopArchives [options]: \n"
      + " archive -archiveName NAME -p <parent path> <src>* <dst>\n"
      + " archive -append <archiveName> -p <parent path> <src>* <dstArchive>\n"
      + " archive -appendFromArchive <srcArchive> <archivePaths>* <dstArchive>\n"
      + " archive -copyFromLocal <srcDir> <dstArchive>\n"
      + " archive -copyToLocal <srcArchive> <dstDir>\n";

  private Path jobDirectory;
  private Path srcFiles;
  private JobConf conf;

  public void setConf(Configuration conf) {
    if (conf instanceof JobConf) {
      this.conf = (JobConf) conf;
    } else {
      this.conf = new JobConf(conf, HadoopArchives.class);
    }
  }

  public Configuration getConf() {
    return this.conf;
  }

  /** map of possible run modes **/
  private Map<String, Executor> executors;

  public HadoopArchives(Configuration conf) {
    setConf(conf);
    executors = new HashMap<String, HadoopArchives.Executor>();
    executors.put("-archiveName", new ArchiveExecutor(conf));
    executors.put("-append", new AppendExecutor(conf));
    executors.put("-appendFromArchive", new AppendFromArchiveExecutor(conf));
    executors.put("-copyFromLocal", new CopyFromLocalExecutor(conf));
    executors.put("-copyToLocal", new CopyToLocalExecutor(conf));
  }

  // check that the src paths exist
  private static void checkPaths(Configuration conf, List<Path> paths)
      throws IOException {
    for (Path p : paths) {
      FileSystem fs = p.getFileSystem(conf);
      if (!fs.exists(p)) {
        throw new FileNotFoundException("Source " + p + " does not exist.");
      }
    }
  }

  /**
   * This assumes that there are only two kinds of entries: files and dirs.
   * @param fs the input filesystem
   * @param p the top level path
   * @param out the list of paths output of recursive ls
   * @throws IOException
   */
  private void recursivels(FileSystem fs, Path p, List<FileStatus> out)
      throws IOException {
    FileStatus fstatus = fs.getFileStatus(p);
    if (!fstatus.isDir()) {
      out.add(fstatus);
      return;
    } else {
      out.add(fstatus);
      FileStatus[] listStatus = fs.listStatus(p);
      for (FileStatus stat : listStatus) {
        recursivels(fs, stat.getPath(), out);
      }
    }
  }

  private static class HarEntry implements Writable {
    String path;
    String[] children;
    HarProperties properties;

    HarEntry() {}

    public HarEntry(String path, String[] children, HarProperties properties) {
      this.path = path;
      this.children = children;
      this.properties = properties;
    }
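    // an entry with a non-null children array represents a directory;
    // plain files are written with children == null (see isDir() below)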
    private boolean isDir() {
      return children != null;
    }

    @Override
    public void write(DataOutput out) throws IOException {
      Text.writeString(out, path);
      final boolean dir = isDir();
      out.writeBoolean(dir);
      if (dir) {
        out.writeInt(children.length);
        for (String c : children) {
          Text.writeString(out, c);
        }
      }
      properties.write(out);
    }

    @Override
    public void readFields(DataInput in) throws IOException {
      path = Text.readString(in);
      if (in.readBoolean()) {
        children = new String[in.readInt()];
        for (int i = 0; i < children.length; i++) {
          children[i] = Text.readString(in);
        }
      } else {
        children = null;
      }
      properties = new HarProperties();
      properties.readFields(in);
    }

    public HarProperties getProperties() {
      return properties;
    }
  }

  private boolean checkValidName(String name) {
    Path tmp = new Path(name);
    if (tmp.depth() != 1) {
      return false;
    }
    return name.endsWith(".har");
  }

  private Path largestDepth(List<Path> paths) {
    Path deepest = paths.get(0);
    for (Path p : paths) {
      if (p.depth() > deepest.depth()) {
        deepest = p;
      }
    }
    return deepest;
  }

  /**
   * truncate the prefix root from the full path
   * @param fullPath the full path
   * @param root the prefix root to be truncated
   * @return the relative path
   */
  private Path relPathToRoot(Path fullPath, Path root) {
    // build the relative path component by component rather than
    // with substring, so that this does not break later
    Path justRoot = new Path(Path.SEPARATOR);
    if (fullPath.depth() == root.depth()) {
      return justRoot;
    } else if (fullPath.depth() > root.depth()) {
      Path retPath = new Path(fullPath.getName());
      Path parent = fullPath.getParent();
      for (int i = 0; i < (fullPath.depth() - root.depth() - 1); i++) {
        retPath = new Path(parent.getName(), retPath);
        parent = parent.getParent();
      }
      return new Path(justRoot, retPath);
    }
    return null;
  }

  /**
   * this method writes all the valid top level directories into the srcWriter
   * for indexing. This method is a little tricky. example- for an input with
   * parent path /home/user/ and sources as /home/user/source/dir1,
   * /home/user/source/dir2 - this will output {@code <source, dir, dir1, dir2>}
   * (dir means that source is a dir with dir1 and dir2 as children),
   * {@code <source/dir1, file, null>}, {@code <source/dir2, file, null>} and
   * {@code </, dir, source>}
   *
   * @param srcWriter
   *          the sequence file writer to write the directories to
   * @param paths
   *          the source paths provided by the user. They are glob free and
   *          have full paths (not relative paths)
   * @param parentPath
   *          the parent path that you want the archives to be relative to.
   *          example - /home/user/dir1 can be archived with parent as /home or
   *          /home/user.
   * @throws IOException
   */
  private void writeTopLevelDirs(SequenceFile.Writer srcWriter,
      List<Path> paths, Path parentPath) throws IOException {
    /* find all the common parents of paths that are valid archive
     * paths. This is done so that we do not add a common path
     * twice, and so that we only add the children of a path that
     * were actually specified by the user.
     */
    TreeMap<Path, HashSet<String>> allpaths =
        new TreeMap<Path, HashSet<String>>();
    /* the largest depth of paths. the max number of times
     * we need to iterate */
    Path deepest = largestDepth(paths);
    Path root = new Path(Path.SEPARATOR);
    List<Path> justDirs = paths;
    for (int i = parentPath.depth(); i < deepest.depth(); i++) {
      List<Path> parents = new ArrayList<Path>();
      for (Path p : justDirs) {
        if (p.compareTo(root) == 0) {
          // do nothing
        } else {
          Path parent = p.getParent();
          if (allpaths.containsKey(parent)) {
            HashSet<String> children = allpaths.get(parent);
            children.add(p.getName());
          } else {
            HashSet<String> children = new HashSet<String>();
            children.add(p.getName());
            allpaths.put(parent, children);
          }
          parents.add(parent);
        }
      }
      justDirs = parents;
    }
    Set<Map.Entry<Path, HashSet<String>>> keyVals = allpaths.entrySet();
    for (Map.Entry<Path, HashSet<String>> entry : keyVals) {
      Path currentPath = entry.getKey();
      Path relPath = relPathToRoot(currentPath, parentPath);
      if (relPath != null) {
        FileSystem fs = currentPath.getFileSystem(getConf());
        HarProperties properties =
            new HarProperties(fs.getFileStatus(currentPath));
        final String[] children = new String[entry.getValue().size()];
        int i = 0;
        for (String child : entry.getValue()) {
          children[i++] = child;
        }
        HarEntry harEntry =
            new HarEntry(relPath.toString(), children, properties);
        srcWriter.append(new LongWritable(0L), harEntry);
      }
    }
  }

  // delete the tmp job directory
  private void cleanJobDirectory() {
    try {
      FileSystem jobfs = jobDirectory.getFileSystem(conf);
      jobfs.delete(jobDirectory, true);
    } catch (IOException ioe) {
      LOG.warn("Unable to clean tmp directory " + jobDirectory, ioe);
    }
  }

  private long writeFromArchiveFilesToProcess(Path harSrc,
      List<Path> relativePaths) throws IOException {
    Set<Path> allowedPaths = new HashSet<Path>(relativePaths);
    Map<String, HashSet<String>> tree = new HashMap<String, HashSet<String>>();
    HashSet<String> toTake = new HashSet<String>();
    HarReader harReader = new HarReader(harSrc, conf);
    List<HarStatus> allStatuses = new ArrayList<HarStatus>();
    try {
      while (harReader.hasNext()) {
        allStatuses.add(harReader.getNext());
      }
    } finally {
      if (harReader != null) {
        harReader.close();
      }
    }

    Path root = new Path(Path.SEPARATOR);
    // decide which of the har files we need to process
    // and create in-memory tree structure of the files
    for (HarStatus harStatus : allStatuses) {
      Path path = new Path(harStatus.getName());
      Path currentPath = path;
      // decide whether we need to process this har-entry
      boolean allowed = false;
      for (int i = 0; i <= path.depth(); ++i) {
        if (allowedPaths.contains(currentPath)) {
          allowed = true;
          break;
        }
        currentPath = currentPath.getParent();
      }
      if (allowed) {
        currentPath = path;
        // update in-memory structure of har files
        for (int i = 0; i <= path.depth(); ++i) {
          toTake.add(currentPath.toString());
          if (currentPath.equals(root)) {
            break;
          }
          Path parent = currentPath.getParent();
          String parentString = parent.toString();
          HashSet<String> treeEntry = tree.get(parentString);
          if (treeEntry == null) {
            HashSet<String> value = new HashSet<String>();
            value.add(currentPath.getName());
            tree.put(parentString, value);
          } else {
            treeEntry.add(currentPath.getName());
          }
          currentPath = parent;
        }
      }
    }

    final String randomId = DistCp.getRandomId();
    jobDirectory = new Path(new JobClient(conf).getSystemDir(),
        NAME + "_" + randomId);
    // get a tmp directory for input splits
    FileSystem jobfs = jobDirectory.getFileSystem(conf);
    jobfs.mkdirs(jobDirectory);
    srcFiles = new Path(jobDirectory, "_har_src_files");
    SequenceFile.Writer srcWriter = SequenceFile.createWriter(jobfs, conf,
        srcFiles, LongWritable.class, HarEntry.class,
        SequenceFile.CompressionType.NONE);
    long totalSize = 0;
    try {
      for (HarStatus harStatus : allStatuses) {
        String pathString = harStatus.getName();
        // skip items that we don't need
        if (!toTake.contains(pathString)) {
          continue;
        }
        HashSet<String> treeEntry = tree.get(pathString);
        String[] children;
        if (treeEntry == null) {
          children = null;
        } else {
          children = treeEntry.toArray(new String[0]);
        }
        HarEntry harEntry = new HarEntry(harStatus.getName(), children,
            harStatus.getProperties());
        srcWriter.append(new LongWritable(harStatus.getLength()), harEntry);
        srcWriter.sync();
        totalSize += harStatus.getLength();
      }
    } finally {
      srcWriter.close();
    }
    return totalSize;
  }

  private void appendFromArchive(Path harSrc, List<Path> relativePaths,
      Path harDst) throws IOException {
    Path outputPath = harDst;
    FileOutputFormat.setOutputPath(conf, outputPath);
    FileSystem outFs = outputPath.getFileSystem(conf);
    if (!outFs.exists(outputPath)) {
      throw new IOException("Invalid Output. HAR File " + outputPath
          + " doesn't exist");
    }
    if (outFs.isFile(outputPath)) {
      throw new IOException("Invalid Output. HAR File " + outputPath
          + " must be represented as a directory");
    }
    long totalSize = writeFromArchiveFilesToProcess(harSrc, relativePaths);

    // make it a har path
    FileSystem fs1 = harSrc.getFileSystem(conf);
    URI uri = fs1.getUri();
    Path parentPath = new Path("har://" + "hdfs-" + uri.getHost() + ":"
        + uri.getPort() + fs1.makeQualified(harSrc).toUri().getPath());
    FileSystem fs = parentPath.getFileSystem(conf);
    conf.set(SRC_LIST_LABEL, srcFiles.toString());
    conf.set(SRC_PARENT_LABEL, parentPath.makeQualified(fs).toString());
    conf.setLong(TOTAL_SIZE_LABEL, totalSize);
    long partSize = conf.getLong(HAR_PARTSIZE_LABEL, HAR_PARTSIZE_DEFAULT);
    int numMaps = (int) (totalSize / partSize);
    // run at least one map.
    conf.setNumMapTasks(numMaps == 0 ? 1 : numMaps);
    conf.setNumReduceTasks(1);
    conf.setOutputFormat(NullOutputFormat.class);
    conf.setMapOutputKeyClass(IntWritable.class);
    conf.setMapOutputValueClass(Text.class);
    conf.set("hadoop.job.history.user.location", "none");
    // make sure no speculative execution is done
    conf.setSpeculativeExecution(false);

    // set the starting part id offset for the mapper
    int partId = findFirstAvailablePartId(outputPath);
    conf.setInt(PART_ID_OFFSET, partId);

    Path index = new Path(outputPath, HarFileSystem.INDEX_NAME);
    Path indexDirectory = new Path(outputPath,
        HarFileSystem.INDEX_NAME + ".copy");
    outFs.mkdirs(indexDirectory);
    Path indexCopy = new Path(indexDirectory, "data");
    outFs.rename(index, indexCopy);

    MultipleInputs.addInputPath(conf, jobDirectory, HArchiveInputFormat.class,
        HArchivesMapper.class);
    MultipleInputs.addInputPath(conf, indexDirectory, TextInputFormat.class,
        HArchivesConvertingMapper.class);
    conf.setReducerClass(HArchivesMergingReducer.class);

    JobClient.runJob(conf);
    cleanJobDirectory();
  }

  /**
   * archive the given source paths into the dest
   *
   * @param parentPath
   *          the parent path of all the source paths
   * @param srcPaths
   *          the src paths to be archived
   * @param outputPath
   *          the dest dir that will contain the archive
   * @param append
   *          append to an existing archive or create a new one
   */
  private void archive(Path parentPath, List<Path> srcPaths, Path outputPath,
      boolean append) throws IOException {
    parentPath = parentPath.makeQualified(parentPath.getFileSystem(conf));
    checkPaths(conf, srcPaths);
    Path destinationDir = outputPath.getParent();
    FileOutputFormat.setOutputPath(conf, outputPath);
    FileSystem outFs = outputPath.getFileSystem(conf);
    if (append) {
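      // when appending, the destination archive must already exist
      // and be laid out as a directory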
      if (!outFs.exists(outputPath)) {
        throw new IOException("Invalid Output. HAR File " + outputPath
            + " doesn't exist");
      }
      if (outFs.isFile(outputPath)) {
        throw new IOException("Invalid Output. HAR File " + outputPath
            + " must be represented as a directory");
      }
    } else {
      if (outFs.exists(outputPath)) {
        throw new IOException("Invalid Output: " + outputPath
            + ". File already exists");
      }
      if (outFs.isFile(destinationDir)) {
        throw new IOException("Invalid Output. " + outputPath
            + " is not a directory");
      }
    }
    long totalSize = writeFilesToProcess(parentPath, srcPaths);

    FileSystem fs = parentPath.getFileSystem(conf);
    conf.set(SRC_LIST_LABEL, srcFiles.toString());
    conf.set(SRC_PARENT_LABEL, parentPath.makeQualified(fs).toString());
    conf.setLong(TOTAL_SIZE_LABEL, totalSize);
    long partSize = conf.getLong(HAR_PARTSIZE_LABEL, HAR_PARTSIZE_DEFAULT);
    int numMaps = (int) (totalSize / partSize);
    // run at least one map.
    conf.setNumMapTasks(numMaps == 0 ? 1 : numMaps);
    conf.setNumReduceTasks(1);
    conf.setOutputFormat(NullOutputFormat.class);
    conf.setMapOutputKeyClass(IntWritable.class);
    conf.setMapOutputValueClass(Text.class);
    conf.set("hadoop.job.history.user.location", "none");
    // make sure no speculative execution is done
    conf.setSpeculativeExecution(false);

    if (append) {
      // set the starting part id offset for the mapper
      int partId = findFirstAvailablePartId(outputPath);
      conf.setInt(PART_ID_OFFSET, partId);

      Path index = new Path(outputPath, HarFileSystem.INDEX_NAME);
      Path indexDirectory = new Path(outputPath,
          HarFileSystem.INDEX_NAME + ".copy");
      outFs.mkdirs(indexDirectory);
      Path indexCopy = new Path(indexDirectory, "data");
      outFs.rename(index, indexCopy);

      MultipleInputs.addInputPath(conf, jobDirectory,
          HArchiveInputFormat.class, HArchivesMapper.class);
      MultipleInputs.addInputPath(conf, indexDirectory, TextInputFormat.class,
          HArchivesConvertingMapper.class);
      conf.setReducerClass(HArchivesMergingReducer.class);
    } else {
      conf.setMapperClass(HArchivesMapper.class);
      conf.setInputFormat(HArchiveInputFormat.class);
      FileInputFormat.addInputPath(conf, jobDirectory);
      conf.setReducerClass(HArchivesReducer.class);
    }
    JobClient.runJob(conf);
    cleanJobDirectory();
  }

  private long writeFilesToProcess(Path parentPath, List<Path> srcPaths)
      throws IOException {
    final String randomId = DistCp.getRandomId();
    jobDirectory = new Path(new JobClient(conf).getSystemDir(),
        NAME + "_" + randomId);
    // get a tmp directory for input splits
    FileSystem jobfs = jobDirectory.getFileSystem(conf);
    jobfs.mkdirs(jobDirectory);
    srcFiles = new Path(jobDirectory, "_har_src_files");
    SequenceFile.Writer srcWriter = SequenceFile.createWriter(jobfs, conf,
        srcFiles, LongWritable.class, HarEntry.class,
        SequenceFile.CompressionType.NONE);
    long totalSize = 0;
    // get the list of files
    // create a single list of files and dirs
    FileSystem fs = parentPath.getFileSystem(conf);
    try {
      // write the top level dirs in first
      writeTopLevelDirs(srcWriter, srcPaths, parentPath);
      srcWriter.sync();
      // these are the input paths passed
      // from the command line
      // we do a recursive ls on these paths
      // and then write them to the input file
      // one at a time
      for (Path src : srcPaths) {
        ArrayList<FileStatus> allFiles = new ArrayList<FileStatus>();
        recursivels(fs, src, allFiles);
        for (FileStatus stat : allFiles) {
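          // directories contribute a length of 0 so that only file bytes
          // count towards the split sizes derived from TOTAL_SIZE_LABEL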
          long len = stat.isDir() ? 0 : stat.getLen();
          String path = relPathToRoot(stat.getPath(), parentPath).toString();
          String[] children = null;
          if (stat.isDir()) {
            // get the children
            FileStatus[] list = fs.listStatus(stat.getPath());
            children = new String[list.length];
            for (int i = 0; i < list.length; i++) {
              children[i] = list[i].getPath().getName();
            }
          }
          HarEntry harEntry = new HarEntry(path, children,
              new HarProperties(stat));
          srcWriter.append(new LongWritable(len), harEntry);
          srcWriter.sync();
          totalSize += len;
        }
      }
    } finally {
      srcWriter.close();
    }
    jobfs.setReplication(srcFiles, (short) 10);
    return totalSize;
  }

  private int findFirstAvailablePartId(Path archivePath) throws IOException {
    FileSystem fs = archivePath.getFileSystem(conf);
    FileStatus[] fileStatuses = fs.listStatus(archivePath);
    int result = 0;
    for (FileStatus fileStatus : fileStatuses) {
      String name = fileStatus.getPath().getName();
      if (name.startsWith(PART_PREFIX)) {
        int id = Integer.parseInt(name.substring(PART_PREFIX.length()));
        result = Math.max(result, id + 1);
      }
    }
    return result;
  }

  /**
   * Input format of a hadoop archive job responsible for
   * generating splits of the file list
   */
  private static class HArchiveInputFormat
      implements InputFormat<LongWritable, HarEntry> {

    // generate input splits from the src file lists
    public InputSplit[] getSplits(JobConf jconf, int numSplits)
        throws IOException {
      String srcfilelist = jconf.get(SRC_LIST_LABEL, "");
      if ("".equals(srcfilelist)) {
        throw new IOException("Unable to get the "
            + "src file for archive generation.");
      }
      long totalSize = jconf.getLong(TOTAL_SIZE_LABEL, -1);
      if (totalSize == -1) {
        throw new IOException("Invalid size of files to archive");
      }
      // we should be safe since this is set by our own code
      Path src = new Path(srcfilelist);
      FileSystem fs = src.getFileSystem(jconf);
      FileStatus fstatus = fs.getFileStatus(src);
      ArrayList<FileSplit> splits = new ArrayList<FileSplit>(numSplits);
      LongWritable key = new LongWritable();
      HarEntry value = new HarEntry();
      SequenceFile.Reader reader = null;
      // the remaining bytes in the file split
      long remaining = fstatus.getLen();
      // the count of sizes calculated till now
      long currentCount = 0L;
      // the end position of the split
      long lastPos = 0L;
      // the start position of the split
      long startPos = 0L;
      long targetSize = totalSize / numSplits;
      // create splits of size targetSize so that all the maps
      // have equal-sized data to read and write.
      try {
        reader = new SequenceFile.Reader(fs, src, jconf);
        while (reader.next(key, value)) {
          if (currentCount + key.get() > targetSize && currentCount != 0) {
            long size = lastPos - startPos;
            splits.add(new FileSplit(src, startPos, size, (String[]) null));
            remaining = remaining - size;
            startPos = lastPos;
            currentCount = 0L;
          }
          currentCount += key.get();
          lastPos = reader.getPosition();
        }
        // whatever is left over becomes the final split.
        if (remaining != 0) {
          splits.add(new FileSplit(src, startPos, remaining, (String[]) null));
        }
      } finally {
        reader.close();
      }
      return splits.toArray(new FileSplit[splits.size()]);
    }

    public RecordReader<LongWritable, HarEntry> getRecordReader(
        InputSplit split, JobConf job, Reporter reporter) throws IOException {
      return new SequenceFileRecordReader<LongWritable, HarEntry>(job,
          (FileSplit) split);
    }
  }

  /**
   * A mapper that reads lines from the _index file and re-emits
   * them keyed by the hash of the path field
   */
  private static class HArchivesConvertingMapper
      implements Mapper<LongWritable, Text, IntWritable, Text> {

    public void configure(JobConf conf) {
    }

    public void map(LongWritable key, Text value,
        OutputCollector<IntWritable, Text> out, Reporter reporter)
        throws IOException {
      reporter.setStatus("Passing file " + value + " to archive.");
      reporter.progress();
      HarStatus harStatus = new HarStatus(value.toString());
      int hash = HarFileSystem.getHarHash(harStatus.getName());
      out.collect(new IntWritable(hash), value);
    }

    public void close() throws IOException {
    }
  }

  private static class HArchivesMapper
      implements Mapper<LongWritable, HarEntry, IntWritable, Text> {

    private JobConf conf = null;
    private int partId = -1;
    private Path tmpOutputDir = null;
    Path tmpOutput = null;
    String partName = null;
    Path rootPath = null;
    FSDataOutputStream partStream = null;
    FileSystem destFs = null;
    byte[] buffer;
    final static int BUFFER_SIZE = 128 * 1024;
    long blockSize;

    // configure the mapper and create
    // the part file.
    // use the map reduce framework to write into
    // tmp files.
    public void configure(JobConf conf) {
      this.conf = conf;
      int partIdOffset = conf.getInt(PART_ID_OFFSET, 0);
      // this is tightly tied to map reduce
      // since it does not expose an api
      // to get the partition
      partId = conf.getInt("mapred.task.partition", -1) + partIdOffset;
      // create a file name using the partition
      // we need to write to this directory
      tmpOutputDir = FileOutputFormat.getWorkOutputPath(conf);
      blockSize = conf.getLong(HAR_BLOCKSIZE_LABEL, HAR_BLOCKSIZE_DEFAULT);
      // get the output path and write to the tmp
      // directory
      partName = PART_PREFIX + partId;
      tmpOutput = new Path(tmpOutputDir, partName);
      String rootPathString = conf.get(SRC_PARENT_LABEL, null);
      if (rootPathString == null) {
        throw new RuntimeException("Unable to read parent "
            + "path for har from config");
      }
      rootPath = new Path(rootPathString);
      try {
        destFs = tmpOutput.getFileSystem(conf);
        // this was a stale copy
        if (destFs.exists(tmpOutput)) {
          destFs.delete(tmpOutput, false);
        }
        partStream = destFs.create(tmpOutput, false,
            conf.getInt("io.file.buffer.size", 4096),
            destFs.getDefaultReplication(), blockSize);
      } catch (IOException ie) {
        throw new RuntimeException("Unable to open output file " + tmpOutput,
            ie);
      }
      buffer = new byte[BUFFER_SIZE];
    }

    // copy raw data.
    public void copyData(Path input, FSDataInputStream fsin,
        FSDataOutputStream fout, Reporter reporter) throws IOException {
      try {
        for (int cbread = 0; (cbread = fsin.read(buffer)) >= 0;) {
          fout.write(buffer, 0, cbread);
          reporter.progress();
        }
      } finally {
        fsin.close();
      }
    }

    /**
     * get rid of the / at the beginning of the path
     * @param p the path
     * @param parent the parent to resolve the path against
     * @return the path without the leading /
     */
    private Path realPath(Path p, Path parent) {
      Path rootPath = new Path(Path.SEPARATOR);
      if (rootPath.compareTo(p) == 0) {
        return parent;
      }
      return new Path(parent, new Path(p.toString().substring(1)));
    }

    // read files from the split input
    // and write them onto the part files.
    // also output hash(name) and the index string
    // for the reducer to create the index
    // and masterindex files.
    public void map(LongWritable key, HarEntry value,
        OutputCollector<IntWritable, Text> out, Reporter reporter)
        throws IOException {
      Path relativePath = new Path(value.path);
      int hash = HarFileSystem.getHarHash(relativePath.toString());
      String towrite = null;
      Path srcPath = realPath(relativePath, rootPath);
      long startPos = partStream.getPos();
      FileSystem srcFs = srcPath.getFileSystem(conf);
      HarProperties properties = value.getProperties();
      String propStr = properties.serialize();
      if (value.isDir()) {
        towrite = HarFileSystem.encode(relativePath.toString()) + " dir "
            + propStr + " 0 0 ";
        StringBuffer sbuff = new StringBuffer();
        sbuff.append(towrite);
        for (String child : value.children) {
          sbuff.append(HarFileSystem.encode(child) + " ");
        }
        towrite = sbuff.toString();
        // reading directories is also progress
        reporter.progress();
      } else {
        FSDataInputStream input = srcFs.open(srcPath);
        reporter.setStatus("Copying file " + srcPath + " to archive.");
        copyData(srcPath, input, partStream, reporter);
        long len = partStream.getPos() - startPos;
        towrite = HarFileSystem.encode(relativePath.toString()) + " file "
            + partName + " " + startPos + " " + len + " " + propStr + " ";
      }
      out.collect(new IntWritable(hash), new Text(towrite));
    }

    public void close() throws IOException {
      // close the part files.
      partStream.close();
    }
  }

  /**
   * Base reducer for creating the index and the master index
   */
  private static class HArchivesReducer
      implements Reducer<IntWritable, Text, Text, Text> {

    private JobConf conf = null;
    private long startIndex = 0;
    private long endIndex = 0;
    private long startPos = 0;
    private Path masterIndex = null;
    private Path index = null;
    private FileSystem fs = null;
    private FSDataOutputStream outStream = null;
    private FSDataOutputStream indexStream = null;
    private Path tmpOutputDir = null;
    private int written = 0;
    private int keyVal = 0;

    /**
     * Configure the reducer: open the _index and _masterindex files for writing
     */
    public void configure(JobConf conf) {
      this.conf = conf;
      tmpOutputDir = FileOutputFormat.getWorkOutputPath(this.conf);
      masterIndex = new Path(tmpOutputDir, HarFileSystem.MASTER_INDEX_NAME);
      index = new Path(tmpOutputDir, HarFileSystem.INDEX_NAME);
      try {
        fs = masterIndex.getFileSystem(conf);
        if (fs.exists(masterIndex)) {
          fs.delete(masterIndex, false);
        }
        if (fs.exists(index)) {
          fs.delete(index, false);
        }
        indexStream = fs.create(index);
        outStream = fs.create(masterIndex);
        String version = VERSION + " \n";
        outStream.write(version.getBytes());
      } catch (IOException e) {
        throw new RuntimeException(e);
      }
    }

    // create the index and master index. The input to
    // the reduce is already sorted by the hash of the
    // files. So we just need to write it to the index.
    // We update the masterindex after every
    // NUM_LINES_IN_BLOCK_INDEX entries.
    public void reduce(IntWritable key, Iterator<Text> values,
        OutputCollector<Text, Text> out, Reporter reporter)
        throws IOException {
      keyVal = key.get();
      while (values.hasNext()) {
        Text value = values.next();
        String towrite = value.toString() + "\n";
        indexStream.write(towrite.getBytes());
        written++;
        if (written > HadoopArchives.NUM_LINES_IN_BLOCK_INDEX - 1) {
          // report status once per index block
          reporter.setStatus("Creating index for archives");
          reporter.progress();
          endIndex = keyVal;
          writeLineToMasterIndex(outStream, startIndex, endIndex, startPos,
              indexStream.getPos());
          startPos = indexStream.getPos();
          startIndex = endIndex;
          written = 0;
        }
      }
    }

    public void close() throws IOException {
      // write the last part of the master index.
      if (written > 0) {
        writeLineToMasterIndex(outStream, startIndex, keyVal, startPos,
            indexStream.getPos());
      }
      // close the streams
      outStream.close();
      indexStream.close();
      // try increasing the replication
      fs.setReplication(index, (short) 5);
      fs.setReplication(masterIndex, (short) 5);
    }
  }

  /**
   * Reducer that merges entries for the _index file
   */
  private static class HArchivesMergingReducer
      implements Reducer<IntWritable, Text, Text, Text> {

    private JobConf conf = null;
    private long startIndex = 0;
    private long endIndex = 0;
    private long startPos = 0;
    private Path masterIndex = null;
    private Path index = null;
    private FileSystem fs = null;
    private FSDataOutputStream outStream = null;
    private FSDataOutputStream indexStream = null;
    private Path outputDir = null;
    private int written = 0;
    private int keyVal = 0;

    /**
     * Configure the reducer: open the _index and _masterindex files for writing
     */
    public void configure(JobConf conf) {
      this.conf = conf;
      outputDir = FileOutputFormat.getWorkOutputPath(this.conf);
      masterIndex = new Path(outputDir, HarFileSystem.MASTER_INDEX_NAME);
      index = new Path(outputDir, HarFileSystem.INDEX_NAME);
      try {
        fs = masterIndex.getFileSystem(conf);
        if (fs.exists(masterIndex)) {
          fs.delete(masterIndex, false);
        }
        if (fs.exists(index)) {
          fs.delete(index, false);
        }
        indexStream = fs.create(index);
        outStream = fs.create(masterIndex);
        String version = VERSION + " \n";
        outStream.write(version.getBytes());
      } catch (IOException e) {
        throw new RuntimeException(e);
      }
    }

    /**
     * Write the data to the index and master index. The input to the reduce
     * is already sorted by the hash of the files.
     */
    public void reduce(IntWritable key, Iterator<Text> values,
        OutputCollector<Text, Text> out, Reporter reporter)
        throws IOException {
      // merge the children of the same directories
      Map<String, HarStatus> harItems = new HashMap<String, HarStatus>();
      while (values.hasNext()) {
        Text value = values.next();
        HarStatus harStatus = new HarStatus(value.toString());
        if (harItems.containsKey(harStatus.getName())) {
          if (!harStatus.isDir()) {
            throw new RuntimeException("File " + harStatus.getName()
                + " already exists in har");
          }
          HarStatus existingHarStatus = harItems.get(harStatus.getName());
          existingHarStatus.getChildren().addAll(harStatus.getChildren());
        } else {
          harItems.put(harStatus.getName(), harStatus);
        }
      }

      // write to the _index file and update _masterindex
      keyVal = key.get();
      for (HarStatus harStatus : harItems.values()) {
        String towrite = harStatus.serialize() + "\n";
        indexStream.write(towrite.getBytes());
        written++;
        if (written > HadoopArchives.NUM_LINES_IN_BLOCK_INDEX - 1) {
          // report status once per index block
          reporter.setStatus("Creating index for archives");
          reporter.progress();
          endIndex = keyVal;
          writeLineToMasterIndex(outStream, startIndex, endIndex, startPos,
              indexStream.getPos());
          startPos = indexStream.getPos();
          startIndex = endIndex;
          written = 0;
        }
      }
    }

    public void close() throws IOException {
      // write the last part of the master index.
      if (written > 0) {
        writeLineToMasterIndex(outStream, startIndex, keyVal, startPos,
            indexStream.getPos());
      }
      // close the streams
      outStream.close();
      indexStream.close();
      // try increasing the replication
      fs.setReplication(index, (short) 5);
      fs.setReplication(masterIndex, (short) 5);
    }
  }

  /**
   * Writes the data corresponding to one block of _index to the master index
   * @param stream the stream to write to
   * @param startHash hash of the first entry in the block
   * @param endHash hash of the last entry in the block
   * @param indexStartPos position (in bytes) of the beginning of the block
   * @param indexEndPos position (in bytes) of the end of the block
   * @throws IOException
   */
  private static void writeLineToMasterIndex(FSDataOutputStream stream,
      long startHash, long endHash, long indexStartPos, long indexEndPos)
      throws IOException {
    String toWrite = startHash + " " + endHash + " " + indexStartPos + " "
        + indexEndPos + "\n";
    stream.write(toWrite.getBytes());
  }

  /**
   * Creates a new stream to write actual file data
   * @param dst parent of the part-id file
   * @param partId id of the part
   * @return the open stream
   * @throws IOException
   */
  private FSDataOutputStream createNewPartStream(Path dst, int partId)
      throws IOException {
    String partName = PART_PREFIX + partId;
    Path output = new Path(dst, partName);
    FileSystem destFs = output.getFileSystem(conf);
    FSDataOutputStream partStream = destFs.create(output, false,
        conf.getInt("io.file.buffer.size", 4096),
        destFs.getDefaultReplication(),
        conf.getLong(HAR_BLOCKSIZE_LABEL, HAR_BLOCKSIZE_DEFAULT));
    return partStream;
  }

  private static final class LocalAndArchivePaths {
    private final Path localPath;
    private final String archivePath;

    public Path getLocalPath() {
      return localPath;
    }

    public String getArchivePath() {
      return archivePath;
    }

    public LocalAndArchivePaths(Path localPath, String archivePath) {
      super();
      this.localPath = localPath;
      this.archivePath = archivePath;
    }
  }

  /**
   * Uploads a local directory as a har archive
   * @param srcDir path to the local directory to upload
   * @param dst path to the har archive
   * @throws IOException
   */
  private void copyFromLocal(Path srcDir, Path dst) throws IOException {
    long partSize = conf.getLong(HAR_PARTSIZE_LABEL, HAR_PARTSIZE_DEFAULT);
    FileSystem srcFS = FileSystem.getLocal(conf);
    int partId = 0;
    FSDataOutputStream partStream = null;
    // index entries will be sorted by hash
    TreeMap<Integer, String> indexEntries = new TreeMap<Integer, String>();
    Queue<LocalAndArchivePaths> queue = new LinkedList<LocalAndArchivePaths>();
    try {
      queue.add(new LocalAndArchivePaths(srcDir, Path.SEPARATOR));
      while (!queue.isEmpty()) {
        LocalAndArchivePaths item = queue.remove();
        Path localPath = item.getLocalPath();
        String archiveItem = item.getArchivePath();
        FileStatus currentPathFileStatus = srcFS.getFileStatus(localPath);
        StringBuilder toWrite = new StringBuilder(
            URLEncoder.encode(item.getArchivePath(), "UTF-8"));
        String properties =
            new HarProperties(currentPathFileStatus).serialize();
        if (currentPathFileStatus.isDir()) {
          FileStatus[] children = srcFS.listStatus(localPath);
          toWrite.append(" dir ");
          toWrite.append(properties);
          toWrite.append(" 0 0");
          for (FileStatus child : children) {
            Path childPath = child.getPath();
            String nextArchiveItem =
                new Path(archiveItem, childPath.getName()).toString();
            queue.add(new LocalAndArchivePaths(childPath, nextArchiveItem));
            toWrite.append(" ");
            toWrite.append(URLEncoder.encode(childPath.getName(), "UTF-8"));
          }
          toWrite.append("\n");
        } else {
          if (partStream == null) {
            partStream = createNewPartStream(dst, partId);
          }
          // a file line in _index has the form:
          // <encoded name> file <part name> <offset> <length> <properties>
          toWrite.append(" file ");
          toWrite.append("part-" + partId);
          toWrite.append(" ");
          toWrite.append(partStream.getPos() + " "
              + currentPathFileStatus.getLen());
          toWrite.append(" " + properties);
          toWrite.append("\n");
          InputStream input = srcFS.open(localPath);
          IOUtils.copyBytes(input, partStream, conf, false);
          // proceed to the next part
          if (partStream.getPos() >= partSize) {
            ++partId;
            partStream.close();
            partStream = null;
          }
        }
        int hash = HarFileSystem.getHarHash(archiveItem);
        indexEntries.put(hash, toWrite.toString());
      }
    } finally {
      if (partStream != null) {
        partStream.close();
      }
    }

    // Now create the master index.
    // indexEntries are already sorted by hash
    Path index = new Path(dst, HarFileSystem.INDEX_NAME);
    Path masterIndex = new Path(dst, HarFileSystem.MASTER_INDEX_NAME);
    FSDataOutputStream indexStream = null;
    FSDataOutputStream masterIndexStream = null;
    try {
      FileSystem dstFS = index.getFileSystem(conf);
      indexStream = dstFS.create(index);
      masterIndexStream = dstFS.create(masterIndex);
      String version = VERSION + "\n";
      masterIndexStream.write(version.getBytes());

      int startHash = 0;
      int endHash = 0;
      long indexStartPos = 0;
      long indexEndPos = 0;
      long numLines = 0;
      for (Map.Entry<Integer, String> indexEntry : indexEntries.entrySet()) {
        if (numLines == 0) {
          startHash = indexEntry.getKey();
          indexStartPos = indexStream.getPos();
        }
        endHash = indexEntry.getKey();
        indexStream.write(indexEntry.getValue().getBytes());
        ++numLines;
        if (numLines >= HadoopArchives.NUM_LINES_IN_BLOCK_INDEX) {
          numLines = 0;
          indexEndPos = indexStream.getPos();
          writeLineToMasterIndex(masterIndexStream, startHash, endHash,
              indexStartPos, indexEndPos);
        }
      }
      if (numLines > 0) {
        numLines = 0;
        indexEndPos = indexStream.getPos();
        writeLineToMasterIndex(masterIndexStream, startHash, endHash,
            indexStartPos, indexEndPos);
      }
    } finally {
      if (indexStream != null) {
        indexStream.close();
      }
      if (masterIndexStream != null) {
        masterIndexStream.close();
      }
    }
  }

  private void copyToLocal(Path archivePath, Path local) throws IOException {
    HarReader harReader = new HarReader(archivePath, conf);
    FileSystem localFS = FileSystem.getLocal(conf);
    FileSystem fs = archivePath.getFileSystem(conf);
    if (!localFS.getFileStatus(local).isDir()) {
      throw new IOException("Path " + local + " is not a directory");
    }
    try {
      while (harReader.hasNext()) {
        HarStatus harStatus = harReader.getNext();
        String harPath = harStatus.getName();
        // skip the top level dir
        if (harPath.equals(Path.SEPARATOR)) {
          continue;
        }
        String relativePath = harPath.substring(1);
        Path output = new Path(local, relativePath);
        if (harStatus.isDir()) {
          localFS.mkdirs(output);
        } else {
          OutputStream outputStream = null;
          FSDataInputStream inputStream = null;
          try {
            outputStream = localFS.create(output);
            Path partFile = new Path(archivePath, harStatus.getPartName());
            inputStream = new HarFSDataInputStream(fs, partFile,
                harStatus.getStartIndex(), harStatus.getLength(),
                conf.getInt("io.file.buffer.size", 4096));
            IOUtils.copyBytes(inputStream, outputStream, conf);
          } finally {
            if (outputStream != null) {
              outputStream.close();
            }
          }
        }
      }
    } finally {
      if (harReader != null) {
        harReader.close();
      }
    }
  }

  /**
   * General interface to parse command line arguments
   * and then execute the needed actions
   */
  private interface Executor {
    public void parse(String[] args) throws Exception;
    public void run() throws Exception;
  }

  private class CopyFromLocalExecutor implements Executor {
    private Path sourceDir;
    private Path harDestination;
    Configuration conf;

    public CopyFromLocalExecutor(Configuration conf) {
      this.conf = conf;
    }
    @Override
    public void parse(String[] args) throws Exception {
      if (args.length != 2) {
        throw new ParseException("Not enough arguments to parse: expected 2, found "
            + args.length);
      }
      sourceDir = new Path(args[0]);
      harDestination = new Path(args[1]);
    }

    @Override
    public void run() throws Exception {
      copyFromLocal(sourceDir, harDestination);
    }
  }

  private class CopyToLocalExecutor implements Executor {
    private Path harArchive;
    private Path localFolder;
    Configuration conf;

    public CopyToLocalExecutor(Configuration conf) {
      this.conf = conf;
    }

    @Override
    public void parse(String[] args) throws Exception {
      if (args.length != 2) {
        throw new ParseException("Not enough arguments to parse: expected 2, found "
            + args.length);
      }
      harArchive = new Path(args[0]);
      localFolder = new Path(args[1]);
    }

    @Override
    public void run() throws Exception {
      copyToLocal(harArchive, localFolder);
    }
  }

  private class AppendFromArchiveExecutor implements Executor {
    private Path harSource;
    private List<Path> pathsInHar;
    private Path harDestination;
    private Configuration conf;

    public AppendFromArchiveExecutor(Configuration conf) {
      this.conf = conf;
    }

    public void parse(String[] args) throws Exception {
      if (args.length < 3) {
        throw new ParseException("Not enough arguments to parse: expected >= 3, found "
            + args.length);
      }
      harSource = new Path(args[0]);
      harDestination = new Path(args[args.length - 1]);
      pathsInHar = new ArrayList<Path>();
      for (int i = 1; i < args.length - 1; ++i) {
        pathsInHar.add(new Path(args[i]));
      }
    }

    public void run() throws Exception {
      appendFromArchive(harSource, pathsInHar, harDestination);
    }
  }

  private abstract class ArchiveExecutorBase implements Executor {
    protected Path parentPath;
    protected List<Path> pathsToArchive;
    protected Path harDestination;
    protected Configuration conf;

    public ArchiveExecutorBase(Configuration conf) {
      this.conf = conf;
    }

    @Override
    public void parse(String[] args) throws Exception {
      Options options = new Options();
      Option parentOption = OptionBuilder.isRequired().hasArg().create("p");
      options.addOption(parentOption);
      CommandLineParser parser = new PosixParser();
      CommandLine cmd = parser.parse(options, args);
      parentPath = new Path(cmd.getOptionValue("p"));
      parsePositionalOptions(cmd.getArgs());
    }

    private void parsePositionalOptions(String[] args) throws Exception {
      if (args.length < 2) {
        throw new ParseException("Not enough arguments to parse: expected >= 2, found "
            + args.length);
      }
      String archiveName = args[0];
      if (!checkValidName(archiveName)) {
        throw new ParseException("Invalid archive name: " + archiveName);
      }
      harDestination = new Path(args[args.length - 1], archiveName);
      pathsToArchive = new ArrayList<Path>();
      if (args.length == 2) {
        // if the user does not specify any source paths,
        // the whole parent directory is archived.
        pathsToArchive.add(parentPath);
        return;
      }
      // process the other paths
      for (int i = 1; i < args.length - 1; ++i) {
        Path argPath = new Path(args[i]);
        if (argPath.isAbsolute()) {
          throw new ParseException("source path " + argPath
              + " is not relative to " + parentPath);
        }
        Path srcPath = new Path(parentPath, argPath);
        FileSystem fs = srcPath.getFileSystem(conf);
        FileStatus[] statuses = fs.globStatus(srcPath);
        for (FileStatus status : statuses) {
          pathsToArchive.add(fs.makeQualified(status.getPath()));
        }
      }
    }
  }

  private class ArchiveExecutor extends ArchiveExecutorBase {
    public ArchiveExecutor(Configuration conf) {
      super(conf);
    }

    @Override
    public void run() throws Exception {
      archive(parentPath, pathsToArchive, harDestination, false);
    }
  }

  private class AppendExecutor extends ArchiveExecutorBase {
    public AppendExecutor(Configuration conf) {
      super(conf);
    }

    @Override
    public void run() throws Exception {
      archive(parentPath, pathsToArchive, harDestination, true);
    }
  }

  private void doRun(String[] args) throws Exception {
    if (args.length < 1) {
      System.out.println(USAGE);
      throw new ParseException("Invalid usage: command was not specified");
    }
    String command = args[0];
    Executor executor = executors.get(command);
    if (executor == null) {
      System.err.println("Unknown command: " + command
          + ". Available commands:");
      for (String cmd : executors.keySet()) {
        System.err.println(cmd);
      }
      throw new ParseException("Unknown command: " + command);
    }
    String[] otherArgs = new String[args.length - 1];
    for (int i = 1; i < args.length; ++i) {
      otherArgs[i - 1] = args[i];
    }
    try {
      executor.parse(otherArgs);
    } catch (Exception e) {
      throw new Exception("Error while parsing args.", e);
    }
    executor.run();
  }

  @Override
  public int run(String[] args) throws Exception {
    try {
      doRun(args);
    } catch (Exception e) {
      LOG.debug("Exception in archives ", e);
      e.printStackTrace();
      System.err.println("Exception in archives");
      System.err.println(e.getLocalizedMessage());
      return 1;
    }
    return 0;
  }

  public static void main(String[] args) throws Exception {
    JobConf job = new JobConf(HadoopArchives.class);
    HadoopArchives harchives = new HadoopArchives(job);
    int ret = ToolRunner.run(harchives, args);
    System.exit(ret);
  }
}