ToolTestComparator.java example

Explorer
spork-streaming-master
/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with this
 * work for additional information regarding copyright ownership. The ASF
 * licenses this file to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */

package org.apache.hadoop.zebra.mapred;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.PrintStream;
import java.util.HashMap;
import java.util.Iterator;
import java.util.ArrayList;
import java.util.Map;
import java.util.Random;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.zebra.BaseTestCase;
import org.apache.hadoop.zebra.io.BasicTable;
import org.apache.hadoop.zebra.io.TableInserter;
import org.apache.hadoop.zebra.io.TableScanner;
import org.apache.hadoop.zebra.parser.ParseException;
import org.apache.hadoop.zebra.pig.TableStorer;
import org.apache.hadoop.zebra.schema.Schema;
import org.apache.hadoop.zebra.types.TypesUtils;
import org.apache.pig.backend.executionengine.ExecException;
import org.apache.pig.backend.executionengine.ExecJob;
import org.apache.pig.data.DataBag;
import org.apache.pig.data.DataByteArray;
import org.apache.pig.data.Tuple;
import org.junit.Assert;

/**
 * TestComparator
 * 
 * Utility for verifying tables created during Zebra Stress Testing
 * 
 */
public class ToolTestComparator extends BaseTestCase {

  final static String TABLE_SCHEMA = "count:int,seed:int,int1:int,int2:int,str1:string,str2:string,byte1:bytes,"
      + "byte2:bytes,float1:float,long1:long,double1:double,m1:map(string),r1:record(f1:string, f2:string),"
      + "c1:collection(record(a:string, b:string))";
  final static String TABLE_STORAGE = "[count,seed,int1,int2,str1,str2,byte1,byte2,float1,long1,double1];[m1#{a}];[r1,c1]";

  private static Random generator = new Random();

  protected static ExecJob pigJob;

  private static int totalNumbCols;
  private static long totalNumbVerifiedRows;

  /**
   * Setup and initialize environment
   */
  public static void setUp() throws Exception {
	  init();
  }

  /**
   * Verify load/store
   * 
   */
  public static void verifyLoad(String pathTable1, String pathTable2,
      int numbCols) throws IOException {
    System.out.println("verifyLoad()");

    // Load table1
    String query1 = "table1 = LOAD '" + pathTable1
        + "' USING org.apache.hadoop.zebra.pig.TableLoader();";
    System.out.println("verifyLoad() running query : " + query1);
    pigServer.registerQuery(query1);

    // Load table2
    String query2 = "table2 = LOAD '" + pathTable2
        + "' USING org.apache.hadoop.zebra.pig.TableLoader();";
    System.out.println("verifyLoad() running query : " + query2);
    pigServer.registerQuery(query2);

    // Get metrics from first table
    Iterator<Tuple> it1 = pigServer.openIterator("table1");

    int numbCols1 = 0;
    long numbRows1 = 0;

    while (it1.hasNext()) {
      ++numbRows1; // increment row count
      Tuple rowValue = it1.next();
      numbCols1 = rowValue.size();
      if (numbCols != 0)
        Assert.assertEquals(
            "Verify failed - Table1 has wrong number of expected columns "
                + "\n row number : " + numbRows1 + "\n expected column size : "
                + numbCols + "\n actual columns size  : " + numbCols1,
            numbCols, numbCols1);
    }

    // Get metrics from second table
    Iterator<Tuple> it2 = pigServer.openIterator("table2");

    int numbCols2 = 0;
    long numbRows2 = 0;

    while (it2.hasNext()) {
      ++numbRows2; // increment row count
      Tuple rowValue = it2.next();
      numbCols2 = rowValue.size();
      if (numbCols != 0)
        Assert.assertEquals(
            "Verify failed - Table2 has wrong number of expected columns "
                + "\n row number : " + numbRows2 + "\n expected column size : "
                + numbCols + "\n actual columns size  : " + numbCols2,
            numbCols, numbCols2);
    }

    Assert
        .assertEquals(
            "Verify failed - Tables have different number row sizes "
                + "\n table1 rows : " + numbRows1 + "\n table2 rows : "
                + numbRows2, numbRows1, numbRows2);

    Assert.assertEquals(
        "Verify failed - Tables have different number column sizes "
            + "\n table1 column size : " + numbCols1
            + "\n table2 column size : " + numbCols2, numbCols1, numbCols2);

    System.out.println();
    System.out.println("Verify load - table1 columns : " + numbCols1);
    System.out.println("Verify load - table2 columns : " + numbCols2);
    System.out.println("Verify load - table1 rows : " + numbRows1);
    System.out.println("Verify load - table2 rows : " + numbRows2);
    System.out.println("Verify load - PASS");
  }

  /**
   * Verify table
   * 
   */
  public static void verifyTable(String pathTable1) throws IOException {
    System.out.println("verifyTable()");

    // Load table1
    String query1 = "table1 = LOAD '" + pathTable1
        + "' USING org.apache.hadoop.zebra.pig.TableLoader();";
    System.out.println("verifyTable() running query : " + query1);
    pigServer.registerQuery(query1);

    // Get metrics from table
    Iterator<Tuple> it1 = pigServer.openIterator("table1");

    int numbCols1 = 0;
    long numbRows1 = 0;

    System.out.println("DEBUG starting to iterate table1");

    while (it1.hasNext()) {
      ++numbRows1; // increment row count
      Tuple rowValue = it1.next();
      numbCols1 = rowValue.size();
    }

    System.out.println();
    System.out.println("Verify table columns : " + numbCols1);
    System.out.println("Verify table rows : " + numbRows1);
    System.out.println("Verify table complete");
  }

  /**
   * Verify sorted
   * 
   */
  public static void verifySorted(String pathTable1, String pathTable2,
      int sortCol, String sortKey, int numbCols, int rowMod)
      throws IOException, ParseException {
    System.out.println("verifySorted()");

    // Load table1
    String query1 = "table1 = LOAD '" + pathTable1
        + "' USING org.apache.hadoop.zebra.pig.TableLoader();";
    System.out.println("verifySorted() running query : " + query1);
    pigServer.registerQuery(query1);

    //
    // Get metrics from first table (unsorted)
    //
    Iterator<Tuple> it1 = pigServer.openIterator("table1");

    int numbCols1 = 0;
    long numbRows1 = 0;

    System.out.println("DEBUG starting to iterate table1");

    while (it1.hasNext()) {
      ++numbRows1; // increment row count
      Tuple rowValue = it1.next();
      numbCols1 = rowValue.size();
      if (numbCols != 0)
        Assert.assertEquals(
            "Verify failed - Table1 has wrong number of expected columns "
                + "\n row number : " + numbRows1 + "\n expected column size : "
                + numbCols + "\n actual columns size  : " + numbCols1,
            numbCols, numbCols1);
    }

    System.out.println();
    System.out.println("Verify unsorted table1 columns : " + numbCols1);
    System.out.println("Verify unsorted table1 rows : " + numbRows1);

    System.out.println("\nDEBUG starting to iterate table2");

    //
    // Get metrics from second table (sorted)
    //
    long numbRows2 = verifySortedTable(pathTable2, sortCol, sortKey, numbCols,
        rowMod, null);

    int numbCols2 = totalNumbCols;
    long numbVerifiedRows = totalNumbVerifiedRows;

    Assert
        .assertEquals(
            "Verify failed - Tables have different number row sizes "
                + "\n table1 rows : " + numbRows1 + "\n table2 rows : "
                + numbRows2, numbRows1, numbRows2);

    Assert.assertEquals(
        "Verify failed - Tables have different number column sizes "
            + "\n table1 column size : " + numbCols1
            + "\n table2 column size : " + numbCols2, numbCols1, numbCols2);

    System.out.println();
    System.out.println("Verify unsorted table1 columns : " + numbCols1);
    System.out.println("Verify sorted   table2 columns : " + numbCols2);
    System.out.println("Verify unsorted table1 rows : " + numbRows1);
    System.out.println("Verify sorted   table2 rows : " + numbRows2);
    System.out.println("Verify sorted - numb verified rows : "
        + numbVerifiedRows);
    System.out.println("Verify sorted - sortCol : " + sortCol);
    System.out.println("Verify sorted - PASS");
  }

  /**
   * Verify merge-join
   * 
   */
  public static void verifyMergeJoin(String pathTable1, int sortCol,
      String sortKey, int numbCols, int rowMod, String verifyDataColName) throws IOException,
      ParseException {
    System.out.println("verifyMergeJoin()");

    //
    // Verify sorted table
    //
    long numbRows = verifySortedTable(pathTable1, sortCol, sortKey, numbCols,
        rowMod, verifyDataColName);

    System.out.println();
    System.out.println("Verify merge-join   table columns : " + totalNumbCols);
    System.out.println("Verify merge-join   table rows : " + numbRows);
    System.out.println("Verify merge-join - numb verified rows : "
        + totalNumbVerifiedRows);
    System.out.println("Verify merge-join - sortCol : " + sortCol);
    System.out.println("Verify merge-join - PASS");
  }

  /**
   * Verify sorted-union
   * 
   */
  public static void verifySortedUnion(ArrayList<String> unionPaths,
      String pathTable1, int sortCol, String sortKey, int numbCols, int rowMod,
      String verifyDataColName) throws IOException, ParseException {
    System.out.println("verifySortedUnion()");

    long numbUnionRows = 0;
    ArrayList<Long> numbRows = new ArrayList<Long>();

    // Get number of rows from each of the input union tables
   for (int i = 0; i < unionPaths.size(); ++i) {
      // Load table1
      String query1 = "table1 = LOAD '" + unionPaths.get(i)
          + "' USING org.apache.hadoop.zebra.pig.TableLoader();";
      System.out.println("verifySortedUnion() running query : " + query1);
      pigServer.registerQuery(query1);
      String orderby1 = "sort1 = ORDER table1 BY " + sortKey + " ;";
      System.out.println("orderby1 : " + orderby1);
      pigServer.registerQuery(orderby1);

      // Get metrics for each input sorted table
      Iterator<Tuple> it1 = pigServer.openIterator("sort1");
      long numbRows1 = 0;

      while (it1.hasNext()) {
        ++numbRows1; // increment row count
        Tuple rowValue = it1.next();
      }
      numbRows.add(numbRows1);
      numbUnionRows += numbRows1;
    }

    //
    // Verify sorted union table
    //
    long numbRows1 = verifySortedTable(pathTable1, sortCol, sortKey, numbCols,
        rowMod, verifyDataColName);

   
    //
    // Print all union input tables and rows for each
    //
    System.out.println();
    for (int i = 0; i < unionPaths.size(); ++i) {
      System.out.println("Input union table" + i + " path  : "
          + unionPaths.get(i));
      System.out.println("Input union table" + i + " rows  : "
          + numbRows.get(i));
    }
    System.out.println();
    System.out.println("Input union total rows   : " + numbUnionRows);

    System.out.println();
    System.out.println("Verify union - table columns : " + totalNumbCols);
    System.out.println("Verify union - table rows : " + numbRows1);
    System.out.println("Verify union - numb verified rows : "
        + totalNumbVerifiedRows);
    System.out.println("Verify union - sortCol : " + sortCol);

  /*  Assert.assertEquals(
        "Verify failed - sorted union table row comparison error "
            + "\n expected table rows : " + numbUnionRows
            + "\n actual table rows : " + numbRows1, numbUnionRows, numbRows1);
*/
    System.out.println("Verify union - PASS");
  }

  /**
   * Create unsorted table
   * 
   */
  public static void createtable(String pathTable1, long numbRows, int seed,
      boolean debug) throws ExecException, IOException, ParseException {
    System.out.println("createtable()");

    Path unsortedPath = new Path(pathTable1);

    // Remove old table (if present)
    removeDir(unsortedPath);

    // Create table
    BasicTable.Writer writer = new BasicTable.Writer(unsortedPath,
        TABLE_SCHEMA, TABLE_STORAGE, conf);

    Schema schema = writer.getSchema();
    Tuple tuple = TypesUtils.createTuple(schema);
    TableInserter inserter = writer.getInserter("ins", false);

    Map<String, String> m1 = new HashMap<String, String>();

    Tuple tupRecord1; // record
    tupRecord1 = TypesUtils.createTuple(schema.getColumnSchema("r1")
        .getSchema()); // r1 schema

    DataBag bag1 = TypesUtils.createBag();
    Schema schColl = schema.getColumnSchema("c1").getSchema(); // c1 schema
    Tuple tupColl1 = TypesUtils.createTuple(schColl);
    Tuple tupColl2 = TypesUtils.createTuple(schColl);

    int randRange = new Long(numbRows / 10).intValue(); // random range to allow
    // for duplicate values
    for (int i = 0; i < numbRows; ++i) {
      int random = generator.nextInt(randRange);

      TypesUtils.resetTuple(tuple); // reset row tuple
      m1.clear(); // reset map
      TypesUtils.resetTuple(tupRecord1); // reset record
      TypesUtils.resetTuple(tupColl1); // reset collection
      TypesUtils.resetTuple(tupColl2);
      bag1.clear();

      tuple.set(0, i); // count
      tuple.set(1, seed); // seed

      tuple.set(2, i); // int1
      tuple.set(3, random); // int2
      tuple.set(4, "string " + i); // str1
      tuple.set(5, "string random " + random); // str2
      tuple.set(6, new DataByteArray("byte " + i)); // byte1
      tuple.set(7, new DataByteArray("byte random " + random)); // byte2

      tuple.set(8, new Float(i * -1)); // float1 negative
      tuple.set(9, new Long(numbRows - i)); // long1 reverse
      tuple.set(10, new Double(i * 100)); // double1

      // insert map1
      m1.put("a", "m1");
      m1.put("b", "m1 " + i);
      tuple.set(11, m1);

      // insert record1
      tupRecord1.set(0, "r1 " + seed);
      tupRecord1.set(1, "r1 " + i);
      tuple.set(12, tupRecord1);

      // insert collection1
      // tupColl1.set(0, "c1 a " + seed);
      // tupColl1.set(1, "c1 a " + i);
      // bag1.add(tupColl1); // first collection item
      bag1.add(tupRecord1); // first collection item
      bag1.add(tupRecord1); // second collection item

      // tupColl2.set(0, "c1 b " + seed);
      // tupColl2.set(1, "c1 b " + i);
      // bag1.add(tupColl2); // second collection item

      tuple.set(13, bag1);

      inserter.insert(new BytesWritable(("key" + i).getBytes()), tuple);
    }
    inserter.close();
    writer.close();

    if (debug == true) {
      // Load tables
      String query1 = "table1 = LOAD '" + unsortedPath.toString()
          + "' USING org.apache.hadoop.zebra.pig.TableLoader();";
      pigServer.registerQuery(query1);

      // Print Table
      printTable("table1");
    }

    System.out.println("Table Path : " + unsortedPath);
  }

  /**
   * Create sorted table
   * 
   */
  public static void createsortedtable(String pathTable1, String pathTable2,
      String sortString, boolean debug) throws ExecException, IOException {
    System.out.println("createsortedtable()");

    Path unsortedPath = new Path(pathTable1);
    Path sortedPath = new Path(pathTable2);

    // Remove old table (if present)
    removeDir(sortedPath);

    // Load tables
    String query1 = "table1 = LOAD '" + unsortedPath.toString()
        + "' USING org.apache.hadoop.zebra.pig.TableLoader();";
    pigServer.registerQuery(query1);

    // Sort table
    String orderby1 = "sort1 = ORDER table1 BY " + sortString + " ;";
    System.out.println("orderby1 : " + orderby1);
    pigServer.registerQuery(orderby1);

    // Store sorted tables
    pigJob = pigServer.store("sort1", sortedPath.toString(), TableStorer.class
        .getCanonicalName()
        + "('" + TABLE_STORAGE + "')");
    Assert.assertNull(pigJob.getException());

    // Print Table
    if (debug == true)
      printTable("sort1");

    System.out.println("Sorted Path : " + sortedPath);
  }

  /**
   * Delete table
   * 
   */
  public static void deleteTable(String pathTable1) throws ExecException,
      IOException {
    System.out.println("deleteTable()");

    Path tablePath = new Path(pathTable1);

    // Remove table (if present)
    removeDir(tablePath);

    System.out.println("Deleted Table Path : " + tablePath);
  }

  /**
   * Verify sorted table
   * 
   * Using BasicTable.Reader, read all table rows and verify that sortCol is in
   * sorted order
   * 
   */
  private static long verifySortedTable(String pathTable1, int sortCol,
      String sortKey, int numbCols, int rowMod, String verifyDataColName)
      throws IOException, ParseException {

    long numbRows = 0;

    Path tablePath = new Path(pathTable1);

    BasicTable.Reader reader = new BasicTable.Reader(tablePath, conf);
   
    JobConf conf1 = new JobConf(conf);
    System.out.println("sortKey: " + sortKey);
    TableInputFormat.setInputPaths(conf1, new Path(pathTable1));
 
    TableInputFormat.requireSortedTable(conf1, null);
    TableInputFormat tif = new TableInputFormat();
 
    SortedTableSplit split = (SortedTableSplit) tif.getSplits(conf1, 1)[0];
    
    TableScanner scanner = reader.getScanner(split.getBegin(), split.getEnd(), true);
    BytesWritable key = new BytesWritable();
    Tuple rowValue = TypesUtils.createTuple(scanner.getSchema());

    Object lastVal = null;
    int numbCols1 = 0;
    long numbVerifiedRows = 0;

    while (!scanner.atEnd()) {
      ++numbRows;
      scanner.getKey(key);

      scanner.getValue(rowValue);

      // Verify every nth row
      if ((numbRows % rowMod) == 0) {
        ++numbVerifiedRows;
        numbCols1 = rowValue.size();
        if (numbCols != 0)
          Assert.assertEquals(
              "Verify failed - Table1 has wrong number of expected columns "
                  + "\n row numberrr : " + numbRows
                  + "\n expected column size : " + numbCols
                  + "\n actual columns size  : " + numbCols1, numbCols,
              numbCols1);

        Object newVal = rowValue.get(sortCol);

        // Verify sort key is in sorted order
        Assert.assertTrue("Verify failed - Table1 sort comparison error "
            + "\n row number : " + numbRows + "\n sort column : " + sortCol
            + "\n sort column last value    : " + lastVal
            + "\n sort column current value : " + newVal, compareTo(newVal,
            lastVal) >= 0);

        lastVal = newVal; // save last compare value

        //
        // Optionally verify data
        //
       
        if (verifyDataColName != null && verifyDataColName.equals("long1")) {
          Object newValLong1 = rowValue.get(sortCol);
          if (numbRows < 2000){
            System.out.println("Row : "+ (numbRows-1) +" long1 value : "+newValLong1.toString());
          }
          Assert.assertEquals(
              "Verify failed - Union table data verification error for column name : "
                  + verifyDataColName + "\n row number : " + (numbRows-1)
                  + "\n expected value : " + (numbRows-1 + 4) / 4 + // long1 will start with value 1
                  "\n actual value   : " + newValLong1, (numbRows-1 + 4) / 4,
              newValLong1);

        }

        scanner.advance();
      }
      

    }
    
    System.out.println("\nTable Pathh : " + pathTable1);
    System.out.println("++++++++++Table Row number : " + numbRows);
    
   
    reader.close();
   
    totalNumbCols = numbCols1;
    totalNumbVerifiedRows = numbVerifiedRows;

    return numbRows;
  }

  /**
   * Print table rows
   * 
   * Print the first number of specified table rows
   * 
   */
  public static void printRows(String pathTable1, long numbRows)
      throws IOException {
    System.out.println("printRows()");

    // Load table1
    String query1 = "table1 = LOAD '" + pathTable1
        + "' USING org.apache.hadoop.zebra.pig.TableLoader();";
    pigServer.registerQuery(query1);
    
   //
    // Get metrics from first table (unsorted)
    //
    long count = 0;
    Iterator<Tuple> it1 = pigServer.openIterator("table1");

    while (it1.hasNext()) {
      ++count;
      if (count > numbRows)
        break;
      Tuple RowValue1 = it1.next();
      System.out.println();
      for (int i = 0; i < RowValue1.size(); ++i)
        System.out.println("DEBUG: " + "table" + " RowValue.get(" + i + ") = "
            + RowValue1.get(i));
    }
    System.out.println("\nTable Path : " + pathTable1);
    System.out.println("Table Rows Printed : " + numbRows);
  }
  
  /* 
  * Print the first number of specified table rows
  * 
  */
 public static void printRowNumber(String pathTable1, String sortKey)
      throws IOException, ParseException {
    long numbRows = 0;

    Path tablePath = new Path(pathTable1);

    BasicTable.Reader reader = new BasicTable.Reader(tablePath, conf);
   
    JobConf conf1 = new JobConf(conf);
    System.out.println("sortKey: " + sortKey);
    TableInputFormat.setInputPaths(conf1, new Path(pathTable1));

    TableInputFormat.requireSortedTable(conf1, null);
    TableInputFormat tif = new TableInputFormat();

  
    TableScanner scanner = reader.getScanner(null, null, true);
    BytesWritable key = new BytesWritable();
    Tuple rowValue = TypesUtils.createTuple(scanner.getSchema());

    while (!scanner.atEnd()) {
      ++numbRows;
      scanner.getKey(key);
      scanner.advance();
    }
    System.out.println("\nTable Path : " + pathTable1);
    System.out.println("Table Row number : " + numbRows);
  }
  /**
   * Compare table rows
   * 
   */
  private static boolean compareRow(Tuple rowValues1, Tuple rowValues2)
      throws IOException {
    boolean result = true;
    Assert.assertEquals(rowValues1.size(), rowValues2.size());
    for (int i = 0; i < rowValues1.size(); ++i) {
      if (!compareObj(rowValues1.get(i), rowValues2.get(i))) {
        System.out.println("DEBUG: " + " RowValue.get(" + i
            + ") value compare error : " + rowValues1.get(i) + " : "
            + rowValues2.get(i));
        result = false;
        break;
      }
    }
    return result;
  }

  /**
   * Compare table values
   * 
   */
  private static boolean compareObj(Object object1, Object object2) {
    if (object1 == null) {
      if (object2 == null)
        return true;
      else
        return false;
    } else if (object1.equals(object2))
      return true;
    else
      return false;
  }

  /**
   * Compares two objects that implement the Comparable interface
   * 
   * Zebra supported "sort" types of String, DataByteArray, Integer, Float,
   * Long, Double, and Boolean all implement the Comparable interface.
   * 
   * Returns a negative integer, zero, or a positive integer if object1 is less
   * than, equal to, or greater than object2.
   * 
   */
  private static int compareTo(Object object1, Object object2) {
    if (object1 == null) {
      if (object2 == null)
        return 0;
      else
        return -1;
    } else if (object2 == null) {
      return 1;
    } else
      return ((Comparable) object1).compareTo((Comparable) object2);
  }

  /**
   * Print Table Metadata Info (for debugging)
   * 
   */
  private static void printTableInfo(String pathString) throws IOException {
    ByteArrayOutputStream bos = new ByteArrayOutputStream();
    PrintStream ps = new PrintStream(bos);
    System.out.println("start dumpinfo ===========");
    BasicTable.dumpInfo(pathString, ps, conf);

    System.out.println("bos.toString() : " + bos.toString());
  }

  /**
   * Print Pig Table (for debugging)
   * 
   */
  private static int printTable(String tablename) throws IOException {
    Iterator<Tuple> it1 = pigServer.openIterator(tablename);
    int numbRows = 0;
    while (it1.hasNext()) {
      Tuple RowValue1 = it1.next();
      ++numbRows;
      System.out.println();
      for (int i = 0; i < RowValue1.size(); ++i)
        System.out.println("DEBUG: " + tablename + " RowValue.get(" + i
            + ") = " + RowValue1.get(i));
    }
    System.out.println("\nRow count : " + numbRows);
    return numbRows;
  }

  /**
   * Calculate elapsed time
   * 
   */
  private static String printTime(long start, long stop) {
    long timeMillis = stop - start;
    long time = timeMillis / 1000;
    String seconds = Integer.toString((int) (time % 60));
    String minutes = Integer.toString((int) ((time % 3600) / 60));
    String hours = Integer.toString((int) (time / 3600));

    for (int i = 0; i < 2; i++) {
      if (seconds.length() < 2) {
        seconds = "0" + seconds;
      }
      if (minutes.length() < 2) {
        minutes = "0" + minutes;
      }
      if (hours.length() < 2) {
        hours = "0" + hours;
      }
    }
    String formatTime = hours + ":" + minutes + ":" + seconds;
    return formatTime;
  }

  /**
   * Main
   * 
   * Command line options:
   * 
   * -verifyOption : <load, sort, merge-join, sorted-union, dump, tableinfo,
   * createtable, createsorttable, deletetable, printrows>
   * 
   * -pathTable1 : <hdfs path> -pathTable2 : <hdfs path>
   * 
   * -pathUnionTables : <hdfs path> <hdfs path> ...
   * 
   * -rowMod : verify every nth row (optional)
   * 
   * -numbCols : number of columns table should have (optional)
   * 
   * -sortCol : for sort option (default is column 0)
   * 
   * -sortString : sort string for sort option
   * 
   * -numbRows : number of rows for new table to create
   * 
   * -seed : unique column number used for creating new tables
   * 
   * -debug : print out debug info with results (use caution, for example do not
   * used when creating large tables)
   * 
   * examples:
   * 
   * java -DwhichCluster="realCluster" -DHADOOP_HOME=$HADOOP_HOME -DUSER=$USER
   * TestComparator -verifyOption load -pathTable1 /user/hadoopqa/table1
   * -pathTable2 /user/hadoopqa/table2
   * 
   * java -DwhichCluster="realCluster" -DHADOOP_HOME=$HADOOP_HOME -DUSER=$USER
   * TestComparator -verifyOption sort -pathTable1 /user/hadoopqa/table1
   * -pathTable2 /user/hadoopqa/table2 -sortCol 0
   * 
   * java -DwhichCluster="realCluster" -DHADOOP_HOME=$HADOOP_HOME -DUSER=$USER
   * TestComparator -verifyOption merge-join -pathTable1 /user/hadoopqa/table1
   * -sortCol 0
   * 
   * java -DwhichCluster="realCluster" -DHADOOP_HOME=$HADOOP_HOME -DUSER=$USER
   * TestComparator -verifyOption sorted-union -pathTable1
   * /user/hadoopqa/unionTable1 -pathUnionTables /user/hadoopqa/inputTable1
   * /user/hadoopqa/inputTable2 /user/hadoopqa/inputTable3 -sortCol 0 -rowMod 5
   * 
   * java -DwhichCluster="realCluster" -DHADOOP_HOME=$HADOOP_HOME -DUSER=$USER
   * TestComparator -verifyOption dump -pathTable1 /user/hadoopqa/table1
   * 
   * @param args
   */

  public static void main(String[] args) {
    long startTime = System.currentTimeMillis();

    System.out.println("Running Zebra TestComparator");
    try {
      ArrayList<String> unionPaths = new ArrayList<String>();
      String verifyOption = null;
      String pathTable1 = null;
      String pathTable2 = null;
      String sortString = null;
      String verifyDataColName = null;
      int rowMod = 1; // default to verify every table row
      int numbCols = 0; // if provided, verify that table has these number of
      // columns
      int sortCol = 0; // default to first column as sort index
      long numbRows = 0; // number of rows to create for new table
      int seed = 0; // used for creating new tabletable1
      boolean debug = false;

      // Read arguments
      if (args.length >= 2) {
        for (int i = 0; i < args.length; ++i) {

          if (args[i].equals("-verifyOption")) {
            verifyOption = args[++i];
          } else if (args[i].equals("-pathTable1")) {
            pathTable1 = args[++i];
          } else if (args[i].equals("-pathTable2")) {
            pathTable2 = args[++i];
          } else if (args[i].equals("-pathUnionTables")) {
            while (++i < args.length && !args[i].startsWith("-")) {
              System.out.println("args[i] : " + args[i]);
              unionPaths.add(args[i]);
            }
            if (i < args.length)
              --i;
          } else if (args[i].equals("-rowMod")) {
            rowMod = new Integer(args[++i]).intValue();
          } else if (args[i].equals("-sortString")) {
            sortString = args[++i];
          } else if (args[i].equals("-sortCol")) {
            sortCol = new Integer(args[++i]).intValue();
          } else if (args[i].equals("-numbCols")) {
            numbCols = new Integer(args[++i]).intValue();
          } else if (args[i].equals("-numbRows")) {
            numbRows = new Long(args[++i]).intValue();
          } else if (args[i].equals("-seed")) {
            seed = new Integer(args[++i]).intValue();
          } else if (args[i].equals("-verifyDataColName")) {
            verifyDataColName = args[++i];
          } else if (args[i].equals("-debug")) {
            debug = true;
          } else {
            System.out.println("Exiting - unknown argument : " + args[i]);
            System.exit(0);
          }
        }
      } else {
        System.out
            .println("Error - need to provide required comparator arguments");
        System.exit(0);
      }

      // Setup environment
      setUp();

      //
      // Run appropriate verify option
      //
      if (verifyOption == null) {
        System.out.println("Exiting -verifyOption not set");
        System.exit(0);
      }

      if (verifyOption.equals("load")) {
        // Verify both tables are equal
        verifyLoad(pathTable1, pathTable2, numbCols);
      } else if (verifyOption.equals("sort")) {
        // Verify table is in sorted order
        verifySorted(pathTable1, pathTable2, sortCol, sortString, numbCols,
            rowMod);
      } else if (verifyOption.equals("merge-join")) {
        // Verify merge-join table is in sorted order
        verifyMergeJoin(pathTable1, sortCol, sortString, numbCols, rowMod,verifyDataColName);
      } else if (verifyOption.equals("sorted-union")) {
        Object lastVal = null;
        
        // Verify sorted-union table is in sorted order
        verifySortedUnion(unionPaths, pathTable1, sortCol, sortString,
            numbCols, rowMod, verifyDataColName);
      } else if (verifyOption.equals("dump")) {
        // Dump table info
        printTableInfo(pathTable1);
      } else if (verifyOption.equals("tableinfo")) {
        // Verify table to get row and column info
        verifyTable(pathTable1);
      } else if (verifyOption.equals("deletetable")) {
        // Delete table directory
        deleteTable(pathTable1);
      } else if (verifyOption.equals("printrows")) {
        // Print some table rows
        printRows(pathTable1, numbRows);
      } else if (verifyOption.equals("createtable")) {
        // Create unsorted table
        createtable(pathTable1, numbRows, seed, debug);
      } else if (verifyOption.equals("createsorttable")) {
        // Create sorted table
        createsortedtable(pathTable1, pathTable2, sortString, debug);
      }else if (verifyOption.equals("printrownumber")) {
        Object lastVal = null;
        //print total number of rows of the table
        printRowNumber(pathTable1,sortString);
      }
      //
      else {
        System.out.println("Exiting - unknown -verifyOption value : "
            + verifyOption);
        System.exit(0);
      }

    } catch (Exception e) {
      e.printStackTrace();
    }

    long stopTime = System.currentTimeMillis();
    System.out.println("\nElapsed time : " + printTime(startTime, stopTime)
        + "\n");
  }

}