/** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with this * work for additional information regarding copyright ownership. The ASF * licenses this file to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the * License for the specific language governing permissions and limitations under * the License. */ package org.apache.hadoop.zebra.mapred; import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.PrintStream; import java.util.HashMap; import java.util.Iterator; import java.util.ArrayList; import java.util.Map; import java.util.Random; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.zebra.BaseTestCase; import org.apache.hadoop.zebra.io.BasicTable; import org.apache.hadoop.zebra.io.TableInserter; import org.apache.hadoop.zebra.io.TableScanner; import org.apache.hadoop.zebra.parser.ParseException; import org.apache.hadoop.zebra.pig.TableStorer; import org.apache.hadoop.zebra.schema.Schema; import org.apache.hadoop.zebra.types.TypesUtils; import org.apache.pig.backend.executionengine.ExecException; import org.apache.pig.backend.executionengine.ExecJob; import org.apache.pig.data.DataBag; import org.apache.pig.data.DataByteArray; import org.apache.pig.data.Tuple; import org.junit.Assert; /** * TestComparator * * Utility for verifying tables created during Zebra Stress Testing * */ public class ToolTestComparator extends BaseTestCase { final static String TABLE_SCHEMA = "count:int,seed:int,int1:int,int2:int,str1:string,str2:string,byte1:bytes," + "byte2:bytes,float1:float,long1:long,double1:double,m1:map(string),r1:record(f1:string, f2:string)," + "c1:collection(record(a:string, b:string))"; final static String TABLE_STORAGE = "[count,seed,int1,int2,str1,str2,byte1,byte2,float1,long1,double1];[m1#{a}];[r1,c1]"; private static Random generator = new Random(); protected static ExecJob pigJob; private static int totalNumbCols; private static long totalNumbVerifiedRows; /** * Setup and initialize environment */ public static void setUp() throws Exception { init(); } /** * Verify load/store * */ public static void verifyLoad(String pathTable1, String pathTable2, int numbCols) throws IOException { System.out.println("verifyLoad()"); // Load table1 String query1 = "table1 = LOAD '" + pathTable1 + "' USING org.apache.hadoop.zebra.pig.TableLoader();"; System.out.println("verifyLoad() running query : " + query1); pigServer.registerQuery(query1); // Load table2 String query2 = "table2 = LOAD '" + pathTable2 + "' USING org.apache.hadoop.zebra.pig.TableLoader();"; System.out.println("verifyLoad() running query : " + query2); pigServer.registerQuery(query2); // Get metrics from first table Iterator<Tuple> it1 = pigServer.openIterator("table1"); int numbCols1 = 0; long numbRows1 = 0; while (it1.hasNext()) { ++numbRows1; // increment row count Tuple rowValue = it1.next(); numbCols1 = rowValue.size(); if (numbCols != 0) Assert.assertEquals( "Verify failed - Table1 has wrong number of expected columns " + "\n row number : " + numbRows1 + "\n expected column size : " + numbCols + "\n actual columns size : " + numbCols1, numbCols, numbCols1); } // Get metrics from second table Iterator<Tuple> it2 = pigServer.openIterator("table2"); int numbCols2 = 0; long numbRows2 = 0; while (it2.hasNext()) { ++numbRows2; // increment row count Tuple rowValue = it2.next(); numbCols2 = rowValue.size(); if (numbCols != 0) Assert.assertEquals( "Verify failed - Table2 has wrong number of expected columns " + "\n row number : " + numbRows2 + "\n expected column size : " + numbCols + "\n actual columns size : " + numbCols2, numbCols, numbCols2); } Assert .assertEquals( "Verify failed - Tables have different number row sizes " + "\n table1 rows : " + numbRows1 + "\n table2 rows : " + numbRows2, numbRows1, numbRows2); Assert.assertEquals( "Verify failed - Tables have different number column sizes " + "\n table1 column size : " + numbCols1 + "\n table2 column size : " + numbCols2, numbCols1, numbCols2); System.out.println(); System.out.println("Verify load - table1 columns : " + numbCols1); System.out.println("Verify load - table2 columns : " + numbCols2); System.out.println("Verify load - table1 rows : " + numbRows1); System.out.println("Verify load - table2 rows : " + numbRows2); System.out.println("Verify load - PASS"); } /** * Verify table * */ public static void verifyTable(String pathTable1) throws IOException { System.out.println("verifyTable()"); // Load table1 String query1 = "table1 = LOAD '" + pathTable1 + "' USING org.apache.hadoop.zebra.pig.TableLoader();"; System.out.println("verifyTable() running query : " + query1); pigServer.registerQuery(query1); // Get metrics from table Iterator<Tuple> it1 = pigServer.openIterator("table1"); int numbCols1 = 0; long numbRows1 = 0; System.out.println("DEBUG starting to iterate table1"); while (it1.hasNext()) { ++numbRows1; // increment row count Tuple rowValue = it1.next(); numbCols1 = rowValue.size(); } System.out.println(); System.out.println("Verify table columns : " + numbCols1); System.out.println("Verify table rows : " + numbRows1); System.out.println("Verify table complete"); } /** * Verify sorted * */ public static void verifySorted(String pathTable1, String pathTable2, int sortCol, String sortKey, int numbCols, int rowMod) throws IOException, ParseException { System.out.println("verifySorted()"); // Load table1 String query1 = "table1 = LOAD '" + pathTable1 + "' USING org.apache.hadoop.zebra.pig.TableLoader();"; System.out.println("verifySorted() running query : " + query1); pigServer.registerQuery(query1); // // Get metrics from first table (unsorted) // Iterator<Tuple> it1 = pigServer.openIterator("table1"); int numbCols1 = 0; long numbRows1 = 0; System.out.println("DEBUG starting to iterate table1"); while (it1.hasNext()) { ++numbRows1; // increment row count Tuple rowValue = it1.next(); numbCols1 = rowValue.size(); if (numbCols != 0) Assert.assertEquals( "Verify failed - Table1 has wrong number of expected columns " + "\n row number : " + numbRows1 + "\n expected column size : " + numbCols + "\n actual columns size : " + numbCols1, numbCols, numbCols1); } System.out.println(); System.out.println("Verify unsorted table1 columns : " + numbCols1); System.out.println("Verify unsorted table1 rows : " + numbRows1); System.out.println("\nDEBUG starting to iterate table2"); // // Get metrics from second table (sorted) // long numbRows2 = verifySortedTable(pathTable2, sortCol, sortKey, numbCols, rowMod, null); int numbCols2 = totalNumbCols; long numbVerifiedRows = totalNumbVerifiedRows; Assert .assertEquals( "Verify failed - Tables have different number row sizes " + "\n table1 rows : " + numbRows1 + "\n table2 rows : " + numbRows2, numbRows1, numbRows2); Assert.assertEquals( "Verify failed - Tables have different number column sizes " + "\n table1 column size : " + numbCols1 + "\n table2 column size : " + numbCols2, numbCols1, numbCols2); System.out.println(); System.out.println("Verify unsorted table1 columns : " + numbCols1); System.out.println("Verify sorted table2 columns : " + numbCols2); System.out.println("Verify unsorted table1 rows : " + numbRows1); System.out.println("Verify sorted table2 rows : " + numbRows2); System.out.println("Verify sorted - numb verified rows : " + numbVerifiedRows); System.out.println("Verify sorted - sortCol : " + sortCol); System.out.println("Verify sorted - PASS"); } /** * Verify merge-join * */ public static void verifyMergeJoin(String pathTable1, int sortCol, String sortKey, int numbCols, int rowMod, String verifyDataColName) throws IOException, ParseException { System.out.println("verifyMergeJoin()"); // // Verify sorted table // long numbRows = verifySortedTable(pathTable1, sortCol, sortKey, numbCols, rowMod, verifyDataColName); System.out.println(); System.out.println("Verify merge-join table columns : " + totalNumbCols); System.out.println("Verify merge-join table rows : " + numbRows); System.out.println("Verify merge-join - numb verified rows : " + totalNumbVerifiedRows); System.out.println("Verify merge-join - sortCol : " + sortCol); System.out.println("Verify merge-join - PASS"); } /** * Verify sorted-union * */ public static void verifySortedUnion(ArrayList<String> unionPaths, String pathTable1, int sortCol, String sortKey, int numbCols, int rowMod, String verifyDataColName) throws IOException, ParseException { System.out.println("verifySortedUnion()"); long numbUnionRows = 0; ArrayList<Long> numbRows = new ArrayList<Long>(); // Get number of rows from each of the input union tables for (int i = 0; i < unionPaths.size(); ++i) { // Load table1 String query1 = "table1 = LOAD '" + unionPaths.get(i) + "' USING org.apache.hadoop.zebra.pig.TableLoader();"; System.out.println("verifySortedUnion() running query : " + query1); pigServer.registerQuery(query1); String orderby1 = "sort1 = ORDER table1 BY " + sortKey + " ;"; System.out.println("orderby1 : " + orderby1); pigServer.registerQuery(orderby1); // Get metrics for each input sorted table Iterator<Tuple> it1 = pigServer.openIterator("sort1"); long numbRows1 = 0; while (it1.hasNext()) { ++numbRows1; // increment row count Tuple rowValue = it1.next(); } numbRows.add(numbRows1); numbUnionRows += numbRows1; } // // Verify sorted union table // long numbRows1 = verifySortedTable(pathTable1, sortCol, sortKey, numbCols, rowMod, verifyDataColName); // // Print all union input tables and rows for each // System.out.println(); for (int i = 0; i < unionPaths.size(); ++i) { System.out.println("Input union table" + i + " path : " + unionPaths.get(i)); System.out.println("Input union table" + i + " rows : " + numbRows.get(i)); } System.out.println(); System.out.println("Input union total rows : " + numbUnionRows); System.out.println(); System.out.println("Verify union - table columns : " + totalNumbCols); System.out.println("Verify union - table rows : " + numbRows1); System.out.println("Verify union - numb verified rows : " + totalNumbVerifiedRows); System.out.println("Verify union - sortCol : " + sortCol); /* Assert.assertEquals( "Verify failed - sorted union table row comparison error " + "\n expected table rows : " + numbUnionRows + "\n actual table rows : " + numbRows1, numbUnionRows, numbRows1); */ System.out.println("Verify union - PASS"); } /** * Create unsorted table * */ public static void createtable(String pathTable1, long numbRows, int seed, boolean debug) throws ExecException, IOException, ParseException { System.out.println("createtable()"); Path unsortedPath = new Path(pathTable1); // Remove old table (if present) removeDir(unsortedPath); // Create table BasicTable.Writer writer = new BasicTable.Writer(unsortedPath, TABLE_SCHEMA, TABLE_STORAGE, conf); Schema schema = writer.getSchema(); Tuple tuple = TypesUtils.createTuple(schema); TableInserter inserter = writer.getInserter("ins", false); Map<String, String> m1 = new HashMap<String, String>(); Tuple tupRecord1; // record tupRecord1 = TypesUtils.createTuple(schema.getColumnSchema("r1") .getSchema()); // r1 schema DataBag bag1 = TypesUtils.createBag(); Schema schColl = schema.getColumnSchema("c1").getSchema(); // c1 schema Tuple tupColl1 = TypesUtils.createTuple(schColl); Tuple tupColl2 = TypesUtils.createTuple(schColl); int randRange = new Long(numbRows / 10).intValue(); // random range to allow // for duplicate values for (int i = 0; i < numbRows; ++i) { int random = generator.nextInt(randRange); TypesUtils.resetTuple(tuple); // reset row tuple m1.clear(); // reset map TypesUtils.resetTuple(tupRecord1); // reset record TypesUtils.resetTuple(tupColl1); // reset collection TypesUtils.resetTuple(tupColl2); bag1.clear(); tuple.set(0, i); // count tuple.set(1, seed); // seed tuple.set(2, i); // int1 tuple.set(3, random); // int2 tuple.set(4, "string " + i); // str1 tuple.set(5, "string random " + random); // str2 tuple.set(6, new DataByteArray("byte " + i)); // byte1 tuple.set(7, new DataByteArray("byte random " + random)); // byte2 tuple.set(8, new Float(i * -1)); // float1 negative tuple.set(9, new Long(numbRows - i)); // long1 reverse tuple.set(10, new Double(i * 100)); // double1 // insert map1 m1.put("a", "m1"); m1.put("b", "m1 " + i); tuple.set(11, m1); // insert record1 tupRecord1.set(0, "r1 " + seed); tupRecord1.set(1, "r1 " + i); tuple.set(12, tupRecord1); // insert collection1 // tupColl1.set(0, "c1 a " + seed); // tupColl1.set(1, "c1 a " + i); // bag1.add(tupColl1); // first collection item bag1.add(tupRecord1); // first collection item bag1.add(tupRecord1); // second collection item // tupColl2.set(0, "c1 b " + seed); // tupColl2.set(1, "c1 b " + i); // bag1.add(tupColl2); // second collection item tuple.set(13, bag1); inserter.insert(new BytesWritable(("key" + i).getBytes()), tuple); } inserter.close(); writer.close(); if (debug == true) { // Load tables String query1 = "table1 = LOAD '" + unsortedPath.toString() + "' USING org.apache.hadoop.zebra.pig.TableLoader();"; pigServer.registerQuery(query1); // Print Table printTable("table1"); } System.out.println("Table Path : " + unsortedPath); } /** * Create sorted table * */ public static void createsortedtable(String pathTable1, String pathTable2, String sortString, boolean debug) throws ExecException, IOException { System.out.println("createsortedtable()"); Path unsortedPath = new Path(pathTable1); Path sortedPath = new Path(pathTable2); // Remove old table (if present) removeDir(sortedPath); // Load tables String query1 = "table1 = LOAD '" + unsortedPath.toString() + "' USING org.apache.hadoop.zebra.pig.TableLoader();"; pigServer.registerQuery(query1); // Sort table String orderby1 = "sort1 = ORDER table1 BY " + sortString + " ;"; System.out.println("orderby1 : " + orderby1); pigServer.registerQuery(orderby1); // Store sorted tables pigJob = pigServer.store("sort1", sortedPath.toString(), TableStorer.class .getCanonicalName() + "('" + TABLE_STORAGE + "')"); Assert.assertNull(pigJob.getException()); // Print Table if (debug == true) printTable("sort1"); System.out.println("Sorted Path : " + sortedPath); } /** * Delete table * */ public static void deleteTable(String pathTable1) throws ExecException, IOException { System.out.println("deleteTable()"); Path tablePath = new Path(pathTable1); // Remove table (if present) removeDir(tablePath); System.out.println("Deleted Table Path : " + tablePath); } /** * Verify sorted table * * Using BasicTable.Reader, read all table rows and verify that sortCol is in * sorted order * */ private static long verifySortedTable(String pathTable1, int sortCol, String sortKey, int numbCols, int rowMod, String verifyDataColName) throws IOException, ParseException { long numbRows = 0; Path tablePath = new Path(pathTable1); BasicTable.Reader reader = new BasicTable.Reader(tablePath, conf); JobConf conf1 = new JobConf(conf); System.out.println("sortKey: " + sortKey); TableInputFormat.setInputPaths(conf1, new Path(pathTable1)); TableInputFormat.requireSortedTable(conf1, null); TableInputFormat tif = new TableInputFormat(); SortedTableSplit split = (SortedTableSplit) tif.getSplits(conf1, 1)[0]; TableScanner scanner = reader.getScanner(split.getBegin(), split.getEnd(), true); BytesWritable key = new BytesWritable(); Tuple rowValue = TypesUtils.createTuple(scanner.getSchema()); Object lastVal = null; int numbCols1 = 0; long numbVerifiedRows = 0; while (!scanner.atEnd()) { ++numbRows; scanner.getKey(key); scanner.getValue(rowValue); // Verify every nth row if ((numbRows % rowMod) == 0) { ++numbVerifiedRows; numbCols1 = rowValue.size(); if (numbCols != 0) Assert.assertEquals( "Verify failed - Table1 has wrong number of expected columns " + "\n row numberrr : " + numbRows + "\n expected column size : " + numbCols + "\n actual columns size : " + numbCols1, numbCols, numbCols1); Object newVal = rowValue.get(sortCol); // Verify sort key is in sorted order Assert.assertTrue("Verify failed - Table1 sort comparison error " + "\n row number : " + numbRows + "\n sort column : " + sortCol + "\n sort column last value : " + lastVal + "\n sort column current value : " + newVal, compareTo(newVal, lastVal) >= 0); lastVal = newVal; // save last compare value // // Optionally verify data // if (verifyDataColName != null && verifyDataColName.equals("long1")) { Object newValLong1 = rowValue.get(sortCol); if (numbRows < 2000){ System.out.println("Row : "+ (numbRows-1) +" long1 value : "+newValLong1.toString()); } Assert.assertEquals( "Verify failed - Union table data verification error for column name : " + verifyDataColName + "\n row number : " + (numbRows-1) + "\n expected value : " + (numbRows-1 + 4) / 4 + // long1 will start with value 1 "\n actual value : " + newValLong1, (numbRows-1 + 4) / 4, newValLong1); } scanner.advance(); } } System.out.println("\nTable Pathh : " + pathTable1); System.out.println("++++++++++Table Row number : " + numbRows); reader.close(); totalNumbCols = numbCols1; totalNumbVerifiedRows = numbVerifiedRows; return numbRows; } /** * Print table rows * * Print the first number of specified table rows * */ public static void printRows(String pathTable1, long numbRows) throws IOException { System.out.println("printRows()"); // Load table1 String query1 = "table1 = LOAD '" + pathTable1 + "' USING org.apache.hadoop.zebra.pig.TableLoader();"; pigServer.registerQuery(query1); // // Get metrics from first table (unsorted) // long count = 0; Iterator<Tuple> it1 = pigServer.openIterator("table1"); while (it1.hasNext()) { ++count; if (count > numbRows) break; Tuple RowValue1 = it1.next(); System.out.println(); for (int i = 0; i < RowValue1.size(); ++i) System.out.println("DEBUG: " + "table" + " RowValue.get(" + i + ") = " + RowValue1.get(i)); } System.out.println("\nTable Path : " + pathTable1); System.out.println("Table Rows Printed : " + numbRows); } /* * Print the first number of specified table rows * */ public static void printRowNumber(String pathTable1, String sortKey) throws IOException, ParseException { long numbRows = 0; Path tablePath = new Path(pathTable1); BasicTable.Reader reader = new BasicTable.Reader(tablePath, conf); JobConf conf1 = new JobConf(conf); System.out.println("sortKey: " + sortKey); TableInputFormat.setInputPaths(conf1, new Path(pathTable1)); TableInputFormat.requireSortedTable(conf1, null); TableInputFormat tif = new TableInputFormat(); TableScanner scanner = reader.getScanner(null, null, true); BytesWritable key = new BytesWritable(); Tuple rowValue = TypesUtils.createTuple(scanner.getSchema()); while (!scanner.atEnd()) { ++numbRows; scanner.getKey(key); scanner.advance(); } System.out.println("\nTable Path : " + pathTable1); System.out.println("Table Row number : " + numbRows); } /** * Compare table rows * */ private static boolean compareRow(Tuple rowValues1, Tuple rowValues2) throws IOException { boolean result = true; Assert.assertEquals(rowValues1.size(), rowValues2.size()); for (int i = 0; i < rowValues1.size(); ++i) { if (!compareObj(rowValues1.get(i), rowValues2.get(i))) { System.out.println("DEBUG: " + " RowValue.get(" + i + ") value compare error : " + rowValues1.get(i) + " : " + rowValues2.get(i)); result = false; break; } } return result; } /** * Compare table values * */ private static boolean compareObj(Object object1, Object object2) { if (object1 == null) { if (object2 == null) return true; else return false; } else if (object1.equals(object2)) return true; else return false; } /** * Compares two objects that implement the Comparable interface * * Zebra supported "sort" types of String, DataByteArray, Integer, Float, * Long, Double, and Boolean all implement the Comparable interface. * * Returns a negative integer, zero, or a positive integer if object1 is less * than, equal to, or greater than object2. * */ private static int compareTo(Object object1, Object object2) { if (object1 == null) { if (object2 == null) return 0; else return -1; } else if (object2 == null) { return 1; } else return ((Comparable) object1).compareTo((Comparable) object2); } /** * Print Table Metadata Info (for debugging) * */ private static void printTableInfo(String pathString) throws IOException { ByteArrayOutputStream bos = new ByteArrayOutputStream(); PrintStream ps = new PrintStream(bos); System.out.println("start dumpinfo ==========="); BasicTable.dumpInfo(pathString, ps, conf); System.out.println("bos.toString() : " + bos.toString()); } /** * Print Pig Table (for debugging) * */ private static int printTable(String tablename) throws IOException { Iterator<Tuple> it1 = pigServer.openIterator(tablename); int numbRows = 0; while (it1.hasNext()) { Tuple RowValue1 = it1.next(); ++numbRows; System.out.println(); for (int i = 0; i < RowValue1.size(); ++i) System.out.println("DEBUG: " + tablename + " RowValue.get(" + i + ") = " + RowValue1.get(i)); } System.out.println("\nRow count : " + numbRows); return numbRows; } /** * Calculate elapsed time * */ private static String printTime(long start, long stop) { long timeMillis = stop - start; long time = timeMillis / 1000; String seconds = Integer.toString((int) (time % 60)); String minutes = Integer.toString((int) ((time % 3600) / 60)); String hours = Integer.toString((int) (time / 3600)); for (int i = 0; i < 2; i++) { if (seconds.length() < 2) { seconds = "0" + seconds; } if (minutes.length() < 2) { minutes = "0" + minutes; } if (hours.length() < 2) { hours = "0" + hours; } } String formatTime = hours + ":" + minutes + ":" + seconds; return formatTime; } /** * Main * * Command line options: * * -verifyOption : <load, sort, merge-join, sorted-union, dump, tableinfo, * createtable, createsorttable, deletetable, printrows> * * -pathTable1 : <hdfs path> -pathTable2 : <hdfs path> * * -pathUnionTables : <hdfs path> <hdfs path> ... * * -rowMod : verify every nth row (optional) * * -numbCols : number of columns table should have (optional) * * -sortCol : for sort option (default is column 0) * * -sortString : sort string for sort option * * -numbRows : number of rows for new table to create * * -seed : unique column number used for creating new tables * * -debug : print out debug info with results (use caution, for example do not * used when creating large tables) * * examples: * * java -DwhichCluster="realCluster" -DHADOOP_HOME=$HADOOP_HOME -DUSER=$USER * TestComparator -verifyOption load -pathTable1 /user/hadoopqa/table1 * -pathTable2 /user/hadoopqa/table2 * * java -DwhichCluster="realCluster" -DHADOOP_HOME=$HADOOP_HOME -DUSER=$USER * TestComparator -verifyOption sort -pathTable1 /user/hadoopqa/table1 * -pathTable2 /user/hadoopqa/table2 -sortCol 0 * * java -DwhichCluster="realCluster" -DHADOOP_HOME=$HADOOP_HOME -DUSER=$USER * TestComparator -verifyOption merge-join -pathTable1 /user/hadoopqa/table1 * -sortCol 0 * * java -DwhichCluster="realCluster" -DHADOOP_HOME=$HADOOP_HOME -DUSER=$USER * TestComparator -verifyOption sorted-union -pathTable1 * /user/hadoopqa/unionTable1 -pathUnionTables /user/hadoopqa/inputTable1 * /user/hadoopqa/inputTable2 /user/hadoopqa/inputTable3 -sortCol 0 -rowMod 5 * * java -DwhichCluster="realCluster" -DHADOOP_HOME=$HADOOP_HOME -DUSER=$USER * TestComparator -verifyOption dump -pathTable1 /user/hadoopqa/table1 * * @param args */ public static void main(String[] args) { long startTime = System.currentTimeMillis(); System.out.println("Running Zebra TestComparator"); try { ArrayList<String> unionPaths = new ArrayList<String>(); String verifyOption = null; String pathTable1 = null; String pathTable2 = null; String sortString = null; String verifyDataColName = null; int rowMod = 1; // default to verify every table row int numbCols = 0; // if provided, verify that table has these number of // columns int sortCol = 0; // default to first column as sort index long numbRows = 0; // number of rows to create for new table int seed = 0; // used for creating new tabletable1 boolean debug = false; // Read arguments if (args.length >= 2) { for (int i = 0; i < args.length; ++i) { if (args[i].equals("-verifyOption")) { verifyOption = args[++i]; } else if (args[i].equals("-pathTable1")) { pathTable1 = args[++i]; } else if (args[i].equals("-pathTable2")) { pathTable2 = args[++i]; } else if (args[i].equals("-pathUnionTables")) { while (++i < args.length && !args[i].startsWith("-")) { System.out.println("args[i] : " + args[i]); unionPaths.add(args[i]); } if (i < args.length) --i; } else if (args[i].equals("-rowMod")) { rowMod = new Integer(args[++i]).intValue(); } else if (args[i].equals("-sortString")) { sortString = args[++i]; } else if (args[i].equals("-sortCol")) { sortCol = new Integer(args[++i]).intValue(); } else if (args[i].equals("-numbCols")) { numbCols = new Integer(args[++i]).intValue(); } else if (args[i].equals("-numbRows")) { numbRows = new Long(args[++i]).intValue(); } else if (args[i].equals("-seed")) { seed = new Integer(args[++i]).intValue(); } else if (args[i].equals("-verifyDataColName")) { verifyDataColName = args[++i]; } else if (args[i].equals("-debug")) { debug = true; } else { System.out.println("Exiting - unknown argument : " + args[i]); System.exit(0); } } } else { System.out .println("Error - need to provide required comparator arguments"); System.exit(0); } // Setup environment setUp(); // // Run appropriate verify option // if (verifyOption == null) { System.out.println("Exiting -verifyOption not set"); System.exit(0); } if (verifyOption.equals("load")) { // Verify both tables are equal verifyLoad(pathTable1, pathTable2, numbCols); } else if (verifyOption.equals("sort")) { // Verify table is in sorted order verifySorted(pathTable1, pathTable2, sortCol, sortString, numbCols, rowMod); } else if (verifyOption.equals("merge-join")) { // Verify merge-join table is in sorted order verifyMergeJoin(pathTable1, sortCol, sortString, numbCols, rowMod,verifyDataColName); } else if (verifyOption.equals("sorted-union")) { Object lastVal = null; // Verify sorted-union table is in sorted order verifySortedUnion(unionPaths, pathTable1, sortCol, sortString, numbCols, rowMod, verifyDataColName); } else if (verifyOption.equals("dump")) { // Dump table info printTableInfo(pathTable1); } else if (verifyOption.equals("tableinfo")) { // Verify table to get row and column info verifyTable(pathTable1); } else if (verifyOption.equals("deletetable")) { // Delete table directory deleteTable(pathTable1); } else if (verifyOption.equals("printrows")) { // Print some table rows printRows(pathTable1, numbRows); } else if (verifyOption.equals("createtable")) { // Create unsorted table createtable(pathTable1, numbRows, seed, debug); } else if (verifyOption.equals("createsorttable")) { // Create sorted table createsortedtable(pathTable1, pathTable2, sortString, debug); }else if (verifyOption.equals("printrownumber")) { Object lastVal = null; //print total number of rows of the table printRowNumber(pathTable1,sortString); } // else { System.out.println("Exiting - unknown -verifyOption value : " + verifyOption); System.exit(0); } } catch (Exception e) { e.printStackTrace(); } long stopTime = System.currentTimeMillis(); System.out.println("\nElapsed time : " + printTime(startTime, stopTime) + "\n"); } }