/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.zebra.pig;

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.zebra.io.BasicTable;
import org.apache.hadoop.zebra.io.TableInserter;
import org.apache.hadoop.zebra.pig.TableStorer;
import org.apache.hadoop.zebra.schema.Schema;
import org.apache.hadoop.zebra.types.TypesUtils;
import org.apache.hadoop.zebra.BaseTestCase;
import org.apache.pig.backend.executionengine.ExecException;
import org.apache.pig.data.DataByteArray;
import org.apache.pig.data.Tuple;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;

/**
 * Negative tests for Zebra merge joins issued through Pig. Each test builds
 * sorted tables and then runs a merge join that violates one of the
 * merge-join preconditions (join key position, sort order, number of input
 * tables, common join key, or key data type) and expects an IOException.
 */
public class TestMergeJoinNegative extends BaseTestCase {
  final static String STR_SCHEMA1 = "a:int,b:float,c:long,d:double,e:string,f:bytes,m1:map(string)";
  final static String STR_STORAGE1 = "[a, b, c]; [e, f]; [m1#{a}]";
  final static String STR_SCHEMA2 = "aa:int,bb:float,ee:string";
  final static String STR_STORAGE2 = "[aa, bb]; [ee]";
  final static String STR_SCHEMA3 = "a:string,b:int,c:float";
  final static String STR_STORAGE3 = "[a, b]; [c]";

  static int fileId = 0;

  private static Path pathTable1;
  private static Path pathTable2;
  private static Path pathTable3;
  private static Path pathTable4;

  @BeforeClass
  public static void setUp() throws Exception {
    init();

    pathTable1 = getTableFullPath("TestMergeJoinNegative1");
    pathTable2 = getTableFullPath("TestMergeJoinNegative2");
    pathTable3 = getTableFullPath("TestMergeJoinNegative3");
    pathTable4 = getTableFullPath("TestMergeJoinNegative4");
    removeDir(pathTable1);
    removeDir(pathTable2);
    removeDir(pathTable3);
    removeDir(pathTable4);

    // Create table1 data
    Map<String, String> m1 = new HashMap<String, String>();
    m1.put("a", "m1-a");
    m1.put("b", "m1-b");

    Object[][] table1 = {
      {5,    -3.25f, 1001L, 51e+2, "Zebra",  new DataByteArray("Zebra"),  m1},
      {-1,    3.25f, 1000L, 50e+2, "zebra",  new DataByteArray("zebra"),  m1},
      {1001, 100.0f, 1000L, 50e+2, "apple",  new DataByteArray("apple"),  m1},
      {1002,  28.0f, 1000L, 50e+2, "hadoop", new DataByteArray("hadoop"), m1},
      {1000,   0.0f, 1002L, 52e+2, "apple",  new DataByteArray("apple"),  m1} };

    // Create table1
    createTable(pathTable1, STR_SCHEMA1, STR_STORAGE1, table1);

    // Create table2 data
    Map<String, String> m2 = new HashMap<String, String>();
    m2.put("a", "m2-a");
    m2.put("b", "m2-b");

    Object[][] table2 = {
      {15,    56.0f, 1004L, 50e+2, "green",  new DataByteArray("green"),  m2},
      {-1,   -99.0f, 1008L, 51e+2, "orange", new DataByteArray("orange"), m2},
      {1001,   0.0f, 1000L, 55e+2, "white",  new DataByteArray("white"),  m2},
      {1001, -88.0f, 1001L, 52e+2, "brown",  new DataByteArray("brown"),  m2},
      {2000,  33.0f, 1002L, 52e+2, "beige",  new DataByteArray("beige"),  m2} };

    // Create table2
    createTable(pathTable2, STR_SCHEMA1, STR_STORAGE1, table2);

    // Create table3 data
    Object[][] table3 = {
      {0,     7.0f, "grape"},
      {1001,  8.0f, "orange"},
      {-200,  9.0f, "banana"},
      {8,   -88.0f, "peach"} };

    // Create table3
    createTable(pathTable3, STR_SCHEMA2, STR_STORAGE2, table3);

    // Create table4 data
    Object[][] table4 = {
      {"grape",     0,   7.0f},
      {"orange", 1001,   8.0f},
      {"banana", -200,   9.0f},
      {"peach",     8, -88.0f} };

    // Create table4
    createTable(pathTable4, STR_SCHEMA3, STR_STORAGE3, table4);

    // Load table1
    String query1 = "table1 = LOAD '" + pathTable1.toString()
        + "' USING org.apache.hadoop.zebra.pig.TableLoader();";
    pigServer.registerQuery(query1);

    // Load table2
    String query2 = "table2 = LOAD '" + pathTable2.toString()
        + "' USING org.apache.hadoop.zebra.pig.TableLoader();";
    pigServer.registerQuery(query2);

    // Load table3
    String query3 = "table3 = LOAD '" + pathTable3.toString()
        + "' USING org.apache.hadoop.zebra.pig.TableLoader();";
    pigServer.registerQuery(query3);

    // Load table4
    String query4 = "table4 = LOAD '" + pathTable4.toString()
        + "' USING org.apache.hadoop.zebra.pig.TableLoader();";
    pigServer.registerQuery(query4);
  }

  /**
   * Creates a Zebra BasicTable at the given path using the given schema and
   * storage specification, inserting one row per entry of tableData.
   */
  private static void createTable(Path path, String schemaString,
      String storageString, Object[][] tableData) throws IOException {
    // Create table from tableData array
    BasicTable.Writer writer = new BasicTable.Writer(path, schemaString,
        storageString, conf);
    Schema schema = writer.getSchema();

    Tuple tuple = TypesUtils.createTuple(schema);
    TableInserter inserter = writer.getInserter("ins", false);

    for (int i = 0; i < tableData.length; ++i) {
      TypesUtils.resetTuple(tuple);
      for (int k = 0; k < tableData[i].length; ++k) {
        tuple.set(k, tableData[i][k]);
        System.out.println("DEBUG: setting tuple k=" + k + " value=" + tableData[i][k]);
      }
      inserter.insert(new BytesWritable(("key" + i).getBytes()), tuple);
    }
    inserter.close();
    writer.close();
  }

  @AfterClass
  public static void tearDown() throws Exception {
    pigServer.shutdown();
  }

  @Test(expected = IOException.class)
  public void test_merge_joint_12() throws ExecException, IOException {
    //
    // Pig script changes position of join key (negative test)
    //

    // Sort tables
    String orderby1 = "sort1 = ORDER table1 BY " + "a" + " ;";
    pigServer.registerQuery(orderby1);

    String orderby2 = "sort2 = ORDER table2 BY " + "a" + " ;";
    pigServer.registerQuery(orderby2);

    // Store sorted tables
    ++fileId;  // increment filename suffix
    String pathSort1 = pathTable1.toString() + Integer.toString(fileId);
    pigServer.store("sort1", pathSort1, TableStorer.class.getCanonicalName()
        + "('[a, b, c]; [d, e, f, m1]')");

    String pathSort2 = pathTable2.toString() + Integer.toString(fileId);
    pigServer.store("sort2", pathSort2, TableStorer.class.getCanonicalName()
        + "('[a, b, c]; [d, e, f, m1]')");

    // Load sorted tables
    String load1 = "records1 = LOAD '" + pathSort1
        + "' USING org.apache.hadoop.zebra.pig.TableLoader('a, b, c, d, e, f, m1', 'sorted');";
    pigServer.registerQuery(load1);

    String load2 = "records2 = LOAD '" + pathSort2
        + "' USING org.apache.hadoop.zebra.pig.TableLoader('a, b, c, d, e, f, m1', 'sorted');";
    pigServer.registerQuery(load2);

    // Change position and name of join key
    String reorder1 = "reorder1 = FOREACH records1 GENERATE b as n2, a as n1, "
        + "c as c, d as d, e as e, f as f, m1 as m1;";
    pigServer.registerQuery(reorder1);

    // Merge tables
    String join = "joinRecords = JOIN reorder1 BY " + "(" + "n2" + ")"
        + " , records2 BY " + "(" + "a" + ")" + " USING \"merge\";";  // n2 is the wrong data type
    pigServer.registerQuery(join);

    pigServer.openIterator("joinRecords");  // get iterator to trigger error
  }

  @Test(expected = IOException.class)
  public void test_merge_joint_13() throws ExecException, IOException {
    //
    // Pig script changes sort order (negative test)
    //

    // Sort tables
    String orderby1 = "sort1 = ORDER table1 BY " + "a" + " ;";
    pigServer.registerQuery(orderby1);

    String orderby2 = "sort2 = ORDER table2 BY " + "a" + " ;";
    pigServer.registerQuery(orderby2);

    // Store sorted tables
    ++fileId;  // increment filename suffix
    String pathSort1 = pathTable1.toString() + Integer.toString(fileId);
    pigServer.store("sort1", pathSort1, TableStorer.class.getCanonicalName()
        + "('[a, b, c]; [d, e, f, m1]')");

    String pathSort2 = pathTable2.toString() + Integer.toString(fileId);
    pigServer.store("sort2", pathSort2, TableStorer.class.getCanonicalName()
        + "('[a, b, c]; [d, e, f, m1]')");

    // Load sorted tables
    String load1 = "records1 = LOAD '" + pathSort1
        + "' USING org.apache.hadoop.zebra.pig.TableLoader('a, b, c, d, e, f, m1', 'sorted');";
    pigServer.registerQuery(load1);

    String load2 = "records2 = LOAD '" + pathSort2
        + "' USING org.apache.hadoop.zebra.pig.TableLoader('a, b, c, d, e, f, m1', 'sorted');";
    pigServer.registerQuery(load2);

    // Change sort order for key
    String reorder1 = "reorder1 = FOREACH records1 GENERATE a*(-1) as a, b as b, "
        + "c as c, d as d, e as e, f as f, m1 as m1;";
    pigServer.registerQuery(reorder1);

    // Merge tables
    String join = "joinRecords = JOIN reorder1 BY " + "(" + "a" + ")"
        + " , records2 BY " + "(" + "a" + ")" + " USING \"merge\";";
    pigServer.registerQuery(join);

    pigServer.openIterator("joinRecords");  // get iterator to trigger error
  }

  @Test(expected = IOException.class)
  public void test_merge_joint_14() throws ExecException, IOException {
    //
    // Left hand table is not in ascending order (negative test)
    //

    // Sort tables
    String orderby1 = "sort1 = ORDER table1 BY " + "b" + " ;";  // sort left hand table by wrong key
    pigServer.registerQuery(orderby1);

    String orderby2 = "sort2 = ORDER table2 BY " + "a" + " ;";
    pigServer.registerQuery(orderby2);

    // Store sorted tables
    ++fileId;  // increment filename suffix
    String pathSort1 = pathTable1.toString() + Integer.toString(fileId);
    pigServer.store("sort1", pathSort1, TableStorer.class.getCanonicalName()
        + "('[a, b, c]; [d, e, f, m1]')");

    String pathSort2 = pathTable2.toString() + Integer.toString(fileId);
    pigServer.store("sort2", pathSort2, TableStorer.class.getCanonicalName()
        + "('[a, b, c]; [d, e, f, m1]')");

    // Load sorted tables
    String load1 = "records1 = LOAD '" + pathSort1
        + "' USING org.apache.hadoop.zebra.pig.TableLoader('a, b, c, d, e, f, m1', 'sorted');";
    pigServer.registerQuery(load1);

    String load2 = "records2 = LOAD '" + pathSort2
        + "' USING org.apache.hadoop.zebra.pig.TableLoader('a, b, c, d, e, f, m1', 'sorted');";
    pigServer.registerQuery(load2);

    // Merge tables
    String join = "joinRecords = JOIN records1 BY " + "(" + "a" + ")"
        + " , records2 BY " + "(" + "a" + ")" + " USING \"merge\";";
    pigServer.registerQuery(join);

    pigServer.openIterator("joinRecords");  // get iterator to trigger error
  }

  @Test(expected = IOException.class)
  public void test_merge_joint_15() throws ExecException, IOException {
    //
    // Right hand table is not in ascending order (negative test)
    //

    // Sort tables
    String orderby1 = "sort1 = ORDER table1 BY " + "a" + " ;";
    pigServer.registerQuery(orderby1);

    String orderby2 = "sort2 = ORDER table2 BY " + "b" + " ;";  // sort right hand table by wrong key
    pigServer.registerQuery(orderby2);

    // Store sorted tables
    ++fileId;  // increment filename suffix
    String pathSort1 = pathTable1.toString() + Integer.toString(fileId);
    pigServer.store("sort1", pathSort1, TableStorer.class.getCanonicalName()
        + "('[a, b, c]; [d, e, f, m1]')");

    String pathSort2 = pathTable2.toString() + Integer.toString(fileId);
    pigServer.store("sort2", pathSort2, TableStorer.class.getCanonicalName()
        + "('[a, b, c]; [d, e, f, m1]')");

    // Load sorted tables
    String load1 = "records1 = LOAD '" + pathSort1
        + "' USING org.apache.hadoop.zebra.pig.TableLoader('a, b, c, d, e, f, m1', 'sorted');";
    pigServer.registerQuery(load1);

    String load2 = "records2 = LOAD '" + pathSort2
        + "' USING org.apache.hadoop.zebra.pig.TableLoader('a, b, c, d, e, f, m1', 'sorted');";
    pigServer.registerQuery(load2);

    // Merge tables
    String join = "joinRecords = JOIN records1 BY " + "(" + "a" + ")"
        + " , records2 BY " + "(" + "a" + ")" + " USING \"merge\";";
    pigServer.registerQuery(join);

    pigServer.openIterator("joinRecords");  // get iterator to trigger error
  }

  @Test(expected = IOException.class)
  public void test_merge_joint_16() throws ExecException, IOException {
    //
    // More than two input tables (negative test)
    //

    // Sort tables
    String orderby1 = "sort1 = ORDER table1 BY " + "a" + " ;";
    pigServer.registerQuery(orderby1);

    String orderby2 = "sort2 = ORDER table2 BY " + "a" + " ;";
    pigServer.registerQuery(orderby2);

    String orderby3 = "sort3 = ORDER table3 BY " + "aa" + " ;";
    pigServer.registerQuery(orderby3);

    // Store sorted tables
    ++fileId;  // increment filename suffix
    String pathSort1 = pathTable1.toString() + Integer.toString(fileId);
    pigServer.store("sort1", pathSort1, TableStorer.class.getCanonicalName()
        + "('[a, b, c]; [d, e, f, m1]')");

    String pathSort2 = pathTable2.toString() + Integer.toString(fileId);
    pigServer.store("sort2", pathSort2, TableStorer.class.getCanonicalName()
        + "('[a, b, c]; [d, e, f, m1]')");

    String pathSort3 = pathTable3.toString() + Integer.toString(fileId);
    pigServer.store("sort3", pathSort3, TableStorer.class.getCanonicalName()
        + "('[aa, bb]; [ee]')");

    // Load sorted tables
    String load1 = "records1 = LOAD '" + pathSort1
        + "' USING org.apache.hadoop.zebra.pig.TableLoader('a, b, c, d, e, f, m1', 'sorted');";
    pigServer.registerQuery(load1);

    String load2 = "records2 = LOAD '" + pathSort2
        + "' USING org.apache.hadoop.zebra.pig.TableLoader('a, b, c, d, e, f, m1', 'sorted');";
    pigServer.registerQuery(load2);

    String load3 = "records3 = LOAD '" + pathSort3
        + "' USING org.apache.hadoop.zebra.pig.TableLoader('aa, bb, ee', 'sorted');";
    pigServer.registerQuery(load3);

    // Merge tables
    String join = "joinRecords = JOIN records1 BY " + "(" + "a" + ")"
        + " , records2 BY " + "(" + "a" + ")"
        + " , records3 BY " + "(" + "aa" + ")" + " USING \"merge\";";  // merge three tables
    pigServer.registerQuery(join);
  }

  @Test(expected = IOException.class)
  public void test_merge_joint_25() throws ExecException, IOException {
    //
    // Two tables do not have a common join key (negative test)
    //

    // Sort tables
    String orderby1 = "sort1 = ORDER table1 BY " + "a" + " ;";
    pigServer.registerQuery(orderby1);

    String orderby3 = "sort3 = ORDER table3 BY " + "aa" + " ;";
    pigServer.registerQuery(orderby3);

    // Store sorted tables
    ++fileId;  // increment filename suffix
    String pathSort1 = pathTable1.toString() + Integer.toString(fileId);
    pigServer.store("sort1", pathSort1, TableStorer.class.getCanonicalName()
        + "('[a, b, c]; [d, e, f, m1]')");

    String pathSort3 = pathTable3.toString() + Integer.toString(fileId);
    pigServer.store("sort3", pathSort3, TableStorer.class.getCanonicalName()
        + "('[aa, bb]; [ee]')");

    // Load sorted tables
    String load1 = "records1 = LOAD '" + pathSort1
        + "' USING org.apache.hadoop.zebra.pig.TableLoader('a, b, c, d, e, f, m1', 'sorted');";
    pigServer.registerQuery(load1);

    String load3 = "records3 = LOAD '" + pathSort3
        + "' USING org.apache.hadoop.zebra.pig.TableLoader('aa, bb, ee', 'sorted');";
    pigServer.registerQuery(load3);

    // Merge tables
    String join = "joinRecords = JOIN records1 BY " + "(" + "a" + ")"
        + " , records3 BY " + "(" + "a" + ")" + " USING \"merge\";";  // sort key a does not exist for records3
    pigServer.registerQuery(join);
  }

  @Test(expected = IOException.class)
  public void test_merge_joint_26() throws ExecException, IOException {
    //
    // Two tables do not have a common join key (negative test)
    //

    // Sort tables
    String orderby1 = "sort1 = ORDER table1 BY " + "a" + " ;";
    pigServer.registerQuery(orderby1);

    String orderby3 = "sort3 = ORDER table3 BY " + "aa" + " ;";
    pigServer.registerQuery(orderby3);

    // Store sorted tables
    ++fileId;  // increment filename suffix
    String pathSort1 = pathTable1.toString() + Integer.toString(fileId);
    pigServer.store("sort1", pathSort1, TableStorer.class.getCanonicalName()
        + "('[a, b, c]; [d, e, f, m1]')");

    String pathSort3 = pathTable3.toString() + Integer.toString(fileId);
    pigServer.store("sort3", pathSort3, TableStorer.class.getCanonicalName()
        + "('[aa, bb]; [ee]')");

    // Load sorted tables
    String load1 = "records1 = LOAD '" + pathSort1
        + "' USING org.apache.hadoop.zebra.pig.TableLoader('a, b, c, d, e, f, m1', 'sorted');";
    pigServer.registerQuery(load1);

    String load3 = "records3 = LOAD '" + pathSort3
        + "' USING org.apache.hadoop.zebra.pig.TableLoader('aa, bb, ee', 'sorted');";
    pigServer.registerQuery(load3);

    // Merge tables
    String join = "joinRecords = JOIN records1 BY " + "(" + "aa" + ")"
        + " , records3 BY " + "(" + "aa" + ")" + " USING \"merge\";";  // sort key aa does not exist for records1
    pigServer.registerQuery(join);
  }

  @Test(expected = IOException.class)
  public void test_merge_joint_27() throws ExecException, IOException {
    //
    // Two tables do not have a common join key (negative test)
    //

    // Sort tables
    String orderby1 = "sort1 = ORDER table1 BY " + "a" + " ;";
    pigServer.registerQuery(orderby1);

    String orderby3 = "sort3 = ORDER table3 BY " + "aa" + " ;";
    pigServer.registerQuery(orderby3);

    // Store sorted tables
    ++fileId;  // increment filename suffix
    String pathSort1 = pathTable1.toString() + Integer.toString(fileId);
    pigServer.store("sort1", pathSort1, TableStorer.class.getCanonicalName()
        + "('[a, b, c]; [d, e, f, m1]')");

    String pathSort3 = pathTable3.toString() + Integer.toString(fileId);
    pigServer.store("sort3", pathSort3, TableStorer.class.getCanonicalName()
        + "('[aa, bb]; [ee]')");

    // Load sorted tables
    String load1 = "records1 = LOAD '" + pathSort1
        + "' USING org.apache.hadoop.zebra.pig.TableLoader('a, b, c, d, e, f, m1', 'sorted');";
    pigServer.registerQuery(load1);

    String load3 = "records3 = LOAD '" + pathSort3
        + "' USING org.apache.hadoop.zebra.pig.TableLoader('aa, bb, ee', 'sorted');";
    pigServer.registerQuery(load3);

    // Merge tables
    String join = "joinRecords = JOIN records1 BY " + "(" + "aaa" + ")"
        + " , records3 BY " + "(" + "aaa" + ")" + " USING \"merge\";";  // sort key aaa does not exist for records1 or records3
    pigServer.registerQuery(join);
  }

  @Test(expected = IOException.class)
  public void test_merge_joint_28() throws ExecException, IOException {
    //
    // Two table key names are the same but the data types are different (negative test)
    //

    // Sort tables
    String orderby1 = "sort1 = ORDER table1 BY " + "a" + " ;";  // a is an int in table1
    pigServer.registerQuery(orderby1);

    String orderby4 = "sort4 = ORDER table4 BY " + "a" + " ;";  // a is a string in table4
table4 BY " + "a" + " ;"; // a is string pigServer.registerQuery(orderby4); // Store sorted tables ++fileId; // increment filename suffix String pathSort1 = pathTable1.toString() + Integer.toString(fileId); pigServer.store("sort1", pathSort1, TableStorer.class.getCanonicalName() + "('[a, b, c]; [d, e, f, m1]')"); String pathSort4 = pathTable4.toString() + Integer.toString(fileId); pigServer.store("sort4", pathSort4, TableStorer.class.getCanonicalName() + "('[a, b]; [c]')"); // Load sorted tables String load1 = "records1 = LOAD '" + pathSort1 + "' USING org.apache.hadoop.zebra.pig.TableLoader('a, b, c, d, e, f, m1', 'sorted');"; pigServer.registerQuery(load1); String load4 = "records4 = LOAD '" + pathSort4 + "' USING org.apache.hadoop.zebra.pig.TableLoader('a, b, c', 'sorted');"; pigServer.registerQuery(load4); // Merge tables String join = "joinRecords = JOIN records1 BY " + "(" + "a" + ")" + " , records4 BY " + "("+ "a" + ")" + " USING \"merge\";"; // sort key a is different data type for records1 and records4 pigServer.registerQuery(join); pigServer.openIterator("joinRecords"); // get iterator to trigger error } }