/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.zebra.pig;

import java.io.IOException;
import java.util.Iterator;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.zebra.BaseTestCase;
import org.apache.hadoop.zebra.io.BasicTable;
import org.apache.hadoop.zebra.io.TableInserter;
import org.apache.hadoop.zebra.io.TestBasicTable;
import org.apache.hadoop.zebra.schema.Schema;
import org.apache.hadoop.zebra.types.TypesUtils;
import org.apache.pig.data.DataBag;
import org.apache.pig.data.Tuple;
import org.junit.AfterClass;
import org.junit.Assert;
import org.junit.BeforeClass;
import org.junit.Test;

/**
 * Tests map-side cogroup: two sorted Zebra tables are loaded through
 * TableLoader and combined with Pig's merge cogroup.
 */
public class TestMapSideCoGroup extends BaseTestCase {
  private static Path table1, table2;
  private static Configuration conf;

  @BeforeClass
  public static void setUp() throws Exception {
    init();
    TestBasicTable.setUpOnce();
    conf = TestBasicTable.conf;
    table1 = getTableFullPath( "TestMapSideCoGroup1" );
    removeDir( table1 );
    table2 = getTableFullPath( "TestMapSideCoGroup2" );
    removeDir( table2 );
  }

  @AfterClass
  public static void tearDown() throws Exception {
    pigServer.shutdown();
  }

  @Test
  public void test() throws IOException {
    int table1RowCount = 100000;
    int table2RowCount = 200000;
    int table1DupFactor = 15;
    int table2DupFactor = 125;

    // Two tables sorted on column a; each key value repeats dupFactor times.
    createTable( table1RowCount, table1DupFactor, "a:int, b:string, c:string", "[a, b, c]", "a", table1 );
    createTable( table2RowCount, table2DupFactor, "a:int, d:string", "[a, d]", "a", table2 );

    String qs1 = "T1 = load '" + table1.toString()
        + "' USING org.apache.hadoop.zebra.pig.TableLoader('a, b, c', 'sorted');";
    System.out.println( "qs1: " + qs1 );
    String qs2 = "T2 = load '" + table2.toString()
        + "' USING org.apache.hadoop.zebra.pig.TableLoader('a, d', 'sorted');";
    System.out.println( "qs2: " + qs2 );
    pigServer.registerQuery( qs1 );
    pigServer.registerQuery( qs2 );

    // A merge cogroup requires both inputs to be sorted on the group key.
    String qs3 = "T3 = cogroup T1 by a, T2 by a USING 'merge';";
    pigServer.registerQuery( qs3 );

    org.apache.pig.impl.logicalLayer.schema.Schema schema = pigServer.dumpSchema( "T3" );
    Assert.assertEquals(
        "{group: int,T1: {(a: int,b: chararray,c: chararray)},T2: {(a: int,d: chararray)}}",
        schema.toString() );

    Iterator<Tuple> it = pigServer.openIterator( "T3" );
    int count = 0;
    // Keys run from 0 to rows/dupFactor inclusive, and the table with fewer
    // distinct keys covers a subset of the other's key range, so the larger
    // quotient plus one (for key 0) gives the number of groups.
    int expectedCount = Math.max( table1RowCount/table1DupFactor, table2RowCount/table2DupFactor ) + 1;
    int totalRowsInBag1 = 0;
    int totalRowsInBag2 = 0;
    while( it.hasNext() ) {
      Tuple result = it.next();
      totalRowsInBag1 += ( (DataBag)result.get( 1 ) ).size();
      totalRowsInBag2 += ( (DataBag)result.get( 2 ) ).size();
      // System.out.println( "tuple = " + result.toDelimitedString( "," ) );
      count++;
    }

    Assert.assertEquals( expectedCount, count );
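    // Across all groups, the bags must account for every input row exactly once.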
    Assert.assertEquals( table1RowCount, totalRowsInBag1 );
    Assert.assertEquals( table2RowCount, totalRowsInBag2 );
  }

  /**
   * Creates a sorted basic table at <code>path</code> and fills it with
   * <code>rows</code> rows. The key column takes the value i/step, so each
   * key value repeats about <code>step</code> times.
   */
  public static void createTable(int rows, int step, String strSchema, String storage,
      String sortColumns, Path path) throws IOException {
    if( fs.exists(path) ) {
      BasicTable.drop(path, conf);
    }

    // First writer pass creates the table and fixes its schema and storage layout.
    BasicTable.Writer writer = new BasicTable.Writer(path, strSchema, storage, sortColumns, null, conf);
    writer.finish();
    Schema schema = writer.getSchema();
    String[] colNames = schema.getColumns();
    Tuple tuple = TypesUtils.createTuple(schema);

    // Second writer pass inserts all rows into a single partition.
    writer = new BasicTable.Writer(path, conf);
    TableInserter inserter = writer.getInserter( String.format("part-%06d", 1), true );

    for( int i = 1; i <= rows; ++i ) {
      BytesWritable key = new BytesWritable( String.format( "key%09d", i/step ).getBytes() );
      TypesUtils.resetTuple(tuple);

      // Tuple.set throws ExecException, a subclass of IOException, so any
      // failure propagates to the caller instead of being silently swallowed.
      tuple.set( 0, i / step );
      for( int k = 1; k < tuple.size(); ++k ) {
        tuple.set( k, "col-" + colNames[k] + i * 10 );
      }
      inserter.insert(key, tuple);
    }

    inserter.close();

    // Reopen the writer and close it to finalize the table.
    writer = new BasicTable.Writer(path, conf);
    writer.close();
  }
}