/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.pig.test;

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Properties;
import java.util.Set;

import org.apache.pig.ExecType;
import org.apache.pig.FuncSpec;
import org.apache.pig.data.DataType;
import org.apache.pig.impl.PigContext;
import org.apache.pig.impl.io.FileSpec;
import org.apache.pig.impl.logicalLayer.LogicalPlan;
import org.apache.pig.impl.plan.VisitorException;
import org.apache.pig.impl.util.MultiMap;
import org.apache.pig.newplan.Operator;
import org.apache.pig.newplan.logical.LogicalPlanMigrationVistor;
import org.apache.pig.newplan.logical.expression.AndExpression;
import org.apache.pig.newplan.logical.expression.CastExpression;
import org.apache.pig.newplan.logical.expression.ConstantExpression;
import org.apache.pig.newplan.logical.expression.EqualExpression;
import org.apache.pig.newplan.logical.expression.LogicalExpression;
import org.apache.pig.newplan.logical.expression.LogicalExpressionPlan;
import org.apache.pig.newplan.logical.expression.ProjectExpression;
import org.apache.pig.newplan.logical.relational.LOCogroup;
import org.apache.pig.newplan.logical.relational.LOForEach;
import org.apache.pig.newplan.logical.relational.LOGenerate;
import org.apache.pig.newplan.logical.relational.LOInnerLoad;
import org.apache.pig.newplan.logical.relational.LOJoin;
import org.apache.pig.newplan.logical.relational.LOLoad;
import org.apache.pig.newplan.logical.relational.LOStore;
import org.apache.pig.newplan.logical.relational.LogicalRelationalOperator;
import org.apache.pig.newplan.logical.relational.LogicalSchema;
import org.apache.pig.newplan.logical.relational.LogicalSchema.LogicalFieldSchema;
import org.apache.pig.test.utils.LogicalPlanTester;

import junit.framework.TestCase;

/**
 * Tests for {@link LogicalPlanMigrationVistor}: each test builds an old-style
 * logical plan from Pig Latin statements with {@link LogicalPlanTester},
 * migrates it with the visitor, and checks the operators, expression plans
 * and schemas of the resulting new-style logical plan.
 */
public class TestLogicalPlanMigrationVisitor extends TestCase {

    /** Pig context (local execution, empty properties) handed to every LogicalPlanTester. */
    PigContext pc = new PigContext(ExecType.LOCAL, new Properties());

    /** load -> filter -> store: the filter compares a projection of column 0 with a constant. */
    public void testSimplePlan() throws Exception {
        LogicalPlanTester lpt = new LogicalPlanTester(pc);
        lpt.buildPlan("a = load 'd.txt';");
        lpt.buildPlan("b = filter a by $0==NULL;");
        LogicalPlan plan = lpt.buildPlan("store b into 'empty';");

        // check basics
        org.apache.pig.newplan.logical.relational.LogicalPlan newPlan = migratePlan(plan);
        assertEquals(3, newPlan.size());
        assertEquals(1, newPlan.getSources().size());

        // check load
        LogicalRelationalOperator op = (LogicalRelationalOperator) newPlan.getSources().get(0);
        assertEquals(LOLoad.class, op.getClass());

        // check filter
        op = (LogicalRelationalOperator) newPlan.getSuccessors(op).get(0);
        LogicalExpressionPlan exp = filterPlanOf(op);
        EqualExpression eq = (EqualExpression) exp.getSources().get(0);
        assertProjectionAgainstConstant(eq, 0, 0);

        // check store
        op = (LogicalRelationalOperator) newPlan.getSuccessors(op).get(0);
        assertEquals(LOStore.class, op.getClass());
    }

    /** Same shape as the simple plan, but the filter's left-hand side is a cast over a projection. */
    public void testPlanWithCast() throws Exception {
        LogicalPlanTester lpt = new LogicalPlanTester(pc);
        lpt.buildPlan("a = load 'd.txt' as (id, c);");
        lpt.buildPlan("b = filter a by (int)id==10;");
        LogicalPlan plan = lpt.buildPlan("store b into 'empty';");

        // check basics
        org.apache.pig.newplan.logical.relational.LogicalPlan newPlan = migratePlan(plan);
        assertEquals(3, newPlan.size());
        assertEquals(1, newPlan.getSources().size());

        // check load
        LogicalRelationalOperator op = (LogicalRelationalOperator) newPlan.getSources().get(0);
        assertEquals(LOLoad.class, op.getClass());

        // check filter
        op = (LogicalRelationalOperator) newPlan.getSuccessors(op).get(0);
        LogicalExpressionPlan exp = filterPlanOf(op);
        EqualExpression eq = (EqualExpression) exp.getSources().get(0);
        assertEquals(CastExpression.class, eq.getLhs().getClass());

        // the cast's only successor is the projection of the casted column
        LogicalExpression ep = (LogicalExpression) exp.getSuccessors(eq.getLhs()).get(0);
        assertEquals(ProjectExpression.class, ep.getClass());
        assertEquals(0, ((ProjectExpression) ep).getColNum());
        assertEquals(0, ((ProjectExpression) ep).getInputNum());
        assertEquals(ConstantExpression.class, eq.getRhs().getClass());

        // check store
        op = (LogicalRelationalOperator) newPlan.getSuccessors(op).get(0);
        assertEquals(LOStore.class, op.getClass());
    }

    /** Two loads joined, then filtered with an AND of two equality checks, then stored. */
    public void testJoinPlan() throws Exception {
        LogicalPlanTester lpt = new LogicalPlanTester(pc);
        lpt.buildPlan("a = load 'd1.txt' as (id, c);");
        lpt.buildPlan("b = load 'd2.txt'as (id, c);");
        lpt.buildPlan("c = join a by id, b by c;");
        lpt.buildPlan("d = filter c by a::id==NULL AND b::c==NULL;");
        LogicalPlan plan = lpt.buildPlan("store d into 'empty';");

        // check basics
        org.apache.pig.newplan.logical.relational.LogicalPlan newPlan = migratePlan(plan);
        assertEquals(5, newPlan.size());
        assertEquals(2, newPlan.getSources().size());

        // check load and join
        LogicalRelationalOperator op =
            (LogicalRelationalOperator) newPlan.getSuccessors(newPlan.getSources().get(0)).get(0);
        assertEquals(LOJoin.class, op.getClass());
        assertEquals(LOJoin.JOINTYPE.HASH, ((LOJoin) op).getJoinType());

        LogicalRelationalOperator l1 = (LogicalRelationalOperator) newPlan.getPredecessors(op).get(0);
        assertEquals(LOLoad.class, l1.getClass());
        assertEquals("a", l1.getAlias());

        LogicalRelationalOperator l2 = (LogicalRelationalOperator) newPlan.getPredecessors(op).get(1);
        assertEquals(LOLoad.class, l2.getClass());
        assertEquals("b", l2.getAlias());

        // check join input plans
        LogicalExpressionPlan p1 = ((LOJoin) op).getJoinPlan(0).iterator().next();
        assertEquals(1, p1.size());
        ProjectExpression prj = (ProjectExpression) p1.getSources().get(0);
        assertEquals(0, prj.getInputNum());
        assertEquals(0, prj.getColNum());

        LogicalExpressionPlan p2 = ((LOJoin) op).getJoinPlan(1).iterator().next();
        assertEquals(1, p2.size());
        prj = (ProjectExpression) p2.getSources().get(0);
        assertEquals(1, prj.getInputNum());
        assertEquals(1, prj.getColNum());

        // check filter
        op = (LogicalRelationalOperator) newPlan.getSuccessors(op).get(0);
        LogicalExpressionPlan exp = filterPlanOf(op);
        AndExpression ae = (AndExpression) exp.getSources().get(0);

        EqualExpression eq = (EqualExpression) exp.getSuccessors(ae).get(0);
        assertProjectionAgainstConstant(eq, 0, 0);

        eq = (EqualExpression) exp.getSuccessors(ae).get(1);
        assertProjectionAgainstConstant(eq, 3, 0);

        // check store
        op = (LogicalRelationalOperator) newPlan.getSuccessors(op).get(0);
        assertEquals(LOStore.class, op.getClass());
    }

    /** Builds the expected load -> foreach -> store plan by hand and compares it to the migrated plan. */
    public void testForeachPlan() throws Exception {
        LogicalPlanTester lpt = new LogicalPlanTester(pc);
        lpt.buildPlan("a = load '/test/d.txt' as (id, d);");
        lpt.buildPlan("b = foreach a generate id, FLATTEN(d);");
        LogicalPlan plan = lpt.buildPlan("store b into '/test/empty';");

        // check basics
        org.apache.pig.newplan.logical.relational.LogicalPlan newPlan = migratePlan(plan);

        org.apache.pig.newplan.logical.relational.LogicalPlan expected =
            new org.apache.pig.newplan.logical.relational.LogicalPlan();

        LogicalSchema aschema = new LogicalSchema();
        aschema.addField(new LogicalFieldSchema("id", null, DataType.BYTEARRAY));
        aschema.addField(new LogicalFieldSchema("d", null, DataType.BYTEARRAY));
        LOLoad load = new LOLoad(
            new FileSpec("/test/d.txt", new FuncSpec("org.apache.pig.builtin.PigStorage")),
            aschema, expected, null);
        expected.add(load);

        LOForEach foreach = new LOForEach(expected);
        org.apache.pig.newplan.logical.relational.LogicalPlan innerPlan =
            new org.apache.pig.newplan.logical.relational.LogicalPlan();
        LOInnerLoad l1 = new LOInnerLoad(innerPlan, foreach, 0);
        innerPlan.add(l1);
        LOInnerLoad l2 = new LOInnerLoad(innerPlan, foreach, 1);
        innerPlan.add(l2);

        List<LogicalExpressionPlan> eps = new ArrayList<LogicalExpressionPlan>();
        LOGenerate gen = new LOGenerate(innerPlan, eps, new boolean[] {false, true});
        LogicalExpressionPlan p1 = new LogicalExpressionPlan();
        p1.add(new ProjectExpression(p1, 0, -1, gen));
        LogicalExpressionPlan p2 = new LogicalExpressionPlan();
        p2.add(new ProjectExpression(p2, 1, -1, gen));
        eps.add(p1);
        eps.add(p2);

        innerPlan.add(gen);
        innerPlan.connect(l1, gen);
        innerPlan.connect(l2, gen);

        foreach.setInnerPlan(innerPlan);
        expected.add(foreach);

        LOStore s = new LOStore(expected,
            new FileSpec("/test/empty", new FuncSpec("org.apache.pig.builtin.PigStorage")));
        expected.add(s);

        expected.connect(load, foreach);
        expected.connect(foreach, s);

        assertTrue(expected.isEqual(newPlan));

        // the hand-built foreach must expose the same two untyped fields as the load
        LogicalSchema schema = foreach.getSchema();
        aschema = new LogicalSchema();
        aschema.addField(new LogicalFieldSchema("id", null, DataType.BYTEARRAY));
        aschema.addField(new LogicalFieldSchema("d", null, DataType.BYTEARRAY));
        assertTrue(schema.isEqual(aschema));
    }

    /** Checks the schema produced by foreach for flatten, no flatten, typed fields and an addition. */
    public void testForeachSchema() throws Exception {
        // test flatten
        LogicalPlanTester lpt = new LogicalPlanTester(pc);
        lpt.buildPlan("a = load '/test/d.txt' as (id, d:tuple(v, s));");
        LogicalPlan plan = lpt.buildPlan("b = foreach a generate id, FLATTEN(d);");

        org.apache.pig.newplan.logical.relational.LogicalPlan newPlan = migratePlan(plan);
        LogicalRelationalOperator op = (LogicalRelationalOperator) newPlan.getSinks().get(0);

        LogicalSchema s2 = new LogicalSchema();
        s2.addField(new LogicalFieldSchema("id", null, DataType.BYTEARRAY));
        s2.addField(new LogicalFieldSchema("v", null, DataType.BYTEARRAY));
        s2.addField(new LogicalFieldSchema("s", null, DataType.BYTEARRAY));
        assertTrue(s2.isEqual(op.getSchema()));

        // test no flatten
        lpt = new LogicalPlanTester(pc);
        lpt.buildPlan("a = load '/test/d.txt' as (id, d:bag{t:(v, s)});");
        plan = lpt.buildPlan("b = foreach a generate id, d;");

        newPlan = migratePlan(plan);
        op = (LogicalRelationalOperator) newPlan.getSinks().get(0);

        LogicalSchema aschema = new LogicalSchema();
        aschema.addField(new LogicalFieldSchema("id", null, DataType.BYTEARRAY));
        LogicalSchema aschema2 = new LogicalSchema();
        LogicalSchema aschema3 = new LogicalSchema();
        aschema3.addField(new LogicalFieldSchema("v", null, DataType.BYTEARRAY));
        aschema3.addField(new LogicalFieldSchema("s", null, DataType.BYTEARRAY));
        aschema2.addField(new LogicalFieldSchema("t", aschema3, DataType.TUPLE));
        aschema.addField(new LogicalFieldSchema("d", aschema2, DataType.BAG));
        assertTrue(aschema.isEqual(op.getSchema()));

        // check with defined data type
        lpt = new LogicalPlanTester(pc);
        lpt.buildPlan("a = load '/test/d.txt' as (id, d:bag{t:(v:int, s)});");
        lpt.buildPlan("b = foreach a generate id, FLATTEN(d);");
        plan = lpt.buildPlan("store b into '/test/empty';");

        newPlan = migratePlan(plan);
        op = (LogicalRelationalOperator) newPlan.getSinks().get(0);
        // the sink is the store; its predecessor is the foreach under test
        op = (LogicalRelationalOperator) newPlan.getPredecessors(op).get(0);
        LogicalSchema schema = op.getSchema();

        aschema = new LogicalSchema();
        aschema.addField(new LogicalFieldSchema("id", null, DataType.BYTEARRAY));
        aschema.addField(new LogicalFieldSchema("v", null, DataType.INTEGER));
        aschema.addField(new LogicalFieldSchema("s", null, DataType.BYTEARRAY));
        assertTrue(schema.isEqual(aschema));

        // test with add
        lpt = new LogicalPlanTester(pc);
        lpt.buildPlan("a = load '/test/d.txt' as (id, v:int, s:int);");
        lpt.buildPlan("b = foreach a generate id, v+s;");
        plan = lpt.buildPlan("store b into '/test/empty';");

        newPlan = migratePlan(plan);
        op = (LogicalRelationalOperator) newPlan.getSinks().get(0);
        op = (LogicalRelationalOperator) newPlan.getPredecessors(op).get(0);
        schema = op.getSchema();

        aschema = new LogicalSchema();
        aschema.addField(new LogicalFieldSchema("id", null, DataType.BYTEARRAY));
        aschema.addField(new LogicalFieldSchema(null, null, DataType.INTEGER));
        assertTrue(schema.isEqual(aschema));
    }

    /**
     * Like {@link #testForeachPlan()}, but the flattened bag contains a field
     * named "id" that clashes with the outer "id", so the flattened field is
     * expected under the alias "d::id".
     */
    public void testForeachPlan2() throws Exception {
        LogicalPlanTester lpt = new LogicalPlanTester(pc);
        lpt.buildPlan("a = load '/test/d.txt' as (id, d:bag{t:(id:int, s)});");
        lpt.buildPlan("b = foreach a generate id, FLATTEN(d);");
        LogicalPlan plan = lpt.buildPlan("store b into '/test/empty';");

        // check basics
        org.apache.pig.newplan.logical.relational.LogicalPlan newPlan = migratePlan(plan);

        org.apache.pig.newplan.logical.relational.LogicalPlan expected =
            new org.apache.pig.newplan.logical.relational.LogicalPlan();

        LogicalSchema aschema = new LogicalSchema();
        aschema.addField(new LogicalFieldSchema("id", null, DataType.BYTEARRAY));
        LogicalSchema aschema2 = new LogicalSchema();
        LogicalSchema aschema3 = new LogicalSchema();
        aschema3.addField(new LogicalFieldSchema("id", null, DataType.INTEGER));
        aschema3.addField(new LogicalFieldSchema("s", null, DataType.BYTEARRAY));
        aschema2.addField(new LogicalFieldSchema("t", aschema3, DataType.TUPLE));
        aschema2.setTwoLevelAccessRequired(true);
        aschema.addField(new LogicalFieldSchema("d", aschema2, DataType.BAG));
        LOLoad load = new LOLoad(
            new FileSpec("/test/d.txt", new FuncSpec("org.apache.pig.builtin.PigStorage")),
            aschema, expected, null);
        expected.add(load);

        LOForEach foreach2 = new LOForEach(expected);
        org.apache.pig.newplan.logical.relational.LogicalPlan innerPlan =
            new org.apache.pig.newplan.logical.relational.LogicalPlan();
        LOInnerLoad l1 = new LOInnerLoad(innerPlan, foreach2, 0);
        innerPlan.add(l1);
        LOInnerLoad l2 = new LOInnerLoad(innerPlan, foreach2, 1);
        innerPlan.add(l2);

        List<LogicalExpressionPlan> eps = new ArrayList<LogicalExpressionPlan>();
        LOGenerate gen = new LOGenerate(innerPlan, eps, new boolean[] {false, true});
        // NOTE(review): unlike testForeachPlan, the projections are not passed to
        // p1.add()/p2.add() here -- confirm whether the ProjectExpression
        // constructor registers itself with the plan.
        LogicalExpressionPlan p1 = new LogicalExpressionPlan();
        new ProjectExpression(p1, 0, -1, gen);
        LogicalExpressionPlan p2 = new LogicalExpressionPlan();
        new ProjectExpression(p2, 1, -1, gen);
        eps.add(p1);
        eps.add(p2);

        innerPlan.add(gen);
        innerPlan.connect(l1, gen);
        innerPlan.connect(l2, gen);

        foreach2.setInnerPlan(innerPlan);
        expected.add(foreach2);

        LOStore s = new LOStore(expected,
            new FileSpec("/test/empty", new FuncSpec("org.apache.pig.builtin.PigStorage")));
        expected.add(s);

        expected.connect(load, foreach2);
        expected.connect(foreach2, s);

        assertTrue(expected.isEqual(newPlan));

        LogicalSchema schema = foreach2.getSchema();
        aschema = new LogicalSchema();
        aschema.addField(new LogicalFieldSchema("id", null, DataType.BYTEARRAY));
        aschema.addField(new LogicalFieldSchema("d::id", null, DataType.INTEGER));
        aschema.addField(new LogicalFieldSchema("s", null, DataType.BYTEARRAY));
        assertTrue(schema.isEqual(aschema));

        // lookup by alias must return the very same field object as lookup by position
        assertSame(schema.getField(0), schema.getField("id"));
        assertSame(schema.getField(1), schema.getField("d::id"));
    }

    /** Group of one input by a single key. */
    public void testCoGroup() throws Exception {
        LogicalPlanTester lpt = new LogicalPlanTester(pc);
        lpt.buildPlan("a = load '/test/d.txt' as (name:chararray, age:int, gpa:float);");
        lpt.buildPlan("b = group a by name;");
        LogicalPlan plan = lpt.buildPlan("store b into '/test/empty';");

        // check basics
        org.apache.pig.newplan.logical.relational.LogicalPlan newPlan = migratePlan(plan);

        LogicalSchema loadSchema =
            ((LogicalRelationalOperator) newPlan.getSources().get(0)).getSchema();
        Set<Long> uids = getAllUids(loadSchema);

        LogicalRelationalOperator op =
            (LogicalRelationalOperator) newPlan.getSuccessors(newPlan.getSources().get(0)).get(0);
        assertEquals(LOCogroup.class, op.getClass());
        LogicalSchema schema = op.getSchema();

        assertEquals(2, schema.size());

        // group key: a fresh chararray field named "group"
        assertEquals(DataType.CHARARRAY, schema.getField(0).type);
        assertFalse(uids.contains(schema.getField(0).uid));
        assertEquals("group", schema.getField(0).alias);

        assertNameAgeGpaBag(schema.getField(1), loadSchema);

        uids.add(Long.valueOf(schema.getField(0).uid));
        assertFalse(uids.contains(schema.getField(1).uid));

        LOCogroup cogroup = (LOCogroup) op;

        MultiMap<Integer, LogicalExpressionPlan> expressionPlans = cogroup.getExpressionPlans();
        assertEquals(1, expressionPlans.size());
        List<LogicalExpressionPlan> plans =
            (List<LogicalExpressionPlan>) expressionPlans.get(Integer.valueOf(0));
        assertEquals(1, plans.size());

        assertSingleProjection(plans.get(0), loadSchema.getField(0).uid, 0, 0);
    }

    /** Group of one input by a two-column key, which yields a tuple-typed group field. */
    public void testCoGroup2() throws Exception {
        LogicalPlanTester lpt = new LogicalPlanTester(pc);
        lpt.buildPlan("a = load '/test/d.txt' as (name:chararray, age:int, gpa:float);");
        lpt.buildPlan("b = group a by ( name, age );");
        LogicalPlan plan = lpt.buildPlan("store b into '/test/empty';");

        // check basics
        org.apache.pig.newplan.logical.relational.LogicalPlan newPlan = migratePlan(plan);

        LogicalSchema loadSchema =
            ((LogicalRelationalOperator) newPlan.getSources().get(0)).getSchema();
        Set<Long> uids = getAllUids(loadSchema);

        LogicalRelationalOperator op =
            (LogicalRelationalOperator) newPlan.getSuccessors(newPlan.getSources().get(0)).get(0);
        assertEquals(LOCogroup.class, op.getClass());
        LogicalSchema schema = op.getSchema();

        assertEquals(2, schema.size());

        LogicalFieldSchema group = schema.getField(0);
        assertEquals(DataType.TUPLE, group.type);
        assertFalse(uids.contains(group.uid));
        assertEquals("group", group.alias);
        assertEquals(DataType.CHARARRAY, group.schema.getField(0).type);
        assertEquals(DataType.INTEGER, group.schema.getField(1).type);

        assertNameAgeGpaBag(schema.getField(1), loadSchema);

        // We are doing Uid tests at the end as the uids should not repeat
        uids.add(Long.valueOf(group.uid));
        assertFalse(uids.contains(group.schema.getField(0).uid));
        uids.add(Long.valueOf(group.schema.getField(0).uid));
        assertFalse(uids.contains(group.schema.getField(1).uid));
        uids.add(Long.valueOf(group.schema.getField(1).uid));
        assertFalse(uids.contains(schema.getField(1).uid));

        LOCogroup cogroup = (LOCogroup) op;

        MultiMap<Integer, LogicalExpressionPlan> expressionPlans = cogroup.getExpressionPlans();
        assertEquals(1, expressionPlans.size());
        List<LogicalExpressionPlan> plans =
            (List<LogicalExpressionPlan>) expressionPlans.get(Integer.valueOf(0));
        assertEquals(2, plans.size());

        assertSingleProjection(plans.get(0), loadSchema.getField(0).uid, 0, 0);
        assertSingleProjection(plans.get(1), loadSchema.getField(1).uid, 1, 0);
    }

    /** Cogroup of two inputs, each by a single key. */
    public void testCoGroup3() throws Exception {
        LogicalPlanTester lpt = new LogicalPlanTester(pc);
        lpt.buildPlan("a = load '/test/d.txt' as (name:chararray, age:int, gpa:float);");
        lpt.buildPlan("b = load '/test/e.txt' as (name:chararray, blah:chararray );");
        lpt.buildPlan("c = group a by name, b by name;");
        LogicalPlan plan = lpt.buildPlan("store c into '/test/empty';");

        // check basics
        org.apache.pig.newplan.logical.relational.LogicalPlan newPlan = migratePlan(plan);

        assertEquals(LOCogroup.class,
            newPlan.getSuccessors(newPlan.getSources().get(0)).get(0).getClass());
        LOCogroup cogroup = (LOCogroup) newPlan.getSuccessors(newPlan.getSources().get(0)).get(0);

        // Reason for this strange way of getting the load schema is to maintain the sequence correctly
        LogicalSchema loadSchema =
            ((LogicalRelationalOperator) newPlan.getPredecessors(cogroup).get(0)).getSchema();
        LogicalSchema load2Schema =
            ((LogicalRelationalOperator) newPlan.getPredecessors(cogroup).get(1)).getSchema();

        Set<Long> uids = getAllUids(loadSchema);
        uids.addAll(getAllUids(load2Schema));

        LogicalSchema schema = cogroup.getSchema();

        assertEquals(3, schema.size());

        assertEquals(DataType.CHARARRAY, schema.getField(0).type);
        assertFalse(uids.contains(schema.getField(0).uid));
        assertEquals("group", schema.getField(0).alias);

        assertNameAgeGpaBag(schema.getField(1), loadSchema);

        LogicalFieldSchema secondBag = schema.getField(2);
        assertEquals(DataType.BAG, secondBag.type);
        assertField(secondBag.schema.getField(0), DataType.CHARARRAY, "name",
            load2Schema.getField(0).uid);
        assertField(secondBag.schema.getField(1), DataType.CHARARRAY, "blah",
            load2Schema.getField(1).uid);

        // We are doing Uid tests at the end as the uids should not repeat
        assertFalse(uids.contains(schema.getField(1).uid));
        uids.add(schema.getField(1).uid);
        assertFalse(uids.contains(schema.getField(2).uid));

        MultiMap<Integer, LogicalExpressionPlan> expressionPlans = cogroup.getExpressionPlans();
        assertEquals(2, expressionPlans.size());
        List<LogicalExpressionPlan> plans =
            (List<LogicalExpressionPlan>) expressionPlans.get(Integer.valueOf(0));
        assertEquals(1, plans.size());
        List<LogicalExpressionPlan> plans2 =
            (List<LogicalExpressionPlan>) expressionPlans.get(Integer.valueOf(1));
        assertEquals(1, plans2.size());

        assertSingleProjection(plans.get(0), loadSchema.getField(0).uid, 0, 0);
        assertSingleProjection(plans2.get(0), load2Schema.getField(0).uid, 0, 1);
    }

    /** Cogroup of two inputs, each by a two-column key. */
    public void testCoGroup4() throws Exception {
        LogicalPlanTester lpt = new LogicalPlanTester(pc);
        lpt.buildPlan("a = load '/test/d.txt' as (name:chararray, age:int, gpa:float);");
        lpt.buildPlan("b = load '/test/e.txt' as (name:chararray, age:int, blah:chararray );");
        lpt.buildPlan("c = group a by ( name, age ), b by ( name, age );");
        LogicalPlan plan = lpt.buildPlan("store c into '/test/empty';");

        // check basics
        org.apache.pig.newplan.logical.relational.LogicalPlan newPlan = migratePlan(plan);

        assertEquals(LOCogroup.class,
            newPlan.getSuccessors(newPlan.getSources().get(0)).get(0).getClass());
        LOCogroup cogroup = (LOCogroup) newPlan.getSuccessors(newPlan.getSources().get(0)).get(0);

        // Reason for this strange way of getting the load schema is to maintain the sequence correctly
        LogicalSchema loadSchema =
            ((LogicalRelationalOperator) newPlan.getPredecessors(cogroup).get(0)).getSchema();
        LogicalSchema load2Schema =
            ((LogicalRelationalOperator) newPlan.getPredecessors(cogroup).get(1)).getSchema();

        Set<Long> uids = getAllUids(loadSchema);
        uids.addAll(getAllUids(load2Schema));

        LogicalSchema schema = cogroup.getSchema();

        assertEquals(3, schema.size());

        LogicalFieldSchema group = schema.getField(0);
        assertEquals(DataType.TUPLE, group.type);
        assertFalse(uids.contains(group.uid));
        assertEquals("group", group.alias);
        assertField(group.schema.getField(0), DataType.CHARARRAY, "name");
        assertField(group.schema.getField(1), DataType.INTEGER, "age");

        assertNameAgeGpaBag(schema.getField(1), loadSchema);

        LogicalFieldSchema secondBag = schema.getField(2);
        assertEquals(DataType.BAG, secondBag.type);
        assertField(secondBag.schema.getField(0), DataType.CHARARRAY, "name",
            load2Schema.getField(0).uid);
        assertField(secondBag.schema.getField(1), DataType.INTEGER, "age",
            load2Schema.getField(1).uid);
        assertField(secondBag.schema.getField(2), DataType.CHARARRAY, "blah",
            load2Schema.getField(2).uid);

        // We are doing Uid tests at the end as the uids should not repeat
        assertFalse(uids.contains(group.schema.getField(0).uid));
        assertFalse(uids.contains(group.schema.getField(1).uid));
        assertFalse(uids.contains(schema.getField(1).uid));
        uids.add(schema.getField(1).uid);
        assertFalse(uids.contains(schema.getField(2).uid));

        MultiMap<Integer, LogicalExpressionPlan> expressionPlans = cogroup.getExpressionPlans();
        assertEquals(2, expressionPlans.size());
        List<LogicalExpressionPlan> plans =
            (List<LogicalExpressionPlan>) expressionPlans.get(Integer.valueOf(0));
        assertEquals(2, plans.size());
        List<LogicalExpressionPlan> plans2 =
            (List<LogicalExpressionPlan>) expressionPlans.get(Integer.valueOf(1));
        assertEquals(2, plans2.size());

        assertSingleProjection(plans.get(0), loadSchema.getField(0).uid, 0, 0);
        assertSingleProjection(plans.get(1), loadSchema.getField(1).uid, 1, 0);
        assertSingleProjection(plans2.get(0), load2Schema.getField(0).uid, 0, 1);
        assertSingleProjection(plans2.get(1), load2Schema.getField(1).uid, 1, 1);
    }

    /**
     * Asserts that the operator is a new-plan LOFilter and returns its filter plan.
     *
     * @param op operator expected to be a filter
     * @return the filter's expression plan
     */
    private static LogicalExpressionPlan filterPlanOf(LogicalRelationalOperator op)
            throws Exception {
        assertEquals(org.apache.pig.newplan.logical.relational.LOFilter.class, op.getClass());
        return ((org.apache.pig.newplan.logical.relational.LOFilter) op).getFilterPlan();
    }

    /**
     * Asserts that an equality compares a projection (left) with a constant (right).
     *
     * @param eq               the equality expression to inspect
     * @param expectedColNum   expected column number of the projection
     * @param expectedInputNum expected input number of the projection
     */
    private static void assertProjectionAgainstConstant(EqualExpression eq, int expectedColNum,
            int expectedInputNum) throws Exception {
        assertEquals(ProjectExpression.class, eq.getLhs().getClass());
        assertEquals(expectedColNum, ((ProjectExpression) eq.getLhs()).getColNum());
        assertEquals(expectedInputNum, ((ProjectExpression) eq.getLhs()).getInputNum());
        assertEquals(ConstantExpression.class, eq.getRhs().getClass());
    }

    /** Asserts the type and alias of a schema field. */
    private static void assertField(LogicalFieldSchema field, byte expectedType,
            String expectedAlias) {
        assertEquals(expectedType, field.type);
        assertEquals(expectedAlias, field.alias);
    }

    /** Asserts the type, alias and uid of a schema field. */
    private static void assertField(LogicalFieldSchema field, byte expectedType,
            String expectedAlias, long expectedUid) {
        assertField(field, expectedType, expectedAlias);
        assertEquals(expectedUid, field.uid);
    }

    /**
     * Asserts that the given field is a bag whose inner fields are
     * (name:chararray, age:int, gpa:float) and carry the same uids as the
     * first three fields of the load schema.
     */
    private static void assertNameAgeGpaBag(LogicalFieldSchema bagField,
            LogicalSchema loadSchema) {
        assertEquals(DataType.BAG, bagField.type);
        assertField(bagField.schema.getField(0), DataType.CHARARRAY, "name",
            loadSchema.getField(0).uid);
        assertField(bagField.schema.getField(1), DataType.INTEGER, "age",
            loadSchema.getField(1).uid);
        assertField(bagField.schema.getField(2), DataType.FLOAT, "gpa",
            loadSchema.getField(2).uid);
    }

    /**
     * Asserts that an expression plan has exactly one sink, that the sink is a
     * projection, and that the projection has the given uid, column and input.
     */
    private static void assertSingleProjection(LogicalExpressionPlan exprPlan, long expectedUid,
            int expectedColNum, int expectedInputNum) throws Exception {
        assertEquals(1, exprPlan.getSinks().size());
        assertEquals(ProjectExpression.class, exprPlan.getSinks().get(0).getClass());
        ProjectExpression prj = (ProjectExpression) exprPlan.getSinks().get(0);
        assertEquals(expectedUid, prj.getFieldSchema().uid);
        assertEquals(expectedColNum, prj.getColNum());
        assertEquals(expectedInputNum, prj.getInputNum());
    }

    /**
     * Collects the uids found in a schema. The method is recursive: a bag or
     * tuple field that has a nested schema contributes the uids of that nested
     * schema (its own uid is not added); every other field contributes its own
     * uid.
     *
     * @param schema schema to walk; may be null
     * @return set of uids from this schema, empty when the schema is null
     */
    private Set<Long> getAllUids( LogicalSchema schema ) {
        Set<Long> uids = new HashSet<Long>();

        if( schema != null ) {
            for( LogicalFieldSchema fieldSchema : schema.getFields() ) {
                if( ( fieldSchema.type == DataType.BAG || fieldSchema.type == DataType.TUPLE )
                        && fieldSchema.schema != null ) {
                    uids.addAll( getAllUids( fieldSchema.schema ) );
                } else {
                    uids.add( fieldSchema.uid );
                }
            }
        }
        return uids;
    }

    /**
     * Runs the migration visitor over an old-style logical plan.
     *
     * @param lp the old logical plan
     * @return the new logical plan produced by the visitor
     * @throws VisitorException if the visitor fails
     */
    private org.apache.pig.newplan.logical.relational.LogicalPlan migratePlan(LogicalPlan lp)
            throws VisitorException {
        LogicalPlanMigrationVistor visitor = new LogicalPlanMigrationVistor(lp);
        visitor.visit();
        return visitor.getNewLogicalPlan();
    }
}