/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.pig.test;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.pig.ExecType;
import org.apache.pig.FilterFunc;
import org.apache.pig.data.Tuple;
import org.apache.pig.impl.PigContext;
import org.apache.pig.impl.logicalLayer.*;
import org.apache.pig.impl.logicalLayer.optimizer.*;
import org.apache.pig.impl.logicalLayer.schema.Schema;
import org.apache.pig.test.utils.Identity;
import org.apache.pig.test.utils.LogicalPlanTester;
import org.apache.pig.impl.plan.optimizer.OptimizerException;
import org.junit.Test;
import org.junit.Before;
/**
* Test the logical optimizer.
*/
public class TestPushDownForeachFlatten extends junit.framework.TestCase {
final String FILE_BASE_LOCATION = "test/org/apache/pig/test/data/DotFiles/" ;
static final int MAX_SIZE = 100000;
private final Log log = LogFactory.getLog(getClass());
PigContext pc = new PigContext(ExecType.LOCAL, new Properties());
LogicalPlanTester planTester = new LogicalPlanTester(pc) ;
private static final String simpleEchoStreamingCommand;
static {
if (System.getProperty("os.name").toUpperCase().startsWith("WINDOWS"))
simpleEchoStreamingCommand = "perl -ne 'print \\\"$_\\\"'";
else
simpleEchoStreamingCommand = "perl -ne 'print \"$_\"'";
}
@Before
public void tearDown() {
planTester.reset();
}
/**
*
* A simple filter UDF for testing
*
*/
static public class MyFilterFunc extends FilterFunc {
@Override
public Boolean exec(Tuple input) {
return false;
}
}
@Test
//Test to ensure that the right exception is thrown when the input list is empty
public void testErrorEmptyInput() throws Exception {
LogicalPlan lp = new LogicalPlan();
PushDownForeachFlatten pushDownForeach = new PushDownForeachFlatten(lp);
try {
pushDownForeach.check(lp.getRoots());
fail("Exception Expected!");
} catch(Exception e) {
assertTrue(((OptimizerException)e).getErrorCode() == 2052);
}
}
@Test
//Test to ensure that the right exception is thrown when the input list is empty
public void testErrorNonForeachInput() throws Exception {
LogicalPlan lp = planTester.buildPlan("A = load 'myfile' as (name, age, gpa);");;
PushDownForeachFlatten pushDownForeach = new PushDownForeachFlatten(lp);
try {
pushDownForeach.check(lp.getRoots());
fail("Exception Expected!");
} catch(Exception e) {
assertTrue(((OptimizerException)e).getErrorCode() == 2005);
}
}
@Test
public void testForeachNoFlatten() throws Exception {
planTester.buildPlan("A = load 'myfile' as (name, age, gpa);");
LogicalPlan lp = planTester.buildPlan("B = foreach A generate $1;");
planTester.setPlan(lp);
planTester.setProjectionMap(lp);
PushDownForeachFlatten pushDownForeach = new PushDownForeachFlatten(lp);
assertTrue(!pushDownForeach.check(lp.getLeaves()));
assertTrue(pushDownForeach.getSwap() == false);
assertTrue(pushDownForeach.getInsertBetween() == false);
assertTrue(pushDownForeach.getFlattenedColumnMap() == null);
}
@Test
public void testForeachNoSuccessors() throws Exception {
planTester.buildPlan("A = load 'myfile' as (name, age, gpa);");
LogicalPlan lp = planTester.buildPlan("B = foreach A generate flatten($1);");
planTester.setPlan(lp);
planTester.setProjectionMap(lp);
PushDownForeachFlatten pushDownForeach = new PushDownForeachFlatten(lp);
assertTrue(!pushDownForeach.check(lp.getLeaves()));
assertTrue(pushDownForeach.getSwap() == false);
assertTrue(pushDownForeach.getInsertBetween() == false);
assertTrue(pushDownForeach.getFlattenedColumnMap() == null);
}
@Test
public void testForeachStreaming() throws Exception {
planTester.buildPlan("A = load 'myfile' as (name, age, gpa);");
planTester.buildPlan("B = foreach A generate flatten($1);");
LogicalPlan lp = planTester.buildPlan("C = stream B through `" + simpleEchoStreamingCommand + "`;");
planTester.setPlan(lp);
planTester.setProjectionMap(lp);
PushDownForeachFlatten pushDownForeach = new PushDownForeachFlatten(lp);
LOLoad load = (LOLoad) lp.getRoots().get(0);
assertTrue(!pushDownForeach.check(lp.getSuccessors(load)));
assertTrue(pushDownForeach.getSwap() == false);
assertTrue(pushDownForeach.getInsertBetween() == false);
assertTrue(pushDownForeach.getFlattenedColumnMap() == null);
}
@Test
public void testForeachDistinct() throws Exception {
planTester.buildPlan("A = load 'myfile' as (name, age, gpa);");
planTester.buildPlan("B = foreach A generate flatten($1);");
LogicalPlan lp = planTester.buildPlan("C = distinct B;");
planTester.setPlan(lp);
planTester.setProjectionMap(lp);
PushDownForeachFlatten pushDownForeach = new PushDownForeachFlatten(lp);
LOLoad load = (LOLoad) lp.getRoots().get(0);
assertTrue(!pushDownForeach.check(lp.getSuccessors(load)));
assertTrue(pushDownForeach.getSwap() == false);
assertTrue(pushDownForeach.getInsertBetween() == false);
assertTrue(pushDownForeach.getFlattenedColumnMap() == null);
}
@Test
public void testForeachForeach() throws Exception {
planTester.buildPlan("A = load 'myfile' as (name, age, gpa);");
planTester.buildPlan("B = foreach A generate $0, $1, flatten(1);");
LogicalPlan lp = planTester.buildPlan("C = foreach B generate $0;");
planTester.setPlan(lp);
planTester.setProjectionMap(lp);
PushDownForeachFlatten pushDownForeach = new PushDownForeachFlatten(lp);
LOLoad load = (LOLoad) lp.getRoots().get(0);
assertTrue(!pushDownForeach.check(lp.getSuccessors(load)));
assertTrue(pushDownForeach.getSwap() == false);
assertTrue(pushDownForeach.getInsertBetween() == false);
assertTrue(pushDownForeach.getFlattenedColumnMap() == null);
}
@Test
public void testForeachFilter() throws Exception {
planTester.buildPlan("A = load 'myfile' as (name, age, gpa);");
planTester.buildPlan("B = foreach A generate $0, $1, flatten($2);");
LogicalPlan lp = planTester.buildPlan("C = filter B by $1 < 18;");
planTester.setPlan(lp);
planTester.setProjectionMap(lp);
PushDownForeachFlatten pushDownForeach = new PushDownForeachFlatten(lp);
LOLoad load = (LOLoad) lp.getRoots().get(0);
assertTrue(!pushDownForeach.check(lp.getSuccessors(load)));
assertTrue(pushDownForeach.getSwap() == false);
assertTrue(pushDownForeach.getInsertBetween() == false);
assertTrue(pushDownForeach.getFlattenedColumnMap() == null);
}
@Test
public void testForeachSplitOutput() throws Exception {
planTester.buildPlan("A = load 'myfile' as (name, age, gpa);");
planTester.buildPlan("B = foreach A generate $0, $1, flatten($2);");
LogicalPlan lp = planTester.buildPlan("split B into C if $1 < 18, D if $1 >= 18;");
planTester.setPlan(lp);
planTester.setProjectionMap(lp);
PushDownForeachFlatten pushDownForeach = new PushDownForeachFlatten(lp);
LOLoad load = (LOLoad) lp.getRoots().get(0);
assertTrue(!pushDownForeach.check(lp.getSuccessors(load)));
assertTrue(pushDownForeach.getSwap() == false);
assertTrue(pushDownForeach.getInsertBetween() == false);
assertTrue(pushDownForeach.getFlattenedColumnMap() == null);
}
@Test
public void testForeachLimit() throws Exception {
planTester.buildPlan("A = load 'myfile' as (name, age, gpa);");
planTester.buildPlan("B = foreach A generate $0, $1, flatten($2);");
LogicalPlan lp = planTester.buildPlan("B = limit B 10;");
planTester.setPlan(lp);
planTester.setProjectionMap(lp);
PushDownForeachFlatten pushDownForeach = new PushDownForeachFlatten(lp);
LOLoad load = (LOLoad) lp.getRoots().get(0);
assertTrue(!pushDownForeach.check(lp.getSuccessors(load)));
assertTrue(pushDownForeach.getSwap() == false);
assertTrue(pushDownForeach.getInsertBetween() == false);
assertTrue(pushDownForeach.getFlattenedColumnMap() == null);
}
@Test
public void testForeachUnion() throws Exception {
planTester.buildPlan("A = load 'myfile' as (name, age, gpa);");
planTester.buildPlan("B = foreach A generate $0, $1, flatten($2);");
planTester.buildPlan("C = load 'anotherfile' as (name, age, preference);");
LogicalPlan lp = planTester.buildPlan("D = union B, C;");
planTester.setPlan(lp);
planTester.setProjectionMap(lp);
PushDownForeachFlatten pushDownForeach = new PushDownForeachFlatten(lp);
LOLoad load = (LOLoad) lp.getRoots().get(0);
assertTrue(!pushDownForeach.check(lp.getSuccessors(load)));
assertTrue(pushDownForeach.getSwap() == false);
assertTrue(pushDownForeach.getInsertBetween() == false);
assertTrue(pushDownForeach.getFlattenedColumnMap() == null);
}
@Test
public void testForeachCogroup() throws Exception {
planTester.buildPlan("A = load 'myfile' as (name, age, gpa);");
planTester.buildPlan("B = foreach A generate $0, $1, flatten($2);");
planTester.buildPlan("C = load 'anotherfile' as (name, age, preference);");
LogicalPlan lp = planTester.buildPlan("D = cogroup B by $0, C by $0;");
planTester.setPlan(lp);
planTester.setProjectionMap(lp);
PushDownForeachFlatten pushDownForeach = new PushDownForeachFlatten(lp);
LOLoad load = (LOLoad) lp.getRoots().get(0);
assertTrue(!pushDownForeach.check(lp.getSuccessors(load)));
assertTrue(pushDownForeach.getSwap() == false);
assertTrue(pushDownForeach.getInsertBetween() == false);
assertTrue(pushDownForeach.getFlattenedColumnMap() == null);
}
@Test
public void testForeachGroupBy() throws Exception {
planTester.buildPlan("A = load 'myfile' as (name, age, gpa);");
planTester.buildPlan("B = foreach A generate $0, $1, flatten($2);");
LogicalPlan lp = planTester.buildPlan("C = group B by $0;");
planTester.setPlan(lp);
planTester.setProjectionMap(lp);
PushDownForeachFlatten pushDownForeach = new PushDownForeachFlatten(lp);
LOLoad load = (LOLoad) lp.getRoots().get(0);
assertTrue(!pushDownForeach.check(lp.getSuccessors(load)));
assertTrue(pushDownForeach.getSwap() == false);
assertTrue(pushDownForeach.getInsertBetween() == false);
assertTrue(pushDownForeach.getFlattenedColumnMap() == null);
}
@Test
public void testForeachSort() throws Exception {
planTester.buildPlan("A = load 'myfile' as (name, age, gpa);");
planTester.buildPlan("B = foreach A generate $0, $1, flatten($2);");
LogicalPlan lp = planTester.buildPlan("C = order B by $0, $1;");
planTester.setPlan(lp);
planTester.setProjectionMap(lp);
PushDownForeachFlatten pushDownForeach = new PushDownForeachFlatten(lp);
LOSort sort = (LOSort) lp.getLeaves().get(0);
LOForEach foreach = (LOForEach)lp.getPredecessors(sort).get(0);
assertTrue(pushDownForeach.check(lp.getPredecessors(sort)));
assertTrue(pushDownForeach.getSwap() == true);
assertTrue(pushDownForeach.getInsertBetween() == false);
assertTrue(pushDownForeach.getFlattenedColumnMap() == null);
pushDownForeach.transform(lp.getPredecessors(sort));
assertEquals(foreach, lp.getLeaves().get(0));
assertEquals(sort, lp.getPredecessors(foreach).get(0));
}
@Test
public void testForeachFlattenAddedColumnSort() throws Exception {
planTester.buildPlan("A = load 'myfile' as (name, age, gpa);");
planTester.buildPlan("B = foreach A generate $0, $1, flatten(1);");
LogicalPlan lp = planTester.buildPlan("C = order B by $0, $1;");
planTester.setPlan(lp);
planTester.setProjectionMap(lp);
PushDownForeachFlatten pushDownForeach = new PushDownForeachFlatten(lp);
LOSort sort = (LOSort) lp.getLeaves().get(0);
assertTrue(!pushDownForeach.check(lp.getPredecessors(sort)));
assertTrue(pushDownForeach.getSwap() == false);
assertTrue(pushDownForeach.getInsertBetween() == false);
assertTrue(pushDownForeach.getFlattenedColumnMap() == null);
}
@Test
public void testForeachUDFSort() throws Exception {
planTester.buildPlan("A = load 'myfile' as (name, age, gpa);");
planTester.buildPlan("B = foreach A generate $0, $1, " + Identity.class.getName() + "($2) ;");
LogicalPlan lp = planTester.buildPlan("C = order B by $0, $1;");
planTester.setPlan(lp);
planTester.setProjectionMap(lp);
PushDownForeachFlatten pushDownForeach = new PushDownForeachFlatten(lp);
LOSort sort = (LOSort) lp.getLeaves().get(0);
assertTrue(!pushDownForeach.check(lp.getPredecessors(sort)));
assertTrue(pushDownForeach.getSwap() == false);
assertTrue(pushDownForeach.getInsertBetween() == false);
assertTrue(pushDownForeach.getFlattenedColumnMap() == null);
}
@Test
public void testForeachCastSort() throws Exception {
planTester.buildPlan("A = load 'myfile' as (name, age, gpa);");
planTester.buildPlan("B = foreach A generate (chararray)$0, $1, flatten($2);");
LogicalPlan lp = planTester.buildPlan("C = order B by $0, $1;");
planTester.setPlan(lp);
planTester.setProjectionMap(lp);
PushDownForeachFlatten pushDownForeach = new PushDownForeachFlatten(lp);
LOSort sort = (LOSort) lp.getLeaves().get(0);
assertTrue(!pushDownForeach.check(lp.getPredecessors(sort)));
assertTrue(pushDownForeach.getSwap() == false);
assertTrue(pushDownForeach.getInsertBetween() == false);
assertTrue(pushDownForeach.getFlattenedColumnMap() == null);
}
@Test
public void testForeachCross() throws Exception {
planTester.buildPlan("A = load 'myfile' as (name, age, gpa:(letter_grade, point_score));");
planTester.buildPlan("B = foreach A generate $0, $1, flatten($2);");
planTester.buildPlan("C = load 'anotherfile' as (name, age, preference);");
planTester.buildPlan("D = cross B, C;");
LogicalPlan lp = planTester.buildPlan("E = limit D 10;");
planTester.setPlan(lp);
planTester.setProjectionMap(lp);
planTester.rebuildSchema(lp);
PushDownForeachFlatten pushDownForeach = new PushDownForeachFlatten(lp);
LOLoad load = (LOLoad) lp.getRoots().get(0);
LOLimit limit = (LOLimit) lp.getLeaves().get(0);
LOCross cross = (LOCross)lp.getPredecessors(limit).get(0);
LOForEach foreach = (LOForEach) lp.getPredecessors(cross).get(0);
Schema limitSchema = limit.getSchema();
assertTrue(pushDownForeach.check(lp.getSuccessors(load)));
assertTrue(pushDownForeach.getSwap() == false);
assertTrue(pushDownForeach.getInsertBetween() == true);
assertTrue(pushDownForeach.getFlattenedColumnMap() != null);
pushDownForeach.transform(lp.getSuccessors(load));
planTester.rebuildSchema(lp);
for(Boolean b: foreach.getFlatten()) {
assertEquals(b.booleanValue(), false);
}
LOForEach newForeach = (LOForEach)lp.getSuccessors(cross).get(0);
List<Boolean> newForeachFlatten = newForeach.getFlatten();
Map<Integer, Integer> remap = pushDownForeach.getFlattenedColumnMap();
for(Integer key: remap.keySet()) {
Integer value = remap.get(key);
assertEquals(newForeachFlatten.get(value).booleanValue(), true);
}
assertTrue(Schema.equals(limitSchema, limit.getSchema(), false, true));
}
@Test
public void testForeachCross1() throws Exception {
planTester.buildPlan("A = load 'myfile' as (name, age, gpa:(letter_grade, point_score));");
planTester.buildPlan("B = load 'anotherfile' as (name, age, preference:(course_name, instructor));");
planTester.buildPlan("C = foreach B generate $0, $1, flatten($2);");
planTester.buildPlan("D = cross A, C;");
LogicalPlan lp = planTester.buildPlan("E = limit D 10;");
planTester.setPlan(lp);
planTester.setProjectionMap(lp);
planTester.rebuildSchema(lp);
PushDownForeachFlatten pushDownForeach = new PushDownForeachFlatten(lp);
LOLoad load = (LOLoad) lp.getRoots().get(1);
LOLimit limit = (LOLimit) lp.getLeaves().get(0);
LOCross cross = (LOCross)lp.getPredecessors(limit).get(0);
LOForEach foreach = (LOForEach) lp.getPredecessors(cross).get(1);
Schema limitSchema = limit.getSchema();
assertTrue(pushDownForeach.check(lp.getSuccessors(load)));
assertTrue(pushDownForeach.getSwap() == false);
assertTrue(pushDownForeach.getInsertBetween() == true);
assertTrue(pushDownForeach.getFlattenedColumnMap() != null);
pushDownForeach.transform(lp.getSuccessors(load));
planTester.rebuildSchema(lp);
for(Boolean b: foreach.getFlatten()) {
assertEquals(b.booleanValue(), false);
}
LOForEach newForeach = (LOForEach)lp.getSuccessors(cross).get(0);
List<Boolean> newForeachFlatten = newForeach.getFlatten();
Map<Integer, Integer> remap = pushDownForeach.getFlattenedColumnMap();
for(Integer key: remap.keySet()) {
Integer value = remap.get(key);
assertEquals(newForeachFlatten.get(value).booleanValue(), true);
}
assertTrue(Schema.equals(limitSchema, limit.getSchema(), false, true));
}
// TODO
// The following test case testForeachCross2 has multiple foreach flatten
// A new rule should optimize this case
@Test
public void testForeachCross2() throws Exception {
planTester.buildPlan("A = load 'myfile' as (name, age, gpa:(letter_grade, point_score));");
planTester.buildPlan("B = foreach A generate $0, $1, flatten($2);");
planTester.buildPlan("C = load 'anotherfile' as (name, age, preference:(course_name, instructor));");
planTester.buildPlan("D = foreach C generate $0, $1, flatten($2);");
planTester.buildPlan("E = cross B, D;");
LogicalPlan lp = planTester.buildPlan("F = limit E 10;");
planTester.setPlan(lp);
planTester.setProjectionMap(lp);
planTester.rebuildSchema(lp);
PushDownForeachFlatten pushDownForeach = new PushDownForeachFlatten(lp);
LOLoad loada = (LOLoad) lp.getRoots().get(0);
assertTrue(!pushDownForeach.check(lp.getSuccessors(loada)));
assertTrue(pushDownForeach.getSwap() == false);
assertTrue(pushDownForeach.getInsertBetween() == false);
assertTrue(pushDownForeach.getFlattenedColumnMap() == null);
}
@Test
public void testForeachFlattenAddedColumnCross() throws Exception {
planTester.buildPlan("A = load 'myfile' as (name, age, gpa:(letter_grade, point_score));");
planTester.buildPlan("B = foreach A generate $0, $1, flatten(1);");
planTester.buildPlan("C = load 'anotherfile' as (name, age, preference:(course_name, instructor));");
planTester.buildPlan("D = cross B, C;");
LogicalPlan lp = planTester.buildPlan("E = limit D 10;");
planTester.setPlan(lp);
planTester.setProjectionMap(lp);
planTester.rebuildSchema(lp);
PushDownForeachFlatten pushDownForeach = new PushDownForeachFlatten(lp);
LOLoad loada = (LOLoad) lp.getRoots().get(0);
assertTrue(!pushDownForeach.check(lp.getSuccessors(loada)));
assertTrue(pushDownForeach.getSwap() == false);
assertTrue(pushDownForeach.getInsertBetween() == false);
assertTrue(pushDownForeach.getFlattenedColumnMap() == null);
}
@Test
public void testForeachUDFCross() throws Exception {
planTester.buildPlan("A = load 'myfile' as (name, age, gpa:(letter_grade, point_score));");
planTester.buildPlan("B = foreach A generate $0, flatten($1), " + Identity.class.getName() + "($2) ;");
planTester.buildPlan("C = load 'anotherfile' as (name, age, preference:(course_name, instructor));");
planTester.buildPlan("D = cross B, C;");
LogicalPlan lp = planTester.buildPlan("E = limit D 10;");
planTester.setPlan(lp);
planTester.setProjectionMap(lp);
planTester.rebuildSchema(lp);
PushDownForeachFlatten pushDownForeach = new PushDownForeachFlatten(lp);
LOLoad loada = (LOLoad) lp.getRoots().get(0);
assertTrue(!pushDownForeach.check(lp.getSuccessors(loada)));
assertTrue(pushDownForeach.getSwap() == false);
assertTrue(pushDownForeach.getInsertBetween() == false);
assertTrue(pushDownForeach.getFlattenedColumnMap() == null);
}
@Test
public void testForeachCastCross() throws Exception {
planTester.buildPlan("A = load 'myfile' as (name, age, gpa:(letter_grade, point_score));");
planTester.buildPlan("B = foreach A generate $0, (int)$1, $2;");
planTester.buildPlan("C = load 'anotherfile' as (name, age, preference:(course_name, instructor));");
planTester.buildPlan("D = cross B, C;");
LogicalPlan lp = planTester.buildPlan("E = limit D 10;");
planTester.setPlan(lp);
planTester.setProjectionMap(lp);
planTester.rebuildSchema(lp);
PushDownForeachFlatten pushDownForeach = new PushDownForeachFlatten(lp);
LOLoad loada = (LOLoad) lp.getRoots().get(0);
assertTrue(!pushDownForeach.check(lp.getSuccessors(loada)));
assertTrue(pushDownForeach.getSwap() == false);
assertTrue(pushDownForeach.getInsertBetween() == false);
assertTrue(pushDownForeach.getFlattenedColumnMap() == null);
}
@Test
public void testForeachFRJoin() throws Exception {
planTester.buildPlan("A = load 'myfile' as (name, age, gpa:(letter_grade, point_score));");
planTester.buildPlan("B = foreach A generate $0, $1, flatten($2);");
planTester.buildPlan("C = load 'anotherfile' as (name, age, preference);");
planTester.buildPlan("D = join B by $0, C by $0 using \"replicated\";");
LogicalPlan lp = planTester.buildPlan("E = limit D 10;");
planTester.setPlan(lp);
planTester.setProjectionMap(lp);
planTester.rebuildSchema(lp);
PushDownForeachFlatten pushDownForeach = new PushDownForeachFlatten(lp);
LOLoad load = (LOLoad) lp.getRoots().get(0);
LOLimit limit = (LOLimit) lp.getLeaves().get(0);
LOJoin frjoin = (LOJoin)lp.getPredecessors(limit).get(0);
LOForEach foreach = (LOForEach) lp.getPredecessors(frjoin).get(0);
Schema limitSchema = limit.getSchema();
assertTrue(pushDownForeach.check(lp.getSuccessors(load)));
assertTrue(pushDownForeach.getSwap() == false);
assertTrue(pushDownForeach.getInsertBetween() == true);
assertTrue(pushDownForeach.getFlattenedColumnMap() != null);
pushDownForeach.transform(lp.getSuccessors(load));
planTester.rebuildSchema(lp);
for(Boolean b: foreach.getFlatten()) {
assertEquals(b.booleanValue(), false);
}
LOForEach newForeach = (LOForEach)lp.getSuccessors(frjoin).get(0);
List<Boolean> newForeachFlatten = newForeach.getFlatten();
Map<Integer, Integer> remap = pushDownForeach.getFlattenedColumnMap();
for(Integer key: remap.keySet()) {
Integer value = remap.get(key);
assertEquals(newForeachFlatten.get(value).booleanValue(), true);
}
assertTrue(Schema.equals(limitSchema, limit.getSchema(), false, true));
}
@Test
public void testForeachFRJoin1() throws Exception {
planTester.buildPlan("A = load 'myfile' as (name, age, gpa:(letter_grade, point_score));");
planTester.buildPlan("B = load 'anotherfile' as (name, age, preference:(course_name, instructor));");
planTester.buildPlan("C = foreach B generate $0, $1, flatten($2);");
planTester.buildPlan("D = join A by $0, C by $0 using \"replicated\";");
LogicalPlan lp = planTester.buildPlan("E = limit D 10;");
planTester.setPlan(lp);
planTester.setProjectionMap(lp);
planTester.rebuildSchema(lp);
PushDownForeachFlatten pushDownForeach = new PushDownForeachFlatten(lp);
LOLoad load = (LOLoad) lp.getRoots().get(1);
LOLimit limit = (LOLimit) lp.getLeaves().get(0);
LOJoin frjoin = (LOJoin)lp.getPredecessors(limit).get(0);
LOForEach foreach = (LOForEach) lp.getPredecessors(frjoin).get(1);
Schema limitSchema = limit.getSchema();
assertTrue(pushDownForeach.check(lp.getSuccessors(load)));
assertTrue(pushDownForeach.getSwap() == false);
assertTrue(pushDownForeach.getInsertBetween() == true);
assertTrue(pushDownForeach.getFlattenedColumnMap() != null);
pushDownForeach.transform(lp.getSuccessors(load));
planTester.rebuildSchema(lp);
for(Boolean b: foreach.getFlatten()) {
assertEquals(b.booleanValue(), false);
}
LOForEach newForeach = (LOForEach)lp.getSuccessors(frjoin).get(0);
List<Boolean> newForeachFlatten = newForeach.getFlatten();
Map<Integer, Integer> remap = pushDownForeach.getFlattenedColumnMap();
for(Integer key: remap.keySet()) {
Integer value = remap.get(key);
assertEquals(newForeachFlatten.get(value).booleanValue(), true);
}
assertTrue(Schema.equals(limitSchema, limit.getSchema(), false, true));
}
// TODO
// The following test case testForeachFRJoin2 has multiple foreach flatten
// A new rule should optimize this case
@Test
public void testForeachFRJoin2() throws Exception {
planTester.buildPlan("A = load 'myfile' as (name, age, gpa:(letter_grade, point_score));");
planTester.buildPlan("B = foreach A generate $0, $1, flatten($2);");
planTester.buildPlan("C = load 'anotherfile' as (name, age, preference:(course_name, instructor));");
planTester.buildPlan("D = foreach C generate $0, $1, flatten($2);");
planTester.buildPlan("E = join B by $0, D by $0 using \"replicated\";");
LogicalPlan lp = planTester.buildPlan("F = limit E 10;");
planTester.setPlan(lp);
planTester.setProjectionMap(lp);
planTester.rebuildSchema(lp);
PushDownForeachFlatten pushDownForeach = new PushDownForeachFlatten(lp);
LOLoad loada = (LOLoad) lp.getRoots().get(0);
assertTrue(!pushDownForeach.check(lp.getSuccessors(loada)));
assertTrue(pushDownForeach.getSwap() == false);
assertTrue(pushDownForeach.getInsertBetween() == false);
assertTrue(pushDownForeach.getFlattenedColumnMap() == null);
}
@Test
public void testForeachFlattenAddedColumnFRJoin() throws Exception {
planTester.buildPlan("A = load 'myfile' as (name, age, gpa:(letter_grade, point_score));");
planTester.buildPlan("B = foreach A generate $0, $1, flatten(1);");
planTester.buildPlan("C = load 'anotherfile' as (name, age, preference:(course_name, instructor));");
planTester.buildPlan("D = join B by $0, C by $0 using \"replicated\";");
LogicalPlan lp = planTester.buildPlan("E = limit D 10;");
planTester.setPlan(lp);
planTester.setProjectionMap(lp);
planTester.rebuildSchema(lp);
PushDownForeachFlatten pushDownForeach = new PushDownForeachFlatten(lp);
LOLoad loada = (LOLoad) lp.getRoots().get(0);
assertTrue(!pushDownForeach.check(lp.getSuccessors(loada)));
assertTrue(pushDownForeach.getSwap() == false);
assertTrue(pushDownForeach.getInsertBetween() == false);
assertTrue(pushDownForeach.getFlattenedColumnMap() == null);
}
@Test
public void testForeachUDFFRJoin() throws Exception {
planTester.buildPlan("A = load 'myfile' as (name, age, gpa:(letter_grade, point_score));");
planTester.buildPlan("B = foreach A generate $0, flatten($1), " + Identity.class.getName() + "($2) ;");
planTester.buildPlan("C = load 'anotherfile' as (name, age, preference:(course_name, instructor));");
planTester.buildPlan("D = join B by $0, C by $0 using \"replicated\";");
LogicalPlan lp = planTester.buildPlan("E = limit D 10;");
planTester.setPlan(lp);
planTester.setProjectionMap(lp);
planTester.rebuildSchema(lp);
PushDownForeachFlatten pushDownForeach = new PushDownForeachFlatten(lp);
LOLoad loada = (LOLoad) lp.getRoots().get(0);
assertTrue(!pushDownForeach.check(lp.getSuccessors(loada)));
assertTrue(pushDownForeach.getSwap() == false);
assertTrue(pushDownForeach.getInsertBetween() == false);
assertTrue(pushDownForeach.getFlattenedColumnMap() == null);
}
@Test
public void testForeachCastFRJoin() throws Exception {
planTester.buildPlan("A = load 'myfile' as (name, age, gpa:(letter_grade, point_score));");
planTester.buildPlan("B = foreach A generate $0, (int)$1, flatten($2);");
planTester.buildPlan("C = load 'anotherfile' as (name, age, preference:(course_name, instructor));");
planTester.buildPlan("D = join B by $0, C by $0 using \"replicated\";");
LogicalPlan lp = planTester.buildPlan("E = limit D 10;");
planTester.setPlan(lp);
planTester.setProjectionMap(lp);
planTester.rebuildSchema(lp);
PushDownForeachFlatten pushDownForeach = new PushDownForeachFlatten(lp);
LOLoad loada = (LOLoad) lp.getRoots().get(0);
assertTrue(!pushDownForeach.check(lp.getSuccessors(loada)));
assertTrue(pushDownForeach.getSwap() == false);
assertTrue(pushDownForeach.getInsertBetween() == false);
assertTrue(pushDownForeach.getFlattenedColumnMap() == null);
}
@Test
public void testForeachInnerJoin() throws Exception {
planTester.buildPlan("A = load 'myfile' as (name, age, gpa:(letter_grade, point_score));");
planTester.buildPlan("B = foreach A generate $0, $1, flatten($2);");
planTester.buildPlan("C = load 'anotherfile' as (name, age, preference:(course_name, instructor));");
planTester.buildPlan("D = join B by $0, C by $0;");
LogicalPlan lp = planTester.buildPlan("E = limit D 10;");
planTester.setPlan(lp);
planTester.setProjectionMap(lp);
planTester.rebuildSchema(lp);
PushDownForeachFlatten pushDownForeach = new PushDownForeachFlatten(lp);
LOLoad load = (LOLoad) lp.getRoots().get(0);
LOLimit limit = (LOLimit) lp.getLeaves().get(0);
LOJoin join = (LOJoin)lp.getPredecessors(limit).get(0);
LOForEach foreach = (LOForEach) lp.getPredecessors(join).get(0);
Schema limitSchema = limit.getSchema();
assertTrue(pushDownForeach.check(lp.getSuccessors(load)));
assertTrue(pushDownForeach.getSwap() == false);
assertTrue(pushDownForeach.getInsertBetween() == true);
assertTrue(pushDownForeach.getFlattenedColumnMap() != null);
pushDownForeach.transform(lp.getSuccessors(load));
planTester.rebuildSchema(lp);
for(Boolean b: foreach.getFlatten()) {
assertEquals(b.booleanValue(), false);
}
LOForEach newForeach = (LOForEach)lp.getSuccessors(join).get(0);
List<Boolean> newForeachFlatten = newForeach.getFlatten();
Map<Integer, Integer> remap = pushDownForeach.getFlattenedColumnMap();
for(Integer key: remap.keySet()) {
Integer value = remap.get(key);
assertEquals(newForeachFlatten.get(value).booleanValue(), true);
}
assertTrue(Schema.equals(limitSchema, limit.getSchema(), false, true));
}
@Test
public void testForeachInnerJoin1() throws Exception {
planTester.buildPlan("A = load 'myfile' as (name, age, gpa:(letter_grade, point_score));");
planTester.buildPlan("B = load 'anotherfile' as (name, age, preference:(course_name, instructor));");
planTester.buildPlan("C = foreach B generate $0, $1, flatten($2);");
planTester.buildPlan("D = join A by $0, C by $0;");
LogicalPlan lp = planTester.buildPlan("E = limit D 10;");
planTester.setPlan(lp);
planTester.setProjectionMap(lp);
planTester.rebuildSchema(lp);
PushDownForeachFlatten pushDownForeach = new PushDownForeachFlatten(lp);
LOLoad load = (LOLoad) lp.getRoots().get(1);
LOLimit limit = (LOLimit) lp.getLeaves().get(0);
LOJoin join = (LOJoin)lp.getPredecessors(limit).get(0);
LOForEach foreach = (LOForEach) lp.getPredecessors(join).get(1);
Schema limitSchema = limit.getSchema();
assertTrue(pushDownForeach.check(lp.getSuccessors(load)));
assertTrue(pushDownForeach.getSwap() == false);
assertTrue(pushDownForeach.getInsertBetween() == true);
assertTrue(pushDownForeach.getFlattenedColumnMap() != null);
pushDownForeach.transform(lp.getSuccessors(load));
planTester.rebuildSchema(lp);
for(Boolean b: foreach.getFlatten()) {
assertEquals(b.booleanValue(), false);
}
LOForEach newForeach = (LOForEach)lp.getSuccessors(join).get(0);
List<Boolean> newForeachFlatten = newForeach.getFlatten();
Map<Integer, Integer> remap = pushDownForeach.getFlattenedColumnMap();
for(Integer key: remap.keySet()) {
Integer value = remap.get(key);
assertEquals(newForeachFlatten.get(value).booleanValue(), true);
}
assertTrue(Schema.equals(limitSchema, limit.getSchema(), false, true));
}
// TODO
// The following test case testForeachInnerJoin2 has multiple foreach flatten
// A new rule should optimize this case
@Test
public void testForeachInnerJoin2() throws Exception {
planTester.buildPlan("A = load 'myfile' as (name, age, gpa:(letter_grade, point_score));");
planTester.buildPlan("B = foreach A generate $0, $1, flatten($2);");
planTester.buildPlan("C = load 'anotherfile' as (name, age, preference:(course_name, instructor));");
planTester.buildPlan("D = foreach C generate $0, $1, flatten($2);");
planTester.buildPlan("E = join B by $0, D by $0;");
LogicalPlan lp = planTester.buildPlan("F = limit E 10;");
planTester.setPlan(lp);
planTester.setProjectionMap(lp);
planTester.rebuildSchema(lp);
PushDownForeachFlatten pushDownForeach = new PushDownForeachFlatten(lp);
LOLoad loada = (LOLoad) lp.getRoots().get(0);
assertTrue(!pushDownForeach.check(lp.getSuccessors(loada)));
assertTrue(pushDownForeach.getSwap() == false);
assertTrue(pushDownForeach.getInsertBetween() == false);
assertTrue(pushDownForeach.getFlattenedColumnMap() == null);
}
@Test
public void testForeachFlattenAddedColumnInnerJoin() throws Exception {
planTester.buildPlan("A = load 'myfile' as (name, age, gpa:(letter_grade, point_score));");
planTester.buildPlan("B = foreach A generate $0, $1, flatten(1);");
planTester.buildPlan("C = load 'anotherfile' as (name, age, preference:(course_name, instructor));");
planTester.buildPlan("D = join B by $0, C by $0;");
LogicalPlan lp = planTester.buildPlan("E = limit D 10;");
planTester.setPlan(lp);
planTester.setProjectionMap(lp);
planTester.rebuildSchema(lp);
PushDownForeachFlatten pushDownForeach = new PushDownForeachFlatten(lp);
LOLoad loada = (LOLoad) lp.getRoots().get(0);
assertTrue(!pushDownForeach.check(lp.getSuccessors(loada)));
assertTrue(pushDownForeach.getSwap() == false);
assertTrue(pushDownForeach.getInsertBetween() == false);
assertTrue(pushDownForeach.getFlattenedColumnMap() == null);
}
@Test
public void testForeachUDFInnerJoin() throws Exception {
planTester.buildPlan("A = load 'myfile' as (name, age, gpa:(letter_grade, point_score));");
planTester.buildPlan("B = foreach A generate $0, flatten($1), " + Identity.class.getName() + "($2) ;");
planTester.buildPlan("C = load 'anotherfile' as (name, age, preference:(course_name, instructor));");
planTester.buildPlan("D = join B by $0, C by $0;");
LogicalPlan lp = planTester.buildPlan("E = limit D 10;");
planTester.setPlan(lp);
planTester.setProjectionMap(lp);
planTester.rebuildSchema(lp);
PushDownForeachFlatten pushDownForeach = new PushDownForeachFlatten(lp);
LOLoad loada = (LOLoad) lp.getRoots().get(0);
assertTrue(!pushDownForeach.check(lp.getSuccessors(loada)));
assertTrue(pushDownForeach.getSwap() == false);
assertTrue(pushDownForeach.getInsertBetween() == false);
assertTrue(pushDownForeach.getFlattenedColumnMap() == null);
}
@Test
public void testForeachCastInnerJoin() throws Exception {
planTester.buildPlan("A = load 'myfile' as (name, age, gpa:(letter_grade, point_score));");
planTester.buildPlan("B = foreach A generate $0, (int)$1, flatten($2);");
planTester.buildPlan("C = load 'anotherfile' as (name, age, preference:(course_name, instructor));");
planTester.buildPlan("D = join B by $0, C by $0;");
LogicalPlan lp = planTester.buildPlan("E = limit D 10;");
planTester.setPlan(lp);
planTester.setProjectionMap(lp);
planTester.rebuildSchema(lp);
PushDownForeachFlatten pushDownForeach = new PushDownForeachFlatten(lp);
LOLoad loada = (LOLoad) lp.getRoots().get(0);
assertTrue(!pushDownForeach.check(lp.getSuccessors(loada)));
assertTrue(pushDownForeach.getSwap() == false);
assertTrue(pushDownForeach.getInsertBetween() == false);
assertTrue(pushDownForeach.getFlattenedColumnMap() == null);
}
// See PIG-1172
@Test
public void testForeachJoinRequiredField() throws Exception {
planTester.buildPlan("A = load 'myfile' as (bg:bag{t:tuple(a0,a1)});");
planTester.buildPlan("B = FOREACH A generate flatten($0);");
planTester.buildPlan("C = load '3.txt' AS (c0, c1);");
planTester.buildPlan("D = JOIN B by a1, C by c1;");
LogicalPlan lp = planTester.buildPlan("E = limit D 10;");
planTester.setPlan(lp);
planTester.setProjectionMap(lp);
planTester.rebuildSchema(lp);
PushDownForeachFlatten pushDownForeach = new PushDownForeachFlatten(lp);
LOLoad loada = (LOLoad) lp.getRoots().get(0);
assertTrue(!pushDownForeach.check(lp.getSuccessors(loada)));
assertTrue(pushDownForeach.getSwap() == false);
assertTrue(pushDownForeach.getInsertBetween() == false);
}
// See PIG-1374
@Test
public void testForeachRequiredField() throws Exception {
planTester.buildPlan("A = load 'myfile' as (b{t(a0:chararray,a1:int)});");
planTester.buildPlan("B = foreach A generate flatten($0);");
LogicalPlan lp = planTester.buildPlan("C = order B by $1 desc;");
planTester.setPlan(lp);
planTester.setProjectionMap(lp);
planTester.rebuildSchema(lp);
PushDownForeachFlatten pushDownForeach = new PushDownForeachFlatten(lp);
LOLoad loada = (LOLoad) lp.getRoots().get(0);
assertTrue(!pushDownForeach.check(lp.getSuccessors(loada)));
assertTrue(pushDownForeach.getSwap() == false);
assertTrue(pushDownForeach.getInsertBetween() == false);
}
}