/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.pig.test;

import java.util.*;

import org.apache.pig.ExecType;
import org.apache.pig.data.DataType;
import org.apache.pig.impl.PigContext;
import org.apache.pig.impl.logicalLayer.FrontendException;
import org.apache.pig.impl.plan.VisitorException;
import org.apache.pig.impl.util.MultiMap;
import org.apache.pig.newplan.Operator;
import org.apache.pig.newplan.OperatorPlan;
import org.apache.pig.newplan.logical.LogicalPlanMigrationVistor;
import org.apache.pig.newplan.logical.expression.*;
import org.apache.pig.newplan.logical.optimizer.LogicalPlanOptimizer;
import org.apache.pig.newplan.logical.optimizer.ProjectionPatcher;
import org.apache.pig.newplan.logical.optimizer.SchemaPatcher;
import org.apache.pig.newplan.logical.relational.LOCogroup;
import org.apache.pig.newplan.logical.relational.LOFilter;
import org.apache.pig.newplan.logical.relational.LOForEach;
import org.apache.pig.newplan.logical.relational.LOJoin;
import org.apache.pig.newplan.logical.relational.LOLoad;
import org.apache.pig.newplan.logical.relational.LOStore;
import org.apache.pig.newplan.logical.relational.LogicalPlan;
import org.apache.pig.newplan.logical.relational.LogicalRelationalOperator;
import org.apache.pig.newplan.logical.relational.LogicalSchema;
import org.apache.pig.newplan.logical.rules.LoadTypeCastInserter;
import org.apache.pig.newplan.logical.rules.MergeFilter;
import org.apache.pig.newplan.logical.rules.PushUpFilter;
import org.apache.pig.newplan.logical.rules.SplitFilter;
import org.apache.pig.newplan.optimizer.PlanOptimizer;
import org.apache.pig.newplan.optimizer.PlanTransformListener;
import org.apache.pig.newplan.optimizer.Rule;
import org.apache.pig.test.utils.LogicalPlanTester;
import org.junit.Assert;
import org.junit.Test;

public class TestNewPlanFilterRule {

    PigContext pc = new PigContext(ExecType.LOCAL, new Properties());
    LogicalPlanTester planTester = new LogicalPlanTester(pc);

    LogicalPlan plan = null;
    LogicalRelationalOperator load1 = null;
    LogicalRelationalOperator load2 = null;
    LogicalRelationalOperator filter = null;
    LogicalRelationalOperator join = null;
    LogicalRelationalOperator store = null;

    private void prep() {
        plan = new LogicalPlan();
        LogicalSchema schema = new LogicalSchema();
        schema.addField(new LogicalSchema.LogicalFieldSchema("id", null, DataType.INTEGER));
        schema.addField(new LogicalSchema.LogicalFieldSchema("name", null, DataType.CHARARRAY));
        schema.addField(new LogicalSchema.LogicalFieldSchema("age", null, DataType.INTEGER));
        schema.getField(0).uid = 1;
        schema.getField(1).uid = 2;
        schema.getField(2).uid = 3;
        LogicalRelationalOperator l1 = new LOLoad(null, schema, plan, null);
        l1.setAlias("A");
        plan.add(l1);
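
        // second input: B(id:int, dept:int, salary:float)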
LogicalSchema.LogicalFieldSchema("id", null, DataType.INTEGER)); schema.addField(new LogicalSchema.LogicalFieldSchema("dept", null, DataType.INTEGER)); schema.addField(new LogicalSchema.LogicalFieldSchema("salary", null, DataType.FLOAT)); schema.getField(0).uid = 4; schema.getField(1).uid = 5; schema.getField(2).uid = 6; LogicalRelationalOperator l2 = new LOLoad(null, schema, plan, null); l2.setAlias("B"); plan.add(l2); MultiMap<Integer, LogicalExpressionPlan> joinPlans = new MultiMap<Integer, LogicalExpressionPlan>(); LogicalRelationalOperator j1 = new LOJoin(plan, joinPlans, LOJoin.JOINTYPE.HASH, new boolean[]{true, true}); LogicalExpressionPlan p1 = new LogicalExpressionPlan(); ProjectExpression lp1 = new ProjectExpression(p1, 0, 1, j1); p1.add(lp1); joinPlans.put(0, p1); LogicalExpressionPlan p2 = new LogicalExpressionPlan(); ProjectExpression lp2 = new ProjectExpression(p2, 1, 1, j1); p2.add(lp2); joinPlans.put(1, p2); j1.setAlias("C"); plan.add(j1); // build an expression with no AND LogicalExpressionPlan p3 = new LogicalExpressionPlan(); LogicalRelationalOperator f1 = new LOFilter(plan, p3); LogicalExpression lp3 = new ProjectExpression(p3, 0, 2, f1); LogicalSchema.LogicalFieldSchema fs = new LogicalSchema.LogicalFieldSchema(null, null, DataType.INTEGER); LogicalExpression cont = new ConstantExpression(p3, new Integer(3), fs); p3.add(lp3); p3.add(cont); LogicalExpression eq = new EqualExpression(p3, lp3, cont); f1.setAlias("D"); plan.add(f1); LogicalRelationalOperator s1 = new LOStore(plan); plan.add(s1); // load --|-join - filter - store // load --| plan.connect(l1, j1); plan.connect(l2, j1); plan.connect(j1, f1); plan.connect(f1, s1); filter = f1; store = s1; join = j1; load1 = l1; load2 = l2; } @Test public void testFilterRule() throws Exception { prep(); // run split filter rule Rule r = new SplitFilter("SplitFilter"); Set<Rule> s = new HashSet<Rule>(); s.add(r); List<Set<Rule>> ls = new ArrayList<Set<Rule>>(); ls.add(s); MyPlanOptimizer optimizer = new MyPlanOptimizer(plan, ls, 3); optimizer.optimize(); Assert.assertEquals(plan.getPredecessors(filter).get(0), join); Assert.assertEquals(plan.getSuccessors(filter).get(0), store); // run push up filter rule r = new PushUpFilter("PushUpFilter"); s = new HashSet<Rule>(); s.add(r); ls = new ArrayList<Set<Rule>>(); ls.add(s); optimizer = new MyPlanOptimizer(plan, ls, 3); optimizer.optimize(); // the filter should be moved up to be after load Assert.assertEquals(plan.getSuccessors(load1).get(0), filter); Assert.assertEquals(plan.getSuccessors(filter).get(0), join); Assert.assertEquals(plan.getSuccessors(join).get(0), store); // run merge filter rule r = new MergeFilter("MergeFilter"); s = new HashSet<Rule>(); s.add(r); ls = new ArrayList<Set<Rule>>(); ls.add(s); optimizer = new MyPlanOptimizer(plan, ls, 3); optimizer.optimize(); // the filter should the same as before, nothing to merge Assert.assertEquals(plan.getSuccessors(load1).get(0), filter); Assert.assertEquals(plan.getSuccessors(filter).get(0), join); Assert.assertEquals(plan.getSuccessors(join).get(0), store); } // build an expression with 1 AND, it should split into 2 filters @Test public void testFilterRuleWithAnd() throws Exception { prep(); LogicalExpressionPlan p4 = new LogicalExpressionPlan(); LogicalExpression lp3 = new ProjectExpression(p4, 0, 2, filter); LogicalSchema.LogicalFieldSchema fs = new LogicalSchema.LogicalFieldSchema(null, null, DataType.INTEGER); LogicalExpression cont = new ConstantExpression(p4, new Integer(3), fs); p4.add(lp3); p4.add(cont); 
        LogicalExpression eq = new EqualExpression(p4, lp3, cont);
        LogicalExpression lp4 = new ProjectExpression(p4, 0, 5, filter);
        fs = new LogicalSchema.LogicalFieldSchema(null, null, DataType.FLOAT);
        LogicalExpression cont2 = new ConstantExpression(p4, new Float(100), fs);
        p4.add(lp4);
        p4.add(cont2);
        LogicalExpression eq2 = new EqualExpression(p4, lp4, cont2);
        LogicalExpression and = new AndExpression(p4, eq, eq2);
        ((LOFilter)filter).setFilterPlan(p4);

        // run split filter rule
        Rule r = new SplitFilter("SplitFilter");
        Set<Rule> s = new HashSet<Rule>();
        s.add(r);
        List<Set<Rule>> ls = new ArrayList<Set<Rule>>();
        ls.add(s);
        PlanOptimizer optimizer = new MyPlanOptimizer(plan, ls, 3);
        optimizer.optimize();

        Assert.assertEquals(plan.getPredecessors(filter).get(0), join);
        Operator next = plan.getSuccessors(filter).get(0);
        Assert.assertEquals(LOFilter.class, next.getClass());
        next = plan.getSuccessors(next).get(0);
        Assert.assertEquals(LOStore.class, next.getClass());

        // run push up filter rule
        r = new PushUpFilter("PushUpFilter");
        s = new HashSet<Rule>();
        s.add(r);
        ls = new ArrayList<Set<Rule>>();
        ls.add(s);
        optimizer = new MyPlanOptimizer(plan, ls, 3);
        optimizer.optimize();

        // both filters should be moved up to be after each load
        next = plan.getSuccessors(load1).get(0);
        Assert.assertEquals(next.getClass(), LOFilter.class);
        Assert.assertEquals(plan.getSuccessors(next).get(0), join);
        next = plan.getSuccessors(load2).get(0);
        Assert.assertEquals(next.getClass(), LOFilter.class);
        Assert.assertEquals(plan.getSuccessors(next).get(0), join);
        Assert.assertEquals(plan.getSuccessors(join).get(0), store);

        // run merge filter rule
        r = new MergeFilter("MergeFilter");
        s = new HashSet<Rule>();
        s.add(r);
        ls = new ArrayList<Set<Rule>>();
        ls.add(s);
        optimizer = new MyPlanOptimizer(plan, ls, 3);
        optimizer.optimize();

        // the filters should be the same as before, nothing to merge
        next = plan.getSuccessors(load1).get(0);
        Assert.assertEquals(next.getClass(), LOFilter.class);
        Assert.assertEquals(plan.getSuccessors(next).get(0), join);
        next = plan.getSuccessors(load2).get(0);
        Assert.assertEquals(next.getClass(), LOFilter.class);
        Assert.assertEquals(plan.getSuccessors(next).get(0), join);
        Assert.assertEquals(plan.getSuccessors(join).get(0), store);
    }

    @Test
    public void testFilterRuleWith2And() throws Exception {
        prep();

        // build an expression with 2 AND, it should split into 3 filters
        LogicalExpressionPlan p5 = new LogicalExpressionPlan();
        LogicalExpression lp3 = new ProjectExpression(p5, 0, 2, filter);
        LogicalSchema.LogicalFieldSchema fs =
            new LogicalSchema.LogicalFieldSchema(null, null, DataType.INTEGER);
        LogicalExpression cont = new ConstantExpression(p5, new Integer(3), fs);
        p5.add(lp3);
        p5.add(cont);
        LogicalExpression eq = new EqualExpression(p5, lp3, cont);
        LogicalExpression lp4 = new ProjectExpression(p5, 0, 3, filter);
        LogicalExpression cont2 = new ConstantExpression(p5, new Integer(3), fs);
        p5.add(lp4);
        p5.add(cont2);
        LogicalExpression eq2 = new EqualExpression(p5, lp4, cont2);
        LogicalExpression and1 = new AndExpression(p5, eq, eq2);
        lp3 = new ProjectExpression(p5, 0, 0, filter);
        lp4 = new ProjectExpression(p5, 0, 3, filter);
        p5.add(lp3);
        p5.add(lp4);
        eq2 = new EqualExpression(p5, lp3, lp4);
        LogicalExpression and2 = new AndExpression(p5, and1, eq2);
        ((LOFilter)filter).setFilterPlan(p5);

        Rule r = new SplitFilter("SplitFilter");
        Set<Rule> s = new HashSet<Rule>();
        s.add(r);
        List<Set<Rule>> ls = new ArrayList<Set<Rule>>();
        ls.add(s);
        MyPlanOptimizer optimizer = new MyPlanOptimizer(plan, ls, 3);
        MyPlanTransformListener listener = new MyPlanTransformListener();
        optimizer.addPlanTransformListener(listener);
        optimizer.optimize();

        Assert.assertEquals(plan.getPredecessors(filter).get(0), join);
        Operator next = plan.getSuccessors(filter).get(0);
        Assert.assertEquals(next.getClass(), LOFilter.class);
        next = plan.getSuccessors(next).get(0);
        Assert.assertEquals(next.getClass(), LOFilter.class);
        next = plan.getSuccessors(next).get(0);
        Assert.assertEquals(LOStore.class, next.getClass());
        OperatorPlan transformed = listener.getTransformed();
        Assert.assertEquals(transformed.size(), 3);

        // run push up filter rule
        r = new PushUpFilter("PushUpFilter");
        s = new HashSet<Rule>();
        s.add(r);
        ls = new ArrayList<Set<Rule>>();
        ls.add(s);
        optimizer = new MyPlanOptimizer(plan, ls, 3);
        listener = new MyPlanTransformListener();
        optimizer.addPlanTransformListener(listener);
        optimizer.optimize();

        // 2 filters should be moved up to be after each load, and one filter should remain
        next = plan.getSuccessors(load1).get(0);
        Assert.assertEquals(next.getClass(), LOFilter.class);
        Assert.assertEquals(plan.getSuccessors(next).get(0), join);
        next = plan.getSuccessors(load2).get(0);
        Assert.assertEquals(next.getClass(), LOFilter.class);
        Assert.assertEquals(plan.getSuccessors(next).get(0), join);
        next = plan.getSuccessors(join).get(0);
        Assert.assertEquals(next.getClass(), LOFilter.class);
        next = plan.getSuccessors(next).get(0);
        Assert.assertEquals(next.getClass(), LOStore.class);
        transformed = listener.getTransformed();
        Assert.assertEquals(transformed.size(), 7);

        // run merge filter rule
        r = new MergeFilter("MergeFilter");
        s = new HashSet<Rule>();
        s.add(r);
        ls = new ArrayList<Set<Rule>>();
        ls.add(s);
        optimizer = new MyPlanOptimizer(plan, ls, 3);
        listener = new MyPlanTransformListener();
        optimizer.addPlanTransformListener(listener);
        optimizer.optimize();

        // the filters should be the same as before, nothing to merge
        next = plan.getSuccessors(load1).get(0);
        Assert.assertEquals(next.getClass(), LOFilter.class);
        Assert.assertEquals(plan.getSuccessors(next).get(0), join);
        next = plan.getSuccessors(load2).get(0);
        Assert.assertEquals(next.getClass(), LOFilter.class);
        Assert.assertEquals(plan.getSuccessors(next).get(0), join);
        next = plan.getSuccessors(join).get(0);
        Assert.assertEquals(next.getClass(), LOFilter.class);
        next = plan.getSuccessors(next).get(0);
        Assert.assertEquals(next.getClass(), LOStore.class);
        transformed = listener.getTransformed();
        Assert.assertNull(transformed);
    }

    @Test
    public void testFilterRuleWith2And2() throws Exception {
        prep();

        // build an expression with 2 AND, it should split into 3 filters
        LogicalExpressionPlan p5 = new LogicalExpressionPlan();
        LogicalExpression lp3 = new ProjectExpression(p5, 0, 2, filter);
        LogicalSchema.LogicalFieldSchema fs =
            new LogicalSchema.LogicalFieldSchema(null, null, DataType.INTEGER);
        LogicalExpression cont = new ConstantExpression(p5, new Integer(3), fs);
        p5.add(lp3);
        p5.add(cont);
        LogicalExpression eq = new EqualExpression(p5, lp3, cont);
        lp3 = new ProjectExpression(p5, 0, 0, filter);
        LogicalExpression lp4 = new ProjectExpression(p5, 0, 3, filter);
        p5.add(lp4);
        p5.add(lp3);
        LogicalExpression eq2 = new EqualExpression(p5, lp3, lp4);
        LogicalExpression and1 = new AndExpression(p5, eq, eq2);
        lp3 = new ProjectExpression(p5, 0, 2, filter);
        fs = new LogicalSchema.LogicalFieldSchema(null, null, DataType.FLOAT);
        lp4 = new ProjectExpression(p5, 0, 5, filter);
        p5.add(lp3);
        p5.add(lp4);
        eq2 = new EqualExpression(p5, lp3, lp4);
        LogicalExpression and2 = new AndExpression(p5, and1, eq2);
        ((LOFilter)filter).setFilterPlan(p5);
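
        // run split filter rule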
Rule r = new SplitFilter("SplitFilter"); Set<Rule> s = new HashSet<Rule>(); s.add(r); List<Set<Rule>> ls = new ArrayList<Set<Rule>>(); ls.add(s); MyPlanOptimizer optimizer = new MyPlanOptimizer(plan, ls, 3); optimizer.optimize(); Assert.assertEquals(plan.getPredecessors(filter).get(0), join); Operator next = plan.getSuccessors(filter).get(0); Assert.assertEquals(next.getClass(), LOFilter.class); next = plan.getSuccessors(next).get(0); Assert.assertEquals(next.getClass(), LOFilter.class); next = plan.getSuccessors(next).get(0); Assert.assertEquals(LOStore.class, next.getClass()); // run push up filter rule r = new PushUpFilter("PushUpFilter"); s = new HashSet<Rule>(); s.add(r); ls = new ArrayList<Set<Rule>>(); ls.add(s); optimizer = new MyPlanOptimizer(plan, ls, 3); optimizer.optimize(); // 1 filter should be moved up to be after a load, and 2 filters should remain next = plan.getSuccessors(load1).get(0); Assert.assertEquals(next.getClass(), LOFilter.class); Assert.assertEquals(plan.getSuccessors(next).get(0), join); next = plan.getSuccessors(load2).get(0); Assert.assertEquals(next, join); next = plan.getSuccessors(join).get(0); Assert.assertEquals(next.getClass(), LOFilter.class); next = plan.getSuccessors(next).get(0); Assert.assertEquals(next.getClass(), LOFilter.class); next = plan.getSuccessors(next).get(0); Assert.assertEquals(next.getClass(), LOStore.class); // run merge filter rule r = new MergeFilter("MergeFilter"); s = new HashSet<Rule>(); s.add(r); ls = new ArrayList<Set<Rule>>(); ls.add(s); optimizer = new MyPlanOptimizer(plan, ls, 3); MyPlanTransformListener listener = new MyPlanTransformListener(); optimizer.addPlanTransformListener(listener); optimizer.optimize(); // the 2 filters after join should merge next = plan.getSuccessors(load1).get(0); Assert.assertEquals(next.getClass(), LOFilter.class); Assert.assertEquals(plan.getSuccessors(next).get(0), join); next = plan.getSuccessors(load2).get(0); Assert.assertEquals(next, join); next = plan.getSuccessors(join).get(0); Assert.assertEquals(next.getClass(), LOFilter.class); next = plan.getSuccessors(next).get(0); Assert.assertEquals(next.getClass(), LOStore.class); OperatorPlan transformed = listener.getTransformed(); Assert.assertEquals(transformed.size(), 2); } // See pig-1639 @Test public void testFilterUDFNegative() throws Exception { planTester.buildPlan("A = load 'myfile' as (name, age, gpa);"); planTester.buildPlan("B = group A by age;"); planTester.buildPlan("C = filter B by COUNT(A) < 18;"); org.apache.pig.impl.logicalLayer.LogicalPlan plan = planTester.buildPlan( "D = STORE C INTO 'empty';" ); LogicalPlan newLogicalPlan = migrateAndOptimizePlan( plan ); Operator load = newLogicalPlan.getSources().get( 0 ); Assert.assertTrue( load instanceof LOLoad ); Operator group = newLogicalPlan.getSuccessors( load ).get( 0 ); Assert.assertTrue( group instanceof LOCogroup ); Operator filter = newLogicalPlan.getSuccessors( group ).get( 0 ); Assert.assertTrue( filter instanceof LOFilter ); Operator store = newLogicalPlan.getSuccessors( filter ).get( 0 ); Assert.assertTrue( store instanceof LOStore ); } private LogicalPlan migrateAndOptimizePlan(org.apache.pig.impl.logicalLayer.LogicalPlan plan) throws FrontendException { LogicalPlan newLogicalPlan = migratePlan( plan ); PlanOptimizer optimizer = new NewPlanOptimizer( newLogicalPlan, 3 ); optimizer.optimize(); return newLogicalPlan; } private LogicalPlan migratePlan(org.apache.pig.impl.logicalLayer.LogicalPlan lp) throws VisitorException{ LogicalPlanMigrationVistor visitor = new 
        visitor.visit();
        org.apache.pig.newplan.logical.relational.LogicalPlan newPlan = visitor.getNewLogicalPlan();
        return newPlan;
    }

    public class NewPlanOptimizer extends LogicalPlanOptimizer {
        protected NewPlanOptimizer(OperatorPlan p, int iterations) {
            super(p, iterations, new HashSet<String>());
        }

        public void addPlanTransformListener(PlanTransformListener listener) {
            super.addPlanTransformListener(listener);
        }

        protected List<Set<Rule>> buildRuleSets() {
            List<Set<Rule>> ls = new ArrayList<Set<Rule>>();
            Set<Rule> s = new HashSet<Rule>();
            // add type cast inserter rule
            Rule r = new LoadTypeCastInserter("TypeCastInserter");
            s.add(r);
            ls.add(s);

            s = new HashSet<Rule>();
            r = new PushUpFilter("PushUpFilter");
            s.add(r);
            ls.add(s);

            return ls;
        }
    }

    public class MyPlanOptimizer extends PlanOptimizer {
        protected MyPlanOptimizer(OperatorPlan p, List<Set<Rule>> rs, int iterations) {
            super(p, rs, iterations);
            addPlanTransformListener(new SchemaPatcher());
            addPlanTransformListener(new ProjectionPatcher());
        }

        public void addPlanTransformListener(PlanTransformListener listener) {
            super.addPlanTransformListener(listener);
        }
    }

    public class MyPlanTransformListener implements PlanTransformListener {
        private OperatorPlan tp;

        @Override
        public void transformed(OperatorPlan fp, OperatorPlan tp) throws FrontendException {
            this.tp = tp;
        }

        public OperatorPlan getTransformed() {
            return tp;
        }
    }
}