/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.pig.test;

import java.io.FileInputStream;
import java.io.IOException;
import java.util.*;

import org.apache.pig.ExecType;
import org.apache.pig.newplan.logical.LogicalPlanMigrationVistor;
import org.apache.pig.newplan.logical.optimizer.LogicalPlanOptimizer;
import org.apache.pig.newplan.logical.relational.LogicalPlan;
import org.apache.pig.newplan.logical.rules.LoadTypeCastInserter;
import org.apache.pig.newplan.logical.rules.LimitOptimizer;
import org.apache.pig.newplan.OperatorPlan;
import org.apache.pig.newplan.optimizer.PlanOptimizer;
import org.apache.pig.newplan.optimizer.Rule;
import org.apache.pig.impl.PigContext;
import org.apache.pig.impl.logicalLayer.optimizer.OpLimitOptimizer;
import org.apache.pig.impl.plan.VisitorException;
import org.apache.pig.impl.plan.optimizer.OptimizerException;
import org.apache.pig.test.TestLogicalOptimizer.LogicalOptimizerDerivative;
import org.apache.pig.test.utils.LogicalPlanTester;

import junit.framework.Assert;

import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;

/**
 * Tests for the limit optimization rule of the new logical plan.
 *
 * Most tests build an old-style logical plan from Pig Latin statements,
 * migrate it to the new logical plan, run a small optimizer (type-cast
 * insertion followed by {@link LimitOptimizer}) and compare the printed plan
 * with a golden DOT file stored under {@link #FILE_BASE_LOCATION}.
 */
public class TestOptimizeLimit {

    /** Directory (relative to the working directory) holding the golden DOT files. */
    final String FILE_BASE_LOCATION = "test/org/apache/pig/test/data/DotFiles/" ;

    /** Upper bound, in bytes, on how much of a golden file is read. */
    static final int MAX_SIZE = 100000;

    PigContext pc = new PigContext( ExecType.LOCAL, new Properties() );
    LogicalPlanTester planTester = new LogicalPlanTester(pc) ;

    @BeforeClass
    public static void setup() {
    }

    @AfterClass
    public static void tearDown() {
    }

    /**
     * Prints the given plan, echoes both the actual and the expected text to
     * standard output, and asserts that the golden file content equals the
     * printed plan followed by a single newline.
     *
     * @param plan     the optimized plan to print
     * @param filename path of the golden file to compare against
     * @throws Exception if printing the plan or reading the golden file fails
     */
    void compareWithGoldenFile(LogicalPlan plan, String filename) throws Exception {
        String actualPlan = printLimitGraph(plan);
        System.out.println("We get:");
        System.out.println(actualPlan);

        String goldenPlan = readGoldenFile(filename);
        System.out.println("Expected:");
        System.out.println(goldenPlan);

        Assert.assertEquals(goldenPlan, actualPlan + "\n");
    }

    /**
     * Reads at most {@link #MAX_SIZE} bytes of the given file and decodes them
     * with the platform default charset.
     *
     * A single {@code read} call is allowed to return fewer bytes than are
     * available, so the read is repeated until end of file or until the buffer
     * is full. The stream is always closed, even when reading fails.
     *
     * @param filename path of the file to read
     * @return the file content (empty string for an empty file)
     * @throws IOException if the file cannot be opened or read
     */
    private static String readGoldenFile(String filename) throws IOException {
        byte[] buffer = new byte[MAX_SIZE];
        int len = 0;
        FileInputStream fis = new FileInputStream(filename);
        try {
            int n;
            // Stop before issuing a zero-length read once the buffer is full.
            while (len < buffer.length
                    && (n = fis.read(buffer, len, buffer.length - len)) > 0) {
                len += n;
            }
        } finally {
            fis.close();
        }
        return new String(buffer, 0, len);
    }

    /**
     * Renders the plan as a DOT digraph named {@code graph1}.
     *
     * @param plan the plan to print
     * @return the DOT text, without a trailing newline
     * @throws IOException if the plan printer fails
     */
    public static String printLimitGraph(LogicalPlan plan) throws IOException {
        OptimizeLimitPlanPrinter printer = new OptimizeLimitPlanPrinter(plan) ;
        String rep = "digraph graph1 {\n";
        rep = rep + printer.printToString() ;
        rep = rep + "}";
        return rep;
    }

    @Test
    // Merge limit into sort
    public void testOPLimit1Optimizer() throws Exception {
        planTester.buildPlan("A = load 'myfile';");
        planTester.buildPlan("B = order A by $0;");
        planTester.buildPlan("C = limit B 100;");
        org.apache.pig.impl.logicalLayer.LogicalPlan plan =
            planTester.buildPlan( "store C into 'empty';" );
        LogicalPlan newLogicalPlan = migrateAndOptimizePlan(plan);
        compareWithGoldenFile(newLogicalPlan, FILE_BASE_LOCATION + "new-optlimitplan1.dot");
    }

    @Test
    // Merge limit into limit
    public void testOPLimit2Optimizer() throws Exception {
        planTester.buildPlan("A = load 'myfile';");
        planTester.buildPlan("B = limit A 10;");
        planTester.buildPlan("C = limit B 100;");
        org.apache.pig.impl.logicalLayer.LogicalPlan plan =
            planTester.buildPlan( "store C into 'empty';" );
        LogicalPlan newLogicalPlan = migrateAndOptimizePlan(plan);
        compareWithGoldenFile(newLogicalPlan, FILE_BASE_LOCATION + "new-optlimitplan2.dot");
    }

    @Test
    // Duplicate limit with two inputs
    public void testOPLimit3Optimizer() throws Exception {
        planTester.buildPlan("A = load 'myfile1';");
        planTester.buildPlan("B = load 'myfile2';");
        planTester.buildPlan("C = cross A, B;");
        org.apache.pig.impl.logicalLayer.LogicalPlan plan =
            planTester.buildPlan("D = limit C 100;");
        LogicalPlan newLogicalPlan = migrateAndOptimizePlan(plan);
        compareWithGoldenFile(newLogicalPlan, FILE_BASE_LOCATION + "new-optlimitplan3.dot");
    }

    @Test
    // Duplicate limit with one input
    public void testOPLimit4Optimizer() throws Exception {
        planTester.buildPlan("A = load 'myfile1';");
        planTester.buildPlan("B = group A by $0;");
        planTester.buildPlan("C = foreach B generate flatten(A);");
        org.apache.pig.impl.logicalLayer.LogicalPlan plan =
            planTester.buildPlan("D = limit C 100;");
        LogicalPlan newLogicalPlan = migrateAndOptimizePlan(plan);
        compareWithGoldenFile(newLogicalPlan, FILE_BASE_LOCATION + "new-optlimitplan4.dot");
    }

    @Test
    // Move limit up
    public void testOPLimit5Optimizer() throws Exception {
        planTester.buildPlan("A = load 'myfile1';");
        planTester.buildPlan("B = foreach A generate $0;");
        planTester.buildPlan("C = limit B 100;");
        org.apache.pig.impl.logicalLayer.LogicalPlan plan =
            planTester.buildPlan( "store C into 'empty';" );
        LogicalPlan newLogicalPlan = migrateAndOptimizePlan(plan);
        compareWithGoldenFile(newLogicalPlan, FILE_BASE_LOCATION + "new-optlimitplan5.dot");
    }

    @Test
    // Multiple LOLimit
    public void testOPLimit6Optimizer() throws Exception {
        planTester.buildPlan("A = load 'myfile';");
        planTester.buildPlan("B = limit A 50;");
        planTester.buildPlan("C = limit B 20;");
        planTester.buildPlan("D = limit C 100;");
        org.apache.pig.impl.logicalLayer.LogicalPlan plan =
            planTester.buildPlan( "store D into 'empty';" );
        LogicalPlan newLogicalPlan = migrateAndOptimizePlan(plan);
        compareWithGoldenFile(newLogicalPlan, FILE_BASE_LOCATION + "new-optlimitplan6.dot");
    }

    @Test
    // Limit stay the same for ForEach with a flatten
    public void testOPLimit7Optimizer() throws Exception {
        planTester.buildPlan("A = load 'myfile1';");
        planTester.buildPlan("B = foreach A generate flatten($0);");
        org.apache.pig.impl.logicalLayer.LogicalPlan plan =
            planTester.buildPlan("C = limit B 100;");
        LogicalPlan newLogicalPlan = migrateAndOptimizePlan(plan);
        compareWithGoldenFile(newLogicalPlan, FILE_BASE_LOCATION + "new-optlimitplan7.dot");
    }

    @Test
    // Limit in the local mode, need to make sure limit stays after a sort
    public void testOPLimit8Optimizer() throws Exception {
        planTester.buildPlan("A = load 'myfile';");
        planTester.buildPlan("B = order A by $0;");
        planTester.buildPlan("C = limit B 10;");
        org.apache.pig.impl.logicalLayer.LogicalPlan plan =
            planTester.buildPlan( "store C into 'empty';" );
        LogicalPlan newLogicalPlan = migrateAndOptimizePlan(plan);
        compareWithGoldenFile(newLogicalPlan, FILE_BASE_LOCATION + "new-optlimitplan8.dot");
    }

    @Test
    public void testOPLimit9Optimizer() throws Exception {
        planTester.buildPlan("A = load 'myfile';");
        planTester.buildPlan("B = order A by $0;");
        planTester.buildPlan("C = limit B 10;");
        org.apache.pig.impl.logicalLayer.LogicalPlan plan =
            planTester.buildPlan( "store C into 'empty';" );
        LogicalPlan newLogicalPlan = migrateAndOptimizePlan(plan);
        compareWithGoldenFile(newLogicalPlan, FILE_BASE_LOCATION + "new-optlimitplan9.dot");
    }

    @Test
    // See bug PIG-913
    public void testOPLimit10Optimizer() throws Exception {
        planTester.buildPlan("A = load 'myfile' AS (s:chararray);");
        planTester.buildPlan("B = limit A 100;");
        org.apache.pig.impl.logicalLayer.LogicalPlan plan =
            planTester.buildPlan("C = GROUP B by $0;");
        LogicalPlan newLogicalPlan = migrateAndOptimizePlan(plan);
        compareWithGoldenFile(newLogicalPlan, FILE_BASE_LOCATION + "new-optlimitplan10.dot");
    }

    /**
     * Test that {@link OpLimitOptimizer} returns false on the check if
     * pre-conditions for pushing limit up are not met
     * @throws Exception
     */
    @Test
    public void testOpLimitOptimizerCheck() throws Exception {
        planTester.buildPlan("A = load 'myfile';");
        planTester.buildPlan("B = foreach A generate $0;");
        org.apache.pig.impl.logicalLayer.LogicalPlan plan =
            planTester.buildPlan("C = limit B 100;");
        LogicalOptimizerDerivative optimizer = new LogicalOptimizerDerivative(plan);
        int numIterations = optimizer.optimize();
        // The optimizer must stop before exhausting its iteration budget.
        Assert.assertFalse("Checking number of iterations of the optimizer [actual = "
                + numIterations + ", expected < " + optimizer.getMaxIterations() + "]",
                optimizer.getMaxIterations() == numIterations);
    }

    @Test
    // Test to ensure that the right exception is thrown
    public void testErrOpLimitOptimizer() throws Exception {
        org.apache.pig.impl.logicalLayer.LogicalPlan lp =
            new org.apache.pig.impl.logicalLayer.LogicalPlan();
        OpLimitOptimizer olo = new OpLimitOptimizer(lp);
        try {
            olo.transform(lp.getRoots());
            // Without this the test would pass silently when nothing is thrown.
            // The failure is raised as an Error, so the catch below does not swallow it.
            Assert.fail("Expected an OptimizerException with error code 2052");
        } catch (Exception e) {
            Assert.assertTrue("Unexpected exception type: " + e,
                    e instanceof OptimizerException);
            Assert.assertEquals(2052, ((OptimizerException) e).getErrorCode());
        }
    }

    @Test
    // See bug PIG-995
    // We shall throw no exception here
    public void testOPLimit11Optimizer() throws Exception {
        org.apache.pig.impl.logicalLayer.LogicalPlan plan = planTester.buildPlan(
            "B = foreach (limit (order (load 'myfile' AS (a0, a1, a2)) by $1) 10) generate $0;");
        migrateAndOptimizePlan(plan);
    }

    /**
     * Optimizer restricted to two rule sets, applied in order: a
     * {@link LoadTypeCastInserter} rule, then the {@link LimitOptimizer} rule
     * under test.
     */
    public class MyPlanOptimizer extends LogicalPlanOptimizer {

        protected MyPlanOptimizer(OperatorPlan p, int iterations) {
            super( p, iterations, new HashSet<String>() );
        }

        /**
         * Builds the ordered list of rule sets used by this optimizer.
         *
         * @return a list with one single-rule set per rule
         */
        protected List<Set<Rule>> buildRuleSets() {
            List<Set<Rule>> ls = new ArrayList<Set<Rule>>();

            Set<Rule> typeCastRules = new HashSet<Rule>();
            typeCastRules.add(new LoadTypeCastInserter( "TypeCastInserter"));
            ls.add(typeCastRules);

            Set<Rule> limitRules = new HashSet<Rule>();
            limitRules.add(new LimitOptimizer("OptimizeLimit"));
            ls.add(limitRules);

            return ls;
        }
    }

    /**
     * Migrates an old-style logical plan to the new logical plan and runs
     * {@link MyPlanOptimizer} on it with an iteration limit of 3.
     *
     * @param plan the old-style logical plan
     * @return the migrated plan after optimization
     * @throws IOException if migration or optimization fails
     */
    private LogicalPlan migrateAndOptimizePlan(org.apache.pig.impl.logicalLayer.LogicalPlan plan)
            throws IOException {
        LogicalPlan newLogicalPlan = migratePlan( plan );
        PlanOptimizer optimizer = new MyPlanOptimizer( newLogicalPlan, 3 );
        optimizer.optimize();
        return newLogicalPlan;
    }

    /**
     * Converts an old-style logical plan into the new logical plan
     * representation using {@link LogicalPlanMigrationVistor}.
     *
     * @param lp the old-style logical plan
     * @return the migrated plan
     * @throws VisitorException if the migration visitor fails
     */
    private LogicalPlan migratePlan(org.apache.pig.impl.logicalLayer.LogicalPlan lp)
            throws VisitorException {
        LogicalPlanMigrationVistor visitor = new LogicalPlanMigrationVistor(lp);
        visitor.visit();
        return visitor.getNewLogicalPlan();
    }
}