/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.pig.test;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.Properties;
import org.apache.hadoop.mapreduce.Job;
import org.apache.pig.ExecType;
import org.apache.pig.Expression;
import org.apache.pig.LoadMetadata;
import org.apache.pig.ResourceSchema;
import org.apache.pig.ResourceStatistics;
import org.apache.pig.builtin.BinStorage;
import org.apache.pig.impl.PigContext;
import org.apache.pig.impl.logicalLayer.LOPrinter;
import org.apache.pig.impl.logicalLayer.LogicalPlan;
import org.apache.pig.impl.logicalLayer.PlanSetter;
import org.apache.pig.impl.logicalLayer.optimizer.ImplicitSplitInserter;
import org.apache.pig.impl.logicalLayer.optimizer.LogicalOptimizer;
import org.apache.pig.impl.logicalLayer.optimizer.OpLimitOptimizer;
import org.apache.pig.impl.logicalLayer.optimizer.TypeCastInserter;
import org.apache.pig.impl.logicalLayer.parser.ParseException;
import org.apache.pig.impl.plan.optimizer.OptimizerException;
import org.apache.pig.impl.util.Utils;
import org.apache.pig.test.utils.LogicalPlanTester;
import org.junit.Test;
/**
* Test the logical optimizer.
*/
public class TestLogicalOptimizer extends junit.framework.TestCase {
final String FILE_BASE_LOCATION = "test/org/apache/pig/test/data/DotFiles/" ;
static final int MAX_SIZE = 100000;
PigContext pc = new PigContext(ExecType.LOCAL, new Properties());
LogicalPlanTester planTester = new LogicalPlanTester(pc) ;
/*
@Before
public void setUp() {
planTester.reset();
}*/
public static String printLimitGraph(LogicalPlan plan) {
OpLimitOptimizerPrinter printer = new OpLimitOptimizerPrinter(plan) ;
String rep = "digraph graph1 {\n";
rep = rep + printer.printToString() ;
rep = rep + "}";
return rep;
}
public static int optimizePlan(LogicalPlan plan) throws Exception
{
LogicalOptimizer optimizer = new LogicalOptimizer(plan);
return optimizer.optimize();
}
public static void optimizePlan(LogicalPlan plan, ExecType mode) throws OptimizerException {
LogicalOptimizer optimizer = new LogicalOptimizer(plan, mode);
optimizer.optimize();
}
void compareWithGoldenFile(LogicalPlan plan, String filename) throws Exception
{
FileInputStream fis = new FileInputStream(filename);
byte[] b = new byte[MAX_SIZE];
int len = fis.read(b);
String goldenPlan = new String(b, 0, len);
String actualPlan = printLimitGraph(plan);
System.out.println("We get:");
System.out.println(actualPlan);
assertEquals(goldenPlan, actualPlan + "\n");
}
@Test
public void testTypeCastInsertion() throws Exception {
planTester.buildPlan("A = load 'myfile' as (p:int, q:long, r:float, "
+ "s:double, t:map [], u:tuple (x:int, y:int), " +
"v:bag {x:tuple(z:int)});");
LogicalPlan plan = planTester.buildPlan("B = order A by p;");
planTester.typeCheckAgainstDotFile(plan, FILE_BASE_LOCATION +
"optplan1.dot", true);
}
@Test
// Merget limit into sort
public void testOPLimit1Optimizer() throws Exception {
planTester.buildPlan("A = load 'myfile';");
planTester.buildPlan("B = order A by $0;");
LogicalPlan plan = planTester.buildPlan("C = limit B 100;");
optimizePlan(plan);
compareWithGoldenFile(plan, FILE_BASE_LOCATION + "optlimitplan1.dot");
}
@Test
// Merge limit into limit
public void testOPLimit2Optimizer() throws Exception {
planTester.buildPlan("A = load 'myfile';");
planTester.buildPlan("B = limit A 10;");
LogicalPlan plan = planTester.buildPlan("C = limit B 100;");
optimizePlan(plan);
compareWithGoldenFile(plan, FILE_BASE_LOCATION + "optlimitplan2.dot");
}
@Test
// Duplicate limit with two inputs
public void testOPLimit3Optimizer() throws Exception {
planTester.buildPlan("A = load 'myfile1';");
planTester.buildPlan("B = load 'myfile2';");
planTester.buildPlan("C = cross A, B;");
LogicalPlan plan = planTester.buildPlan("D = limit C 100;");
optimizePlan(plan);
compareWithGoldenFile(plan, FILE_BASE_LOCATION + "optlimitplan3.dot");
}
@Test
// Duplicte limit with one input
public void testOPLimit4Optimizer() throws Exception {
planTester.buildPlan("A = load 'myfile1';");
planTester.buildPlan("B = group A by $0;");
planTester.buildPlan("C = foreach B generate flatten(A);");
LogicalPlan plan = planTester.buildPlan("D = limit C 100;");
optimizePlan(plan);
compareWithGoldenFile(plan, FILE_BASE_LOCATION + "optlimitplan4.dot");
}
@Test
// Move limit up
public void testOPLimit5Optimizer() throws Exception {
planTester.buildPlan("A = load 'myfile1';");
planTester.buildPlan("B = foreach A generate $0;");
LogicalPlan plan = planTester.buildPlan("C = limit B 100;");
optimizePlan(plan);
compareWithGoldenFile(plan, FILE_BASE_LOCATION + "optlimitplan5.dot");
}
@Test
// Multiple LOLimit
public void testOPLimit6Optimizer() throws Exception {
planTester.buildPlan("A = load 'myfile';");
planTester.buildPlan("B = limit A 50;");
planTester.buildPlan("C = limit B 20;");
LogicalPlan plan = planTester.buildPlan("D = limit C 100;");
optimizePlan(plan);
compareWithGoldenFile(plan, FILE_BASE_LOCATION + "optlimitplan6.dot");
}
@Test
// Limit stay the same for ForEach with a flatten
public void testOPLimit7Optimizer() throws Exception {
planTester.buildPlan("A = load 'myfile1';");
planTester.buildPlan("B = foreach A generate flatten($0);");
LogicalPlan plan = planTester.buildPlan("C = limit B 100;");
optimizePlan(plan);
compareWithGoldenFile(plan, FILE_BASE_LOCATION + "optlimitplan7.dot");
}
@Test
//Limit in the local mode, need to make sure limit stays after a sort
public void testOPLimit8Optimizer() throws Exception {
planTester.buildPlan("A = load 'myfile';");
planTester.buildPlan("B = order A by $0;");
LogicalPlan plan = planTester.buildPlan("C = limit B 10;");
optimizePlan(plan, ExecType.LOCAL);
compareWithGoldenFile(plan, FILE_BASE_LOCATION + "optlimitplan8.dot");
}
@Test
//Limit in the local mode, need to make sure limit stays after a sort
public void testOPLimit9Optimizer() throws Exception {
planTester.buildPlan("A = load 'myfile';");
planTester.buildPlan("B = order A by $0;");
LogicalPlan plan = planTester.buildPlan("C = limit B 10;");
optimizePlan(plan);
compareWithGoldenFile(plan, FILE_BASE_LOCATION + "optlimitplan9.dot");
}
@Test
//See bug PIG-913
public void testOPLimit10Optimizer() throws Exception {
planTester.buildPlan("A = load 'myfile' AS (s:chararray);");
planTester.buildPlan("B = limit A 100;");
LogicalPlan plan = planTester.buildPlan("C = GROUP B by $0;");
optimizePlan(plan);
compareWithGoldenFile(plan, FILE_BASE_LOCATION + "optlimitplan10.dot");
}
/**
* Test that {@link OpLimitOptimizer} returns false on the check if
* pre-conditions for pushing limit up are not met
* @throws Exception
*/
@Test
public void testOpLimitOptimizerCheck() throws Exception {
planTester.buildPlan("A = load 'myfile';");
planTester.buildPlan("B = foreach A generate $0;");
LogicalPlan plan = planTester.buildPlan("C = limit B 100;");
LogicalOptimizerDerivative optimizer = new LogicalOptimizerDerivative(plan);
int numIterations = optimizer.optimize();
assertFalse("Checking number of iterations of the optimizer [actual = "
+ numIterations + ", expected < " + optimizer.getMaxIterations() +
"]", optimizer.getMaxIterations() == numIterations);
}
@Test
//Test to ensure that the right exception is thrown
public void testErrImplicitSplitInserter() throws Exception {
LogicalPlan lp = new LogicalPlan();
ImplicitSplitInserter isi = new ImplicitSplitInserter(lp);
try {
isi.transform(lp.getRoots());
} catch(Exception e) {
assertTrue(((OptimizerException)e).getErrorCode() == 2052);
}
}
@Test
//Test to ensure that the right exception is thrown
public void testErrTypeCastInserter() throws Exception {
LogicalPlan lp = new LogicalPlan();
TypeCastInserter tci = new TypeCastInserter(lp, "hello");
try {
tci.transform(lp.getRoots());
} catch(Exception e) {
assertTrue(((OptimizerException)e).getErrorCode() == 2052);
}
}
@Test
//Test to ensure that the right exception is thrown
public void testErrOpLimitOptimizer() throws Exception {
LogicalPlan lp = new LogicalPlan();
OpLimitOptimizer olo = new OpLimitOptimizer(lp);
try {
olo.transform(lp.getRoots());
} catch(Exception e) {
assertTrue(((OptimizerException)e).getErrorCode() == 2052);
}
}
@Test
//See bug PIG-995
//We shall throw no exception here
public void testOPLimit11Optimizer() throws Exception {
LogicalPlan plan = planTester.buildPlan("B = foreach (limit (order (load 'myfile' AS (a0, a1, a2)) by $1) 10) generate $0;");
optimizePlan(plan);
}
@Test
//See bug PIG-1445
public void testOPLimit12Optimizer() throws Exception {
planTester.buildPlan("A = load 'myfile';");
planTester.buildPlan("B = STREAM A THROUGH `stream.pl`;");
LogicalPlan plan = planTester.buildPlan("C = LIMIT B 10;");
optimizePlan(plan);
compareWithGoldenFile(plan, FILE_BASE_LOCATION + "optlimitplan12.dot");
}
/**
* test to check that {@link LoadMetadata#getSchema(String, Job)} is called
* only once even if the optimizer is fired and schemas and projection maps
* are rebuilt
*/
@Test
public void testLoadGetSchemaCalledOnce() throws Exception {
String checkFileName = "checkLoadGetSchemaCalledOnce.txt";
new File(checkFileName).delete();
try{
planTester.buildPlan("A = load 'myfile' using "
+ DummyMetadataLoader.class.getName() + "('"+ checkFileName +"');");
planTester.buildPlan("B = foreach A generate $0 ;");
LogicalPlan plan = planTester.buildPlan("C = limit B 10;");
new LOPrinter(System.err, plan).visit();
// Set the logical plan values correctly in all the operators
PlanSetter ps = new PlanSetter(plan);
ps.visit();
// the optimizer should run atleast one iteration
LogicalOptimizerDerivative optimizer =
new LogicalOptimizerDerivative(plan);
int numIterations = optimizer.optimize();
assertTrue(numIterations > 0);
assertTrue(new File(checkFileName).exists());
} finally {
new File(checkFileName).delete();
}
}
// a subclass of LogicalOptimizer which can return the maximum iterations
// the optimizer would try the check() and transform() methods
static class LogicalOptimizerDerivative extends LogicalOptimizer {
public LogicalOptimizerDerivative(LogicalPlan plan) {
super(plan);
}
public int getMaxIterations() {
return mMaxIterations;
}
}
/**
* A dummy loader which extends {@link LoadMetadata} and in the
* {@link LoadMetadata#getSchema(String, Job)} implementation checks that
* the method is only called once.
*/
public static class DummyMetadataLoader extends BinStorage implements LoadMetadata {
String checkFileName;
public DummyMetadataLoader() {
}
public DummyMetadataLoader(String checkFileName) {
this.checkFileName = checkFileName;
}
@Override
public String[] getPartitionKeys(String location, Job job)
throws IOException {
return null;
}
@Override
public ResourceSchema getSchema(String location, Job job)
throws IOException {
try {
// the create() below will fail is this method gets called
// more than once
if(!new File(checkFileName).createNewFile()) {
throw new RuntimeException(checkFileName + " already exists!");
}
return new ResourceSchema(
Utils.getSchemaFromString("a:chararray,b:int"));
} catch (ParseException e) {
throw new IOException(e);
}
}
@Override
public ResourceStatistics getStatistics(String location, Job job)
throws IOException {
return null;
}
@Override
public void setPartitionFilter(Expression partitionFilter)
throws IOException {
}
}
}