/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.pig.test;
import java.io.IOException;
import java.io.InputStream;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Properties;
import java.net.URL;
import java.util.List;
import java.util.ArrayList;
import java.util.Set;
import junit.framework.AssertionFailedError;
import org.junit.Test;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.pig.LoadCaster;
import org.apache.pig.LoadFunc;
import org.apache.pig.FuncSpec;
import org.apache.pig.PigServer;
import org.apache.pig.backend.datastorage.DataStorage;
import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigSplit;
import org.apache.pig.builtin.PigStorage;
import org.apache.pig.data.DataBag;
import org.apache.pig.data.Tuple;
import org.apache.pig.data.BagFactory;
import org.apache.pig.data.TupleFactory;
import org.apache.pig.impl.PigContext;
import org.apache.pig.ExecType;
import org.apache.pig.impl.builtin.GFAny;
import org.apache.pig.impl.io.BufferedPositionedInputStream;
import org.apache.pig.impl.plan.CompilationMessageCollector;
import org.apache.pig.impl.plan.OperatorKey;
import org.apache.pig.impl.plan.PlanValidationException;
import org.apache.pig.impl.logicalLayer.*;
import org.apache.pig.impl.logicalLayer.schema.Schema;
import org.apache.pig.impl.logicalLayer.validators.SchemaAliasValidator;
import org.apache.pig.impl.logicalLayer.validators.TypeCheckingValidator;
import org.apache.pig.data.DataType;
import org.apache.pig.impl.logicalLayer.parser.ParseException ;
import org.apache.pig.impl.util.MultiMap;
import org.apache.pig.test.utils.Identity;
import org.apache.pig.impl.util.LogUtils;
import org.apache.pig.PigException;
public class TestLogicalPlanBuilder extends junit.framework.TestCase {
// Cluster handle obtained from MiniCluster.buildCluster() when the test instance is created.
// NOTE(review): not referenced by the tests visible in this part of the file; presumably kept
// so the cluster is available before any plan is built - confirm.
MiniCluster cluster = MiniCluster.buildCluster();
// Commons-logging logger named after the runtime class of this test instance.
private final Log log = LogFactory.getLog(getClass());
/** Feeds a foreach over an unschematized load that generates positional columns $1 and $2 to buildPlan. */
@Test
public void testQuery1() {
    buildPlan("foreach (load 'a') generate $1,$2;");
}
/** Loads through PigStorage with a ':' argument and generates a column plus a string constant. */
@Test
public void testQuery2() {
    final String loaderName = PigStorage.class.getName();
    buildPlan("foreach (load 'a' using " + loaderName + "(':')) generate $1, 'aoeuaoeu' ;");
}
// TODO FIX Query3 and Query4
/** Cogroups two loads on $1 and applies the fully qualified builtin AVG to $1. */
@Test
public void testQuery3() {
    buildPlan("foreach (cogroup (load 'a') by $1, (load 'b') by $1) generate org.apache.pig.builtin.AVG($1) ;");
}
/** Calls AVG with two positional arguments directly over a load. */
@Test
public void testQuery4() {
    buildPlan("foreach (load 'a') generate AVG($1, $2) ;");
}
/** Groups a load with ALL and generates the second column of the grouped relation. */
@Test
public void testQuery5() {
    buildPlan("foreach (group (load 'a') ALL) generate $1 ;");
}
/** Groups a load by $1 and generates the group key together with a string constant. */
@Test
public void testQuery6() {
    buildPlan("foreach (group (load 'a') by $1) generate group, '1' ;");
}
/** Loads through PigStorage with an empty argument list and generates $1. */
@Test
public void testQuery7() {
    final String loaderName = PigStorage.class.getName();
    buildPlan("foreach (load 'a' using " + loaderName + "()) generate $1 ;");
}
/** Cogroups two loads on parenthesized keys and projects $1 out of each grouped bag. */
@Test
public void testQuery10() {
    buildPlan("foreach (cogroup (load 'a') by ($1), (load 'b') by ($1)) generate $1.$1, $2.$1 ;");
}
// TODO FIX Query11 and Query12
/** Uses the group keyword with two inputs and generates the key plus AVG over the first bag. */
@Test
public void testQuery11() {
    buildPlan(" foreach (group (load 'a') by $1, (load 'b') by $2) generate group, AVG($1) ;");
}
/** Applies AVG to $1 of a relation loaded through PigStorage with no arguments. */
@Test
public void testQuery12() {
    final String loaderName = PigStorage.class.getName();
    buildPlan("foreach (load 'a' using " + loaderName + "()) generate AVG($1) ;");
}
/** Cogroups a single load with ALL and generates the group column. */
@Test
public void testQuery13() {
    buildPlan("foreach (cogroup (load 'a') ALL) generate group ;");
}
/** Groups by a two-column key and flattens the resulting group column. */
@Test
public void testQuery14() {
    buildPlan("foreach (group (load 'a') by ($6, $7)) generate flatten(group) ;");
}
/** Generates two positional columns with a string constant between them. */
@Test
public void testQuery15() {
    buildPlan(" foreach (load 'a') generate $1, 'hello', $3 ;");
}
/** Checks the define syntax: the first root of the resulting plan must be an LODefine. */
@Test
public void testQuery100() {
    LogicalPlan plan = buildPlan("define FUNC ARITY();");
    LogicalOperator firstRoot = plan.getRoots().get(0);
    assertTrue(firstRoot instanceof LODefine);
}
/**
 * Expects buildPlan to reject a projection written as a quoted constant (A.'1').
 * The rejection must arrive as an AssertionFailedError whose message contains "Exception".
 */
@Test
public void testQueryFail1() {
    String query = " foreach (group (A = load 'a') by $1) generate A.'1' ;";
    boolean exceptionThrown = false;
    try {
        buildPlan(query);
    } catch (AssertionFailedError e) {
        assertTrue(e.getMessage().contains("Exception"));
        exceptionThrown = true;
    }
    // Without this check the test passes vacuously when the query is accepted.
    assertTrue("Expected query to be rejected: " + query, exceptionThrown);
}
/**
 * Expects buildPlan to reject a foreach whose input is an unparenthesized group expression.
 * The rejection must arrive as an AssertionFailedError whose message contains "Exception".
 */
@Test
public void testQueryFail2() {
    String query = "foreach group (load 'a') by $1 generate $1.* ;";
    boolean exceptionThrown = false;
    try {
        buildPlan(query);
    } catch (AssertionFailedError e) {
        assertTrue(e.getMessage().contains("Exception"));
        exceptionThrown = true;
    }
    // Without this check the test passes vacuously when the query is accepted.
    assertTrue("Expected query to be rejected: " + query, exceptionThrown);
}
/**
 * Expects buildPlan to reject a statement that starts with generate instead of foreach.
 * The rejection must arrive as an AssertionFailedError whose message contains "Exception".
 */
@Test
public void testQueryFail3() {
    String query = "generate DISTINCT foreach (load 'a');";
    boolean exceptionThrown = false;
    try {
        buildPlan(query);
    } catch (AssertionFailedError e) {
        assertTrue(e.getMessage().contains("Exception"));
        exceptionThrown = true;
    }
    // Without this check the test passes vacuously when the query is accepted.
    assertTrue("Expected query to be rejected: " + query, exceptionThrown);
}
/**
 * Expects buildPlan to reject a generate-first statement with bracketed ORDER BY/column lists.
 * The rejection must arrive as an AssertionFailedError whose message contains "Exception".
 */
@Test
public void testQueryFail4() {
    String query = "generate [ORDER BY $0][$3, $4] foreach (load 'a');";
    boolean exceptionThrown = false;
    try {
        buildPlan(query);
    } catch (AssertionFailedError e) {
        assertTrue(e.getMessage().contains("Exception"));
        exceptionThrown = true;
    }
    // Without this check the test passes vacuously when the query is accepted.
    assertTrue("Expected query to be rejected: " + query, exceptionThrown);
}
/**
 * Expects buildPlan to reject a generate-first statement that calls TestApplyFunc on $2.*.
 * The rejection must arrive as an AssertionFailedError whose message contains "Exception".
 */
@Test
public void testQueryFail5() {
    String query = "generate " + TestApplyFunc.class.getName() + "($2.*) foreach (load 'a');";
    boolean exceptionThrown = false;
    try {
        buildPlan(query);
    } catch (AssertionFailedError e) {
        assertTrue(e.getMessage().contains("Exception"));
        exceptionThrown = true;
    }
    // Without this check the test passes vacuously when the query is accepted.
    assertTrue("Expected query to be rejected: " + query, exceptionThrown);
}
/**
 * Regression test for bug 831620 ("User generate functions must be in default package"):
 * calls the nested TestApplyFunc by its fully qualified class name inside a generate.
 */
// TODO FIX Query17
@Test
public void testQuery17() {
    final String udfName = TestApplyFunc.class.getName();
    // No separator between ")" and "generate" - this matches the original query text.
    buildPlan("foreach (load 'A')generate " + udfName + "($1);");
}
/** Eval function used by the tests: returns a new tuple created from all fields of the input tuple. */
public static class TestApplyFunc extends org.apache.pig.EvalFunc<Tuple> {
    @Override
    public Tuple exec(Tuple input) throws IOException {
        return TupleFactory.getInstance().newTuple(input.getAll());
    }
}
/**
 * Validates that PARALLEL is parsed correctly (bug 831714): the first successor of the plan's
 * first root must be an LOCogroup whose requested parallelism is 16.
 */
@Test
public void testQuery18() {
    LogicalPlan plan = buildPlan("FOREACH (group (load 'a') ALL PARALLEL 16) generate group;");
    LogicalOperator firstRoot = plan.getRoots().get(0);
    List<LogicalOperator> successors = plan.getSuccessors(firstRoot);
    LogicalOperator candidate = successors.get(0);
    if (!(candidate instanceof LOCogroup)) {
        fail("Error: Unexpected Parse Tree output");
    }
    assertTrue(((LOCogroup) candidate).getRequestedParallelism() == 16);
}
/** Defines alias a with a load, then filters it in a second buildPlan call. */
@Test
public void testQuery19() {
    final String loadStatement = "a = load 'a';";
    final String filterStatement = "b = filter a by $1 == '3';";
    buildPlan(loadStatement);
    buildPlan(filterStatement);
}
/** Generates a conditional (bincond) expression over positional columns. */
@Test
public void testQuery20() {
    buildPlan("foreach (load 'a') generate ($1 == '3'? $2 : $3) ;");
}
/** Cogroups two previously defined aliases and flattens a multi-column projection of B. */
@Test
public void testQuery21() {
    for (String statement : new String[] {
            "A = load 'a';",
            "B = load 'b';",
            "foreach (cogroup A by ($1), B by ($1)) generate A, flatten(B.($1, $2, $3));"}) {
        buildPlan(statement);
    }
}
/** Nested foreach over a cogroup: orders B inside the block, then flattens A and projects B. */
@Test
public void testQuery22() {
    buildPlan("A = load 'a';");
    buildPlan("B = load 'b';");
    buildPlan("C = cogroup A by ($1), B by ($1);");
    String nestedForeach = new StringBuilder()
            .append("foreach C { ")
            .append("B = order B by $0; ")
            .append("generate FLATTEN(A), B.($1, $2, $3) ;")
            .append("};")
            .toString();
    buildPlan(nestedForeach);
}
/**
 * Groups a schematized load by (*, $0); if buildPlan raises an AssertionFailedError, its message
 * must mention the "Grouping attributes can either be star (*" restriction.
 */
@Test
public void testQuery22Fail() {
    buildPlan("A = load 'a' as (a:int, b: double);");
    // NOTE(review): if buildPlan accepts the statement no assertion runs and the test still
    // passes. testQuery23Fail expects "(*, $0)" to reach the arity check, which suggests star
    // may be combined with other keys - confirm whether this statement is still rejected.
    try {
        buildPlan("B = group A by (*, $0);");
    } catch (AssertionFailedError failure) {
        String message = failure.getMessage();
        assertTrue(message.contains("Grouping attributes can either be star (*"));
    }
}
/** Nested foreach over a cogroup that chains distinct, two filters and an arrange before generating. */
@Test
public void testQuery23() {
    buildPlan("A = load 'a';");
    buildPlan("B = load 'b';");
    buildPlan("C = cogroup A by ($1), B by ($1);");
    StringBuilder nestedForeach = new StringBuilder();
    nestedForeach.append("foreach C { ");
    nestedForeach.append("A = Distinct A; ");
    nestedForeach.append("B = FILTER A BY $1 < 'z'; ");
    //TODO
    //A sequence of filters within a foreach translates to
    //a split statement. Currently it breaks as adding an
    //additional output to the filter fails as filter supports
    //single output
    nestedForeach.append("C = FILTER A BY $2 == $3;");
    nestedForeach.append("B = ARRANGE B BY $1;");
    nestedForeach.append("GENERATE A, FLATTEN(B.$0);");
    nestedForeach.append("};");
    buildPlan(nestedForeach.toString());
}
/**
 * Cogroup with star plus an extra key on a schematized input against a two-key input must fail
 * with the "arity ... do not match" message.
 */
@Test
public void testQuery23Fail() {
    buildPlan("A = load 'a' as (a: int, b:double);");
    buildPlan("B = load 'b';");
    AssertionFailedError caught = null;
    try {
        buildPlan("C = cogroup A by (*, $0), B by ($0, $1);");
    } catch (AssertionFailedError failure) {
        caught = failure;
    }
    // A missing failure and a wrong message are both reported as assertion failures.
    assertTrue(caught != null);
    assertTrue(caught.getMessage().contains("The arity of cogroup/group by columns do not match"));
}
/** Cogroup by star on an input without a schema must fail with the "only allowed if the input has a schema" message. */
@Test
public void testQuery23Fail2() {
    buildPlan("A = load 'a';");
    buildPlan("B = load 'b';");
    AssertionFailedError caught = null;
    try {
        buildPlan("C = cogroup A by (*, $0), B by ($0, $1);");
    } catch (AssertionFailedError failure) {
        caught = failure;
    }
    // A missing failure and a wrong message are both reported as assertion failures.
    assertTrue(caught != null);
    assertTrue(caught.getMessage().contains("Cogroup/Group by * is only allowed if the input has a schema"));
}
/** Cogroup by star on inputs declared with two and one columns must fail with the arity-mismatch message. */
@Test
public void testQuery23Fail3() {
    buildPlan("A = load 'a' as (a: int, b:double);");
    buildPlan("B = load 'b' as (a:int);");
    AssertionFailedError caught = null;
    try {
        buildPlan("C = cogroup A by *, B by *;");
    } catch (AssertionFailedError failure) {
        caught = failure;
    }
    // A missing failure and a wrong message are both reported as assertion failures.
    assertTrue(caught != null);
    assertTrue(caught.getMessage().contains("The arity of cogroup/group by columns do not match"));
}
/** Generates a conditional expression and a positional column from a previously defined alias. */
@Test
public void testQuery24() {
    buildPlan("a = load 'a';");
    buildPlan("foreach a generate (($0 == $1) ? 'a' : $2), $4 ;");
}
/** Nested foreach that filters $0 with an AND of two comparisons and generates the result. */
@Test
public void testQuery25() {
    String nestedForeach = new StringBuilder()
            .append("foreach (load 'a') {")
            .append("B = FILTER $0 BY (($1 == $2) AND ('a' < 'b'));")
            .append("generate B;")
            .append("};")
            .toString();
    buildPlan(nestedForeach);
}
/** Generates a conditional whose test is a NOT applied to an OR of two comparisons. */
@Test
public void testQuery26() {
    buildPlan("foreach (load 'a') generate ((NOT (($1 == $2) OR ('a' < 'b'))) ? 'a' : $2), 'x' ;");
}
// TODO FIX Query27 and Query28
/** Nested foreach with a DISTINCT over a nested projection, generating a TestApplyFunc call. */
@Test
public void testQuery27() {
    final String udfName = TestApplyFunc.class.getName();
    String nestedForeach = new StringBuilder()
            .append("foreach (load 'a'){")
            .append("A = DISTINCT $3.$1;")
            .append(" generate ").append(udfName).append("($2, $1.($1, $4));")
            .append("};")
            .toString();
    buildPlan(nestedForeach);
}
/** Generates a TestApplyFunc call whose second argument is another TestApplyFunc call. */
@Test
public void testQuery28() {
    final String udfName = TestApplyFunc.class.getName();
    buildPlan("foreach (load 'a') generate " + udfName + "($2, " + udfName + "($2.$3));");
}
/** Loads through the nested TestStorageFunc with a one-column schema. */
@Test
public void testQuery29() {
    final String loaderName = TestStorageFunc.class.getName();
    buildPlan("load 'myfile' using " + loaderName + "() as (col1);");
}
/** Loads through the nested TestStorageFunc with a two-column schema. */
@Test
public void testQuery30() {
    final String loaderName = TestStorageFunc.class.getName();
    buildPlan("load 'myfile' using " + loaderName + "() as (col1, col2);");
}
/**
 * Stub LoadFunc referenced by name in load statements of these tests; every method either
 * returns null or does nothing.
 */
public static class TestStorageFunc extends LoadFunc{
    /** Stub: always returns null. */
    @Override
    public Tuple getNext() throws IOException {
        return null;
    }
    /** Stub: always returns null. */
    @Override
    public InputFormat getInputFormat() throws IOException {
        return null;
    }
    /** Stub: always returns null. */
    @Override
    public LoadCaster getLoadCaster() throws IOException {
        return null;
    }
    /** Stub: intentionally empty. */
    @Override
    public void prepareToRead(RecordReader reader, PigSplit split)
            throws IOException {
    }
    /** Stub: always returns null. */
    @Override
    public String relativeToAbsolutePath(String location, Path curDir)
            throws IOException {
        return null;
    }
    /** Stub: intentionally empty. */
    @Override
    public void setLocation(String location, Job job) throws IOException {
    }
}
/** Plain load with a two-column untyped schema. */
@Test
public void testQuery31() {
    buildPlan("load 'myfile' as (col1, col2);");
}
/** Load schema with nested tuple columns; generates the first top-level column. */
@Test
public void testQuery32() {
    buildPlan("foreach (load 'myfile' as (col1, col2 : tuple(sub1, sub2), col3 : tuple(bag1))) generate col1 ;");
}
/** Cogroups two schematized aliases by named columns and projects a named column out of bag A. */
@Test
public void testQuery33() {
    for (String statement : new String[] {
            "A = load 'a' as (aCol1, aCol2);",
            "B = load 'b' as (bCol1, bCol2);",
            "C = cogroup A by (aCol1), B by bCol1;",
            "foreach C generate group, A.aCol1;"}) {
        buildPlan(statement);
    }
}
/** Filters on a tuple-typed column, then cogroups and projects named columns from both bags. */
@Test
//TODO: Nested schemas don't work now. Probably a bug in the new parser.
public void testQuery34() {
    for (String statement : new String[] {
            "A = load 'a' as (aCol1, aCol2 : tuple(subCol1, subCol2));",
            "A = filter A by aCol2 == '1';",
            "B = load 'b' as (bCol1, bCol2);",
            "foreach (cogroup A by (aCol1), B by bCol1 ) generate A.aCol2, B.bCol2 ;"}) {
        buildPlan(statement);
    }
}
/** Generates both named columns of an inline schematized load. */
@Test
public void testQuery35() {
    buildPlan("foreach (load 'a' as (col1, col2)) generate col1, col2 ;");
}
/** Cogroups an inline schematized load and projects named columns out of the positional bag $1. */
@Test
public void testQuery36() {
    buildPlan("foreach (cogroup ( load 'a' as (col1, col2)) by col1) generate $1.(col2, col1);");
}
/**
 * Passes a valid load statement followed by trailing garbage; if buildPlan raises an
 * AssertionFailedError, its message must contain "Exception".
 */
@Test
public void testQueryFail37() {
    // NOTE(review): if buildPlan accepts the text (for example by stopping after the first
    // statement) no assertion runs and the test still passes - confirm the intended behavior.
    try {
        buildPlan("A = load 'a'; asdasdas");
    } catch (AssertionFailedError failure) {
        String message = failure.getMessage();
        assertTrue(message.contains("Exception"));
    }
}
/** Crosses two inline loads. */
@Test
public void testQuery38(){
    buildPlan("c = cross (load 'a'), (load 'b');");
}
// TODO FIX Query39 and Query40
/**
 * Builds a five-statement script: group by two keys, flatten one key column next to an aliased
 * SUM, then filter and project on the SUM alias (totalRank).
 */
@Test
public void testQuery39(){
    buildPlan("a = load 'a' as (url, host, rank);");
    buildPlan("b = group a by (url,host); ");
    // The returned plan was previously stored in an unused local; only the call matters here.
    buildPlan("c = foreach b generate flatten(group.url), SUM(a.rank) as totalRank;");
    buildPlan("d = filter c by totalRank > '10';");
    buildPlan("e = foreach d generate totalRank;");
}
/**
 * Same script as testQuery39 but the last statement projects url; if buildPlan raises an
 * AssertionFailedError, its message must contain "Exception".
 */
@Test
public void testQueryFail39(){
    for (String setup : new String[] {
            "a = load 'a' as (url, host, rank);",
            "b = group a by (url,host); ",
            "c = foreach b generate flatten(group.url), SUM(a.rank) as totalRank;",
            "d = filter c by totalRank > '10';"}) {
        buildPlan(setup);
    }
    // NOTE(review): if buildPlan accepts the statement no assertion runs and the test still
    // passes - confirm that the projection is really rejected before tightening.
    try {
        buildPlan("e = foreach d generate url;");//url has been flattened and hence the failure
    } catch (AssertionFailedError failure) {
        String message = failure.getMessage();
        assertTrue(message.contains("Exception"));
    }
}
/** Filters with IsEmpty, alone and combined with an equality through AND. */
@Test
public void testQuery40() {
    final String simpleFilter = "a = FILTER (load 'a') BY IsEmpty($2);";
    final String combinedFilter = "a = FILTER (load 'a') BY (IsEmpty($2) AND ($3 == $2));";
    buildPlan(simpleFilter);
    buildPlan(combinedFilter);
}
/**
 * Expects buildPlan to reject assigning an existing relation to a new alias with a schema;
 * the failure message must say that this form is currently unsupported.
 */
@Test
public void testQueryFail41() {
    buildPlan("a = load 'a';");
    boolean exceptionThrown = false;
    try {
        buildPlan("b = a as (host,url);");
    } catch (AssertionFailedError e) {
        assertTrue(e.getMessage().contains("Currently PIG does not support assigning an existing relation"));
        exceptionThrown = true;
    }
    // Without this check the test passes vacuously when the assignment is accepted.
    assertTrue("Expected relation-to-alias assignment to be rejected", exceptionThrown);
    // TODO
    // the following statement was earlier present
    // eventually when we do allow assignments of the form
    // above, we should test with the line below
    // uncommented
    //buildPlan("foreach b generate host;");
}
/** Aliases positional columns in one foreach and references an alias in the next. */
@Test
public void testQuery42() {
    for (String statement : new String[] {
            "a = load 'a';",
            "b = foreach a generate $0 as url, $1 as rank;",
            "foreach b generate url;"}) {
        buildPlan(statement);
    }
}
/** Flattens both bags of a cogroup and then references columns with and without alias:: prefixes. */
@Test
public void testQuery43() {
    for (String statement : new String[] {
            "a = load 'a' as (url,hitCount);",
            "b = load 'a' as (url,rank);",
            "c = cogroup a by url, b by url;",
            "d = foreach c generate group,flatten(a),flatten(b);",
            "e = foreach d generate group, a::url, b::url, b::rank, rank;"}) {
        buildPlan(statement);
    }
}
/**
 * Expects buildPlan to reject a cogroup whose inputs are keyed by a different number of
 * columns (two for a, one for b); the failure message must contain "Exception".
 */
@Test
public void testQueryFail43() {
    buildPlan("a = load 'a' as (name, age, gpa);");
    buildPlan("b = load 'b' as (name, height);");
    String query = "c = cogroup a by (name, age), b by (height);";
    boolean exceptionThrown = false;
    try {
        buildPlan(query);
    } catch (AssertionFailedError e) {
        assertTrue(e.getMessage().contains("Exception"));
        exceptionThrown = true;
    }
    // Without this check the test passes vacuously when the query is accepted.
    assertTrue("Expected query to be rejected: " + query, exceptionThrown);
}
/** Cogroups on expression keys (a map lookup and a division) and projects url out of the group. */
@Test
public void testQuery44() {
    for (String statement : new String[] {
            "a = load 'a' as (url, pagerank);",
            "b = load 'b' as (url, query, rank);",
            "c = cogroup a by (pagerank#'nonspam', url) , b by (rank/'2', url) ;",
            "foreach c generate group.url;"}) {
        buildPlan(statement);
    }
}
/**
 * Registers GFAny under the alias myTr on a local PigServer and expects registering a query
 * that nests myTr(myTr(*)) to throw.
 *
 * @throws Throwable if the PigServer cannot be created or the function cannot be registered
 */
@Test
public void testQueryFail44() throws Throwable {
    // A constructor failure propagates with its cause instead of being turned into a bare
    // assertTrue(false).
    PigServer pig = new PigServer("local");
    pig.registerFunction("myTr",
        new FuncSpec(GFAny.class.getName() + "('tr o 0')"));
    try{
        pig.registerQuery("b = foreach (load 'a') generate myTr(myTr(*));");
    }catch(Exception e){
        // Expected: the nested call must be rejected.
        return;
    }
    fail("registerQuery should have thrown for myTr(myTr(*))");
}
/*
// Select
public void testQuery45() {
buildPlan("A = load 'a' as (url,hitCount);");
buildPlan("B = select url, hitCount from A;");
buildPlan("C = select url, hitCount from B;");
}
//Select + Join
public void testQuery46() {
buildPlan("A = load 'a' as (url,hitCount);");
buildPlan("B = load 'b' as (url,pageRank);");
buildPlan("C = select A.url, A.hitCount, B.pageRank from A join B on A.url == B.url;");
}
// Multiple Joins
public void testQuery47() {
buildPlan("A = load 'a' as (url,hitCount);");
buildPlan("B = load 'b' as (url,pageRank);");
buildPlan("C = load 'c' as (pageRank, position);");
buildPlan("B = select A.url, A.hitCount, B.pageRank from (A join B on A.url == B.url) join C on B.pageRank == C.pageRank;");
}
// Group
public void testQuery48() {
buildPlan("A = load 'a' as (url,hitCount);");
buildPlan("C = select A.url, AVG(A.hitCount) from A group by url;");
}
// Join + Group
public void testQuery49() {
buildPlan("A = load 'a' as (url,hitCount);");
buildPlan("B = load 'b' as (url,pageRank);");
buildPlan("C = select A.url, AVG(B.pageRank), SUM(A.hitCount) from A join B on A.url == B.url group by A.url;");
}
// Group + Having
public void testQuery50() {
buildPlan("A = load 'a' as (url,hitCount);");
buildPlan("C = select A.url, AVG(A.hitCount) from A group by url having AVG(A.hitCount) > '6';");
}
// Group + Having + Order
public void testQuery51() {
buildPlan("A = load 'a' as (url,hitCount);");
buildPlan("C = select A.url, AVG(A.hitCount) from A group by url order by A.url;");
}
// Group + Having + Order
public void testQuery52() {
buildPlan("A = load 'a' as (url,hitCount);");
buildPlan("C = select A.url, AVG(A.hitCount) from A group by url having AVG(A.hitCount) > '6' order by A.url;");
}
// Group + Having + Order 2
public void testQuery53() {
buildPlan("A = load 'a' as (url,hitCount);");
buildPlan("C = select A.url, AVG(A.hitCount) from A group by url having AVG(A.hitCount) > '6' order by AVG(A.hitCount);");
}
// Group + Having + Order 2
public void testQuery54() {
buildPlan("A = load 'a' as (url,hitCount, size);");
buildPlan("C = select A.url, AVG(A.hitCount) from A group by url having AVG(A.size) > '6' order by AVG(A.hitCount);");
}
// Group + Having + Order 2
public void testQuery55() {
buildPlan("A = load 'a' as (url,hitCount, size);");
buildPlan("C = select A.url, AVG(A.hitCount), SUM(A.size) from A group by url having AVG(A.size) > '6' order by AVG(A.hitCount);");
}
// Group + Having + Order 2
public void testQuery56() {
buildPlan("A = load 'a' as (url,hitCount, date);");
buildPlan("C = select A.url, A.date, SUM(A.hitCount) from A group by url, date having AVG(A.hitCount) > '6' order by A.date;");
}
*/
/** Generates the five binary arithmetic operators and unary minus over positional columns. */
@Test
public void testQuery57() {
    buildPlan("foreach (load 'a') generate ($1+$2), ($1-$2), ($1*$2), ($1/$2), ($1%$2), -($1) ;");
}
/** Nested foreach that assigns a bag projection to an alias and generates it with the group key. */
@Test
public void testQuery58() {
    for (String statement : new String[] {
            "a = load 'a' as (name, age, gpa);",
            "b = group a by name;",
            "foreach b {d = a.name; generate group, d;};"}) {
        buildPlan(statement);
    }
}
/**
 * Projects url out of a single-column group key; if buildPlan raises an AssertionFailedError,
 * its message must contain "Exception".
 */
@Test
public void testQueryFail58(){
    buildPlan("a = load 'a' as (url, host, rank);");
    buildPlan("b = group a by url; ");
    // NOTE(review): if buildPlan accepts the statement no assertion runs and the test still
    // passes - confirm that the projection is really rejected before tightening.
    try {
        buildPlan("c = foreach b generate group.url;");
    } catch (AssertionFailedError failure) {
        String message = failure.getMessage();
        assertTrue(message.contains("Exception"));
    }
}
/** Joins two schematized aliases on name. */
@Test
public void testQuery59() {
    for (String statement : new String[] {
            "a = load 'a' as (name, age, gpa);",
            "b = load 'b' as (name, height);",
            "c = join a by name, b by name;"}) {
        buildPlan(statement);
    }
}
/** Crosses two schematized aliases. */
@Test
public void testQuery60() {
    for (String statement : new String[] {
            "a = load 'a' as (name, age, gpa);",
            "b = load 'b' as (name, height);",
            "c = cross a,b;"}) {
        buildPlan(statement);
    }
}
/** Unions two schematized aliases with different column counts. */
@Test
public void testQuery61() {
    for (String statement : new String[] {
            "a = load 'a' as (name, age, gpa);",
            "b = load 'b' as (name, height);",
            "c = union a,b;"}) {
        buildPlan(statement);
    }
}
/** Exercises order by with qualified names, asc/desc, positional columns, star, and inside a nested foreach. */
@Test
public void testQuery62() {
    for (String statement : new String[] {
            "a = load 'a' as (name, age, gpa);",
            "b = load 'b' as (name, height);",
            "c = cross a,b;",
            "d = order c by b::name, height, a::gpa;",
            "e = order a by name, age, gpa desc;",
            "f = order a by $0 asc, age, gpa desc;",
            "g = order a by * asc;",
            "h = cogroup a by name, b by name;",
            "i = foreach h {i1 = order a by *; generate i1;};"}) {
        buildPlan(statement);
    }
}
/**
 * Orders a cross product by an unqualified name that both inputs declare; if buildPlan raises
 * an AssertionFailedError, its message must contain "Exception".
 */
@Test
public void testQueryFail62() {
    for (String setup : new String[] {
            "a = load 'a' as (name, age, gpa);",
            "b = load 'b' as (name, height);",
            "c = cross a,b;"}) {
        buildPlan(setup);
    }
    // NOTE(review): if buildPlan accepts the statement no assertion runs and the test still
    // passes - confirm that the unqualified name is really rejected before tightening.
    try {
        buildPlan("d = order c by name, b::name, height, a::gpa;");
    } catch (AssertionFailedError failure) {
        String message = failure.getMessage();
        assertTrue(message.contains("Exception"));
    }
}
/** Groups by a tuple-typed column and projects a field of the group key by name. */
@Test
public void testQuery63() {
    for (String statement : new String[] {
            "a = load 'a' as (name, details: tuple(age, gpa));",
            "b = group a by details;",
            "d = foreach b generate group.age;",
            "e = foreach a generate name, details;"}) {
        buildPlan(statement);
    }
}
/**
 * Declares tuple columns without the tuple keyword; if buildPlan raises an
 * AssertionFailedError, its message must contain "Exception".
 */
@Test
public void testQueryFail63() {
    // NOTE(review): if buildPlan accepts the query no assertion runs and the test still passes.
    // testQuery90 in this file builds "group as mygroup:(myname)", so the keyword-less tuple
    // form may in fact be accepted - verify before tightening.
    try {
        buildPlan("foreach (load 'myfile' as (col1, col2 : (sub1, sub2), col3 : (bag1))) generate col1 ;");
    } catch (AssertionFailedError failure) {
        String message = failure.getMessage();
        assertTrue(message.contains("Exception"));
    }
}
/** Load schemas with typed scalars, tuples, maps and nested bags, followed by group/foreach statements. */
@Test
public void testQuery64() {
    for (String statement : new String[] {
            "a = load 'a' as (name: chararray, details: tuple(age, gpa), mymap: map[]);",
            "c = load 'a' as (name, details: bag{mytuple: tuple(age: int, gpa)});",
            "b = group a by details;",
            "d = foreach b generate group.age;",
            "e = foreach a generate name, details;",
            "f = LOAD 'myfile' AS (garage: bag{tuple1: tuple(num_tools: int)}, links: bag{tuple2: tuple(websites: chararray)}, page: bag{something_stupid: tuple(yeah_double: double)}, coordinates: bag{another_tuple: tuple(ok_float: float, bite_the_array: bytearray, bag_of_unknown: bag{})});"}) {
        buildPlan(statement);
    }
}
/**
 * Declares a bag whose element is a scalar rather than a tuple; if buildPlan raises an
 * AssertionFailedError, its message must contain "Exception".
 */
@Test
public void testQueryFail64() {
    // NOTE(review): if buildPlan accepts the query no assertion runs and the test still
    // passes - confirm that this schema is really rejected before tightening.
    try {
        buildPlan("foreach (load 'myfile' as (col1, col2 : bag{age: int})) generate col1 ;");
    } catch (AssertionFailedError failure) {
        String message = failure.getMessage();
        assertTrue(message.contains("Exception"));
    }
}
/** Cogroups on two-column keys and renames projections, including an alias written as b::name. */
@Test
public void testQuery65() {
    for (String statement : new String[] {
            "a = load 'a' as (name, age, gpa);",
            "b = load 'b' as (name, height);",
            "c = cogroup a by (name, age), b by (name, height);",
            "d = foreach c generate group.name, a.name as aName, b.name as b::name;"}) {
        buildPlan(statement);
    }
}
/**
 * Generates b.height renamed to age next to a.age; if buildPlan raises an
 * AssertionFailedError, its message must contain "Exception".
 */
@Test
public void testQueryFail65() {
    for (String setup : new String[] {
            "a = load 'a' as (name, age, gpa);",
            "b = load 'b' as (name, height);",
            "c = cogroup a by (name, age), b by (name, height);"}) {
        buildPlan(setup);
    }
    // NOTE(review): if buildPlan accepts the statement no assertion runs and the test still
    // passes - confirm that the clashing alias is really rejected before tightening.
    try {
        buildPlan("d = foreach c generate group.name, a.name, b.height as age, a.age;");
    } catch (AssertionFailedError failure) {
        String message = failure.getMessage();
        assertTrue(message.contains("Exception"));
    }
}
/** Generates long and float constants in arithmetic plus a bag constant holding one tuple. */
@Test
public void testQuery67() {
    final String loadStatement = " a = load 'input1' as (name, age, gpa);";
    final String generateStatement = " b = foreach a generate age, age * 10L, gpa/0.2f, {(16, 4.0e-2, 'hello')};";
    buildPlan(loadStatement);
    buildPlan(generateStatement);
}
/** Generates an int constant and a bag constant holding two tuples of mixed constant types. */
@Test
public void testQuery68() {
    final String loadStatement = " a = load 'input1';";
    final String generateStatement = " b = foreach a generate 10, {(16, 4.0e-2, 'hello'), (0.5f, 12l, 'another tuple')};";
    buildPlan(loadStatement);
    buildPlan(generateStatement);
}
/** Generates a bag constant whose second tuple contains a nested tuple constant. */
@Test
public void testQuery69() {
    final String loadStatement = " a = load 'input1';";
    final String generateStatement = " b = foreach a generate {(16, 4.0e-2, 'hello'), (0.5f, 'another tuple', 12L, (1))};";
    buildPlan(loadStatement);
    buildPlan(generateStatement);
}
/** Generates a map constant with values of several types and then looks up one key. */
@Test
public void testQuery70() {
    for (String statement : new String[] {
            " a = load 'input1';",
            " b = foreach a generate ['10'#'hello', '4.0e-2'#10L, '0.5f'#(1), 'world'#42, '42'#{('guide')}] as mymap:map[];",
            " c = foreach b generate mymap#'10';"}) {
        buildPlan(statement);
    }
}
/**
 * Writes a bag constant whose elements are not wrapped in tuples; if buildPlan raises an
 * AssertionFailedError, its message must contain "Exception".
 */
@Test
public void testQueryFail67() {
    buildPlan(" a = load 'input1' as (name, age, gpa);");
    // NOTE(review): if buildPlan accepts the statement no assertion runs and the test still
    // passes - confirm that the constant is really rejected before tightening.
    try {
        buildPlan(" b = foreach a generate age, age * 10L, gpa/0.2f, {16, 4.0e-2, 'hello'};");
    } catch (AssertionFailedError failure) {
        String message = failure.getMessage();
        assertTrue(message.contains("Exception"));
    }
}
/**
 * Writes a long constant with a space before its suffix ("16 L") inside a bag constant; if
 * buildPlan raises an AssertionFailedError, its message must contain "Exception".
 */
@Test
public void testQueryFail68() {
    buildPlan(" a = load 'input1' as (name, age, gpa);");
    // NOTE(review): if buildPlan accepts the statement no assertion runs and the test still
    // passes - confirm that the constant is really rejected before tightening.
    try {
        buildPlan(" b = foreach a generate {(16 L, 4.0e-2, 'hello'), (0.5f, 'another tuple', 12L, {()})};");
    } catch (AssertionFailedError failure) {
        String message = failure.getMessage();
        assertTrue(message.contains("Exception"));
    }
}
/** Splits an inline load into two aliases and runs a foreach over each. */
@Test
public void testQuery71() {
    for (String statement : new String[] {
            "split (load 'a') into x if $0 > '7', y if $0 < '7';",
            "b = foreach x generate $0;",
            "c = foreach y generate $1;"}) {
        buildPlan(statement);
    }
}
/** Exercises cast syntax to int, chararray, and bag/tuple types with and without inner schemas. */
@Test
public void testQuery72() {
    for (String statement : new String[] {
            "split (load 'a') into x if $0 > '7', y if $0 < '7';",
            "b = foreach x generate (int)$0;",
            "c = foreach y generate (bag{})$1;",
            "d = foreach y generate (int)($1/2);",
            "e = foreach y generate (bag{tuple(int, float)})($1/2);",
            "f = foreach x generate (tuple(int, float))($1/2);",
            "g = foreach x generate (tuple())($1/2);",
            "h = foreach x generate (chararray)($1/2);"}) {
        buildPlan(statement);
    }
}
/**
 * Tries three malformed-looking casts ((bag), (bag{int, float}), (tuple)); for each one, if
 * buildPlan raises an AssertionFailedError, its message must contain "Exception".
 */
@Test
public void testQueryFail72() {
    buildPlan("split (load 'a') into x if $0 > '7', y if $0 < '7';");
    // NOTE(review): a cast that buildPlan accepts triggers no assertion, so the test still
    // passes - confirm that each form is really rejected before tightening.
    for (String castStatement : new String[] {
            "c = foreach y generate (bag)$1;",
            "c = foreach y generate (bag{int, float})$1;",
            "c = foreach y generate (tuple)$1;"}) {
        try {
            buildPlan(castStatement);
        } catch (AssertionFailedError failure) {
            assertTrue(failure.getMessage().contains("Exception"));
        }
    }
}
/** Uses the matches operator in a filter condition and inside a conditional expression. */
@Test
public void testQuery73() {
    for (String statement : new String[] {
            "split (load 'a') into x if $0 > '7', y if $0 < '7';",
            "b = filter x by $0 matches '^fred.*';",
            "c = foreach y generate $0, ($0 matches 'yuri.*' ? $1 - 10 : $1);"}) {
        buildPlan(statement);
    }
}
/** Groups and cogroups two typed loads, once on a plain column and once on an arithmetic expression. */
@Test
public void testQuery74() {
    for (String statement : new String[] {
            "a = load 'a' as (field1: int, field2: long);",
            "b = load 'a' as (field1: bytearray, field2: double);",
            "c = group a by field1, b by field1;",
            "d = cogroup a by ((field1+field2)*field1), b by field1;"}) {
        buildPlan(statement);
    }
}
/** Limit over an inline load with an int literal. */
@Test
public void testQuery77() {
    final String limitStatement = "limit (load 'a') 100;";
    buildPlan(limitStatement);
}
/** Limit over an inline load with a long literal. */
@Test
public void testLimitWithLong() {
    final String limitStatement = "limit (load 'a') 100L;";
    buildPlan(limitStatement);
}
/** Unions three inline loads, then runs a block-style foreach with a parallel clause. */
@Test
public void testQuery75() {
    final String unionStatement = "a = union (load 'a'), (load 'b'), (load 'c');";
    final String foreachStatement = "b = foreach a {generate $0;} parallel 10;";
    buildPlan(unionStatement);
    buildPlan(foreachStatement);
}
/** Uses IS NULL and IS NOT NULL in filter conditions and inside conditional expressions. */
@Test
public void testQuery76() {
    for (String statement : new String[] {
            "split (load 'a') into x if $0 > '7', y if $0 < '7';",
            "b = filter x by $0 IS NULL;",
            "c = filter y by $0 IS NOT NULL;",
            "d = foreach b generate $0, ($1 IS NULL ? 0 : $1 - 7);",
            "e = foreach c generate $0, ($1 IS NOT NULL ? $1 - 5 : 0);"}) {
        buildPlan(statement);
    }
}
/** Nested foreach over a single-key group chaining filter, projection, distinct and order. */
@Test
public void testQuery80() {
    buildPlan("a = load 'input1' as (name, age, gpa);");
    buildPlan("b = filter a by age < '20';");
    buildPlan("c = group b by age;");
    StringBuilder nestedForeach = new StringBuilder();
    nestedForeach.append("d = foreach c {");
    nestedForeach.append("cf = filter b by gpa < '3.0';");
    nestedForeach.append("cp = cf.gpa;");
    nestedForeach.append("cd = distinct cp;");
    nestedForeach.append("co = order cd by gpa;");
    nestedForeach.append("generate group, flatten(co);");
    //nestedForeach.append("generate group, flatten(cd);");
    nestedForeach.append("};");
    buildPlan(nestedForeach.toString());
}
/** Splits on the word-form comparison operators lt, gte, lte and gt. */
@Test
public void testQuery81() {
    final String loadStatement = "a = load 'input1' using PigStorage() as (name, age, gpa);";
    final String splitStatement = "split a into b if name lt 'f', c if (name gte 'f' and name lte 'h'), d if name gt 'h';";
    buildPlan(loadStatement);
    buildPlan(splitStatement);
}
/**
 * Splits using the spellings ge and le instead of gte and lte; if buildPlan raises an
 * AssertionFailedError, its message must contain "Exception".
 */
@Test
public void testQueryFail81() {
    buildPlan("a = load 'input1' using PigStorage() as (name, age, gpa);");
    // NOTE(review): if buildPlan accepts the statement no assertion runs and the test still
    // passes - confirm that these operator spellings are really rejected before tightening.
    try {
        buildPlan("split a into b if name lt 'f', c if (name ge 'f' and name le 'h'), d if name gt 'h';");
    } catch (AssertionFailedError failure) {
        String message = failure.getMessage();
        assertTrue(message.contains("Exception"));
    }
}
/** Nested foreach that orders the positional bag $1 by star and projects $1.$0 into an alias. */
@Test
public void testQuery82() {
    buildPlan("a = load 'myfile';");
    buildPlan("b = group a by $0;");
    String nestedForeach = new StringBuilder()
            .append("c = foreach b {")
            .append("c1 = order $1 by *;")
            .append("c2 = $1.$0;")
            .append("generate flatten(c1), c2;")
            .append("};")
            .toString();
    buildPlan(nestedForeach);
}
/**
 * Same as testQuery82 except that the nested alias c2 is assigned the bare column $1; if
 * buildPlan raises an AssertionFailedError, its message must contain "Exception".
 */
@Test
public void testQueryFail82() {
    buildPlan("a = load 'myfile';");
    buildPlan("b = group a by $0;");
    String nestedForeach = new StringBuilder()
            .append("c = foreach b {")
            .append("c1 = order $1 by *;")
            .append("c2 = $1;")
            .append("generate flatten(c1), c2;")
            .append("};")
            .toString();
    // NOTE(review): if buildPlan accepts the statement no assertion runs and the test still
    // passes - confirm that the bare assignment is really rejected before tightening.
    try {
        buildPlan(nestedForeach);
    } catch (AssertionFailedError failure) {
        String message = failure.getMessage();
        assertTrue(message.contains("Exception"));
    }
}
/** Nested foreach over a two-key group chaining filter, named projection, distinct and order. */
@Test
public void testQuery83() {
    buildPlan("a = load 'input1' as (name, age, gpa);");
    buildPlan("b = filter a by age < '20';");
    buildPlan("c = group b by (name,age);");
    String nestedForeach = new StringBuilder()
            .append("d = foreach c {")
            .append("cf = filter b by gpa < '3.0';")
            .append("cp = cf.gpa;")
            .append("cd = distinct cp;")
            .append("co = order cd by gpa;")
            .append("generate group, flatten(co);")
            .append("};")
            .toString();
    buildPlan(nestedForeach);
}
/** Same as testQuery83 but the nested projection uses the positional column $2 instead of gpa. */
@Test
public void testQuery84() {
    buildPlan("a = load 'input1' as (name, age, gpa);");
    buildPlan("b = filter a by age < '20';");
    buildPlan("c = group b by (name,age);");
    String nestedForeach = new StringBuilder()
            .append("d = foreach c {")
            .append("cf = filter b by gpa < '3.0';")
            .append("cp = cf.$2;")
            .append("cd = distinct cp;")
            .append("co = order cd by gpa;")
            .append("generate group, flatten(co);")
            .append("};")
            .toString();
    buildPlan(nestedForeach);
}
/**
 * Groups an untyped three-column load by (name, age) and compares the schema of the resulting
 * LOCogroup, and of a follow-up foreach, against hand-built expected schemas.
 */
@Test
public void testQuery85() throws FrontendException {
    buildPlan("a = load 'myfile' as (name, age, gpa);");
    LogicalPlan plan = buildPlan("b = group a by (name, age);");
    LOCogroup groupOp = (LOCogroup) plan.getLeaves().get(0);

    // The test expects columns declared without a type to come out as bytearray.
    Schema.FieldSchema nameField = new Schema.FieldSchema("name", DataType.BYTEARRAY);
    Schema.FieldSchema ageField = new Schema.FieldSchema("age", DataType.BYTEARRAY);
    Schema.FieldSchema gpaField = new Schema.FieldSchema("gpa", DataType.BYTEARRAY);

    // Expected "group" column: a tuple of the two grouping keys.
    Schema keySchema = new Schema(nameField);
    keySchema.add(ageField);
    Schema.FieldSchema groupField = new Schema.FieldSchema("group", keySchema, DataType.TUPLE);

    // Expected "a" column: a bag carrying the full load schema.
    Schema inputSchema = new Schema(nameField);
    inputSchema.add(ageField);
    inputSchema.add(gpaField);
    Schema.FieldSchema bagField = new Schema.FieldSchema("a", inputSchema, DataType.BAG);

    Schema expectedGroupSchema = new Schema(groupField);
    expectedGroupSchema.add(bagField);
    assertTrue(groupOp.getSchema().equals(expectedGroupSchema));

    plan = buildPlan("c = foreach b generate group.name, group.age, COUNT(a.gpa);");
    LOForEach forEachOp = (LOForEach) plan.getLeaves().get(0);
    // Expected foreach output: both keys plus an unnamed long for the COUNT.
    Schema expectedForEachSchema = new Schema(nameField);
    expectedForEachSchema.add(ageField);
    expectedForEachSchema.add(new Schema.FieldSchema(null, DataType.LONG));
    assertTrue(forEachOp.getSchema().equals(expectedForEachSchema));
}
/**
 * Groups a typed three-column load (types written in mixed case) by (name, age) and compares
 * the LOCogroup schema against a hand-built expected schema.
 */
@Test
public void testQuery86() throws FrontendException {
    buildPlan("a = load 'myfile' as (name:Chararray, age:Int, gpa:Float);");
    LogicalPlan plan = buildPlan("b = group a by (name, age);");
    LOCogroup groupOp = (LOCogroup) plan.getLeaves().get(0);

    Schema.FieldSchema nameField = new Schema.FieldSchema("name", DataType.CHARARRAY);
    Schema.FieldSchema ageField = new Schema.FieldSchema("age", DataType.INTEGER);
    Schema.FieldSchema gpaField = new Schema.FieldSchema("gpa", DataType.FLOAT);

    // Expected "group" column: a tuple of the two grouping keys.
    Schema keySchema = new Schema(nameField);
    keySchema.add(ageField);
    Schema.FieldSchema groupField = new Schema.FieldSchema("group", keySchema, DataType.TUPLE);

    // Expected "a" column: a bag carrying the full load schema.
    Schema inputSchema = new Schema(nameField);
    inputSchema.add(ageField);
    inputSchema.add(gpaField);
    Schema.FieldSchema bagField = new Schema.FieldSchema("a", inputSchema, DataType.BAG);

    Schema expectedGroupSchema = new Schema(groupField);
    expectedGroupSchema.add(bagField);
    assertTrue(groupOp.getSchema().equals(expectedGroupSchema));
}
/**
 * Builds a nested foreach with an inner order-by and checks that the first sort-column plan of
 * the nested LOSort has exactly one leaf.
 */
@Test
public void testQuery87() {
    buildPlan("a = load 'myfile';");
    buildPlan("b = group a by $0;");
    LogicalPlan plan = buildPlan("c = foreach b {c1 = order $1 by $1; generate flatten(c1); };");
    LOForEach forEachOp = (LOForEach) plan.getLeaves().get(0);
    LogicalPlan innerPlan = forEachOp.getForEachPlans().get(0);
    // The casts assert the expected shape: a projection root followed by the sort.
    LOProject sortSource = (LOProject) innerPlan.getRoots().get(0);
    LOSort innerSort = (LOSort) innerPlan.getSuccessors(sortSource).get(0);
    LogicalPlan firstSortColPlan = innerSort.getSortColPlans().get(0);
    assertTrue(firstSortColPlan.getLeaves().size() == 1);
}
/**
 * Orders a grouped relation and checks that the leaf projection of the first sort-column plan
 * has the sort's predecessor (the LOCogroup) as its expression.
 */
@Test
public void testQuery88() {
    buildPlan("a = load 'myfile';");
    buildPlan("b = group a by $0;");
    LogicalPlan plan = buildPlan("c = order b by $1 ;");
    LOSort sortOp = (LOSort) plan.getLeaves().get(0);
    LogicalPlan firstSortColPlan = sortOp.getSortColPlans().get(0);
    LOProject sortColumn = (LOProject) firstSortColPlan.getLeaves().get(0);
    LOCogroup groupOp = (LOCogroup) plan.getPredecessors(sortOp).get(0);
    assertEquals(sortColumn.getExpression(), groupOp);
}
/** Projects a large positional index from a schemaless load and in-range columns from a schematized one. */
@Test
public void testQuery89() {
    for (String statement : new String[] {
            "a = load 'myfile';",
            "b = foreach a generate $0, $100;",
            "c = load 'myfile' as (i: int);",
            "d = foreach c generate $0 as zero, i;"}) {
        buildPlan(statement);
    }
}
/**
 * Expects buildPlan to reject a positional reference ($5) beyond a one-column declared schema;
 * the failure message must contain "Out of bound access".
 */
@Test
public void testQueryFail89() {
    buildPlan("c = load 'myfile' as (i: int);");
    boolean exceptionThrown = false;
    try {
        buildPlan("d = foreach c generate $0, $5;");
    } catch (AssertionFailedError e) {
        assertTrue(e.getMessage().contains("Out of bound access"));
        exceptionThrown = true;
    }
    // Without this check the test passes vacuously when the projection is accepted.
    assertTrue("Expected out-of-bound column reference to be rejected", exceptionThrown);
}
/**
 * Checks the foreach schema produced by "as" clauses over a group on
 * (name, age): renaming flattened group fields, renaming the group tuple and
 * its members, and naming/typing the flattened result of DIFF.
 */
@Test
public void testQuery90() throws FrontendException, ParseException {
    LogicalPlan plan;
    LOForEach fe;
    Schema actual;
    Schema expected;

    buildPlan("a = load 'myfile' as (name:Chararray, age:Int, gpa:Float);");
    buildPlan("b = group a by (name, age);");

    // flatten(group) as (myname): only the first element, name, is renamed
    plan = buildPlan("c = foreach b generate flatten(group) as (myname), COUNT(a) as mycount;");
    fe = (LOForEach) plan.getLeaves().get(0);
    actual = fe.getSchema();
    expected = Util.getSchemaFromString("myname: chararray, age: int, mycount: long");
    assertTrue(actual.equals(expected));

    // flatten(group) as (myname, myage): both elements are renamed
    plan = buildPlan("c = foreach b generate flatten(group) as (myname, myage), COUNT(a) as mycount;");
    fe = (LOForEach) plan.getLeaves().get(0);
    actual = fe.getSchema();
    expected = Util.getSchemaFromString("myname: chararray, myage: int, mycount: long");
    assertTrue(actual.equals(expected));

    // flatten(group) as (): the schema of group is unchanged
    plan = buildPlan("c = foreach b generate flatten(group) as (), COUNT(a) as mycount;");
    fe = (LOForEach) plan.getLeaves().get(0);
    actual = fe.getSchema();
    expected = Util.getSchemaFromString("group::name: chararray, group::age: int, mycount: long");
    assertTrue(actual.equals(expected));

    // flatten(group) as myname (no parentheses): again only name is renamed
    plan = buildPlan("c = foreach b generate flatten(group) as myname, COUNT(a) as mycount;");
    fe = (LOForEach) plan.getLeaves().get(0);
    actual = fe.getSchema();
    expected = Util.getSchemaFromString("myname: chararray, age: int, mycount: long");
    assertTrue(actual.equals(expected));

    // group as mygroup: the tuple itself is renamed
    plan = buildPlan("c = foreach b generate group as mygroup, COUNT(a) as mycount;");
    fe = (LOForEach) plan.getLeaves().get(0);
    actual = fe.getSchema();
    expected = Util.getSchemaFromString("mygroup:(name: chararray, age: int), mycount: long");
    assertTrue(actual.equals(expected));

    // group as mygroup:(myname): tuple renamed, first member renamed
    plan = buildPlan("c = foreach b generate group as mygroup:(myname), COUNT(a) as mycount;");
    fe = (LOForEach) plan.getLeaves().get(0);
    actual = fe.getSchema();
    expected = Util.getSchemaFromString("mygroup:(myname: chararray, age: int), mycount: long");
    assertTrue(actual.equals(expected));

    // group as mygroup:(myname, myage): tuple renamed, both members renamed
    plan = buildPlan("c = foreach b generate group as mygroup:(myname, myage), COUNT(a) as mycount;");
    fe = (LOForEach) plan.getLeaves().get(0);
    actual = fe.getSchema();
    expected = Util.getSchemaFromString("mygroup:(myname: chararray, myage: int), mycount: long");
    assertTrue(actual.equals(expected));

    // group as mygroup:(): empty tuple schema, so only the tuple is renamed
    plan = buildPlan("c = foreach b generate group as mygroup:(), COUNT(a) as mycount;");
    fe = (LOForEach) plan.getLeaves().get(0);
    actual = fe.getSchema();
    expected = Util.getSchemaFromString("mygroup:(name: chararray, age: int), mycount: long");
    assertTrue(actual.equals(expected));

    // The remaining cases give a user-defined schema to a flattened bag that
    // has no schema of its own (the result of DIFF).

    // three untyped names
    buildPlan("c = load 'another_file';");
    buildPlan("d = cogroup a by $0, c by $0;");
    plan = buildPlan("e = foreach d generate flatten(DIFF(a, c)) as (x, y, z), COUNT(a) as mycount;");
    fe = (LOForEach) plan.getLeaves().get(0);
    actual = fe.getSchema();
    expected = Util.getSchemaFromString("x: bytearray, y: bytearray, z: bytearray, mycount: long");
    assertTrue(actual.equals(expected));

    // two typed names and one untyped name
    buildPlan("c = load 'another_file';");
    buildPlan("d = cogroup a by $0, c by $0;");
    plan = buildPlan("e = foreach d generate flatten(DIFF(a, c)) as (x: int, y: float, z), COUNT(a) as mycount;");
    fe = (LOForEach) plan.getLeaves().get(0);
    actual = fe.getSchema();
    expected = Util.getSchemaFromString("x: int, y: float, z: bytearray, mycount: long");
    assertTrue(actual.equals(expected));

    // a single untyped name
    buildPlan("c = load 'another_file';");
    buildPlan("d = cogroup a by $0, c by $0;");
    plan = buildPlan("e = foreach d generate flatten(DIFF(a, c)) as x, COUNT(a) as mycount;");
    fe = (LOForEach) plan.getLeaves().get(0);
    actual = fe.getSchema();
    expected = Util.getSchemaFromString("x: bytearray, mycount: long");
    assertTrue(actual.equals(expected));

    // a single typed name
    buildPlan("c = load 'another_file';");
    buildPlan("d = cogroup a by $0, c by $0;");
    plan = buildPlan("e = foreach d generate flatten(DIFF(a, c)) as x: int, COUNT(a) as mycount;");
    fe = (LOForEach) plan.getLeaves().get(0);
    actual = fe.getSchema();
    expected = Util.getSchemaFromString("x: int, mycount: long");
    assertTrue(actual.equals(expected));
}
/**
 * "as" clauses that do not fit the group on (name, age) must be rejected:
 * too many names, a conflicting member type, or a bag schema applied to the
 * group tuple. Each failure is expected as an AssertionFailedError carrying
 * the listed message fragment.
 */
@Test
public void testQueryFail90() throws FrontendException, ParseException {
    buildPlan("a = load 'myfile' as (name:Chararray, age:Int, gpa:Float);");
    buildPlan("b = group a by (name, age);");

    // Each row: { statement that must be rejected, text expected in the error message }
    String[][] rejectedCases = {
        {"c = foreach b generate group as mygroup:(myname, myage, mygpa), COUNT(a) as mycount;",
            "Schema size mismatch"},
        {"c = foreach b generate group as mygroup:(myname: int, myage), COUNT(a) as mycount;",
            "Type mismatch"},
        {"c = foreach b generate group as mygroup:(myname, myage: chararray), COUNT(a) as mycount;",
            "Type mismatch"},
        {"c = foreach b generate group as mygroup:{t: (myname, myage)}, COUNT(a) as mycount;",
            "Type mismatch"},
        {"c = foreach b generate flatten(group) as (myname, myage, mygpa), COUNT(a) as mycount;",
            "Schema size mismatch"},
    };

    for (String[] rejectedCase : rejectedCases) {
        String query = rejectedCase[0];
        String expectedMessage = rejectedCase[1];
        // Without this flag a statement that builds successfully would go unnoticed.
        boolean exceptionThrown = false;
        try {
            buildPlan(query);
        } catch (AssertionFailedError e) {
            assertTrue(e.getMessage().contains(expectedMessage));
            exceptionThrown = true;
        }
        assertTrue("Expected a failure for: " + query, exceptionThrown);
    }
}
/**
 * Adds SUM over an Int column to SUM over a Float column inside a foreach on
 * a group. The test passes if none of the statements makes buildPlan throw.
 */
@Test
public void testQuery91() {
    String[] statements = {
        "a = load 'myfile' as (name:Chararray, age:Int, gpa:Float);",
        "b = group a by name;",
        "c = foreach b generate SUM(a.age) + SUM(a.gpa);",
    };
    for (String statement : statements) {
        buildPlan(statement);
    }
}
/**
 * Loads and groups a relation, then assembles a nested foreach that chains
 * map lookups (name#'alias', alias#'first', alias#'last').
 */
@Test
public void testQuery92() {
    buildPlan("a = load 'myfile' as (name, age, gpa);");
    buildPlan("b = group a by name;");
    // NOTE(review): this statement is assembled but never handed to buildPlan,
    // so only the load and group above are exercised - confirm whether that
    // is intentional.
    String nestedForeach = "c = foreach b { "
        + " alias = name#'alias'; "
        + " af = alias#'first'; "
        + " al = alias#'last'; "
        + " generate SUM(a.age) + SUM(a.gpa); "
        + "};";
}
/**
 * After flatten(a) and a projection of name, the field must still be
 * addressable by its short alias "name" rather than "a::name". The test
 * passes if none of the statements makes buildPlan throw.
 */
@Test
public void testQuery93() throws FrontendException, ParseException {
    String[] setup = {
        "a = load 'one' as (name, age, gpa);",
        "b = group a by name;",
        "c = foreach b generate flatten(a);",
        "d = foreach c generate name;",
    };
    for (String statement : setup) {
        buildPlan(statement);
    }
    // refer to the field as "name", not as a::name
    buildPlan("e = foreach d generate name;");
}
/**
 * Same pipeline as a flatten(a) followed by a projection of name, but the
 * final statement addresses the field through its qualified alias a::name.
 *
 * NOTE(review): despite "Fail" in the method name, no failure is caught or
 * asserted here; the test passes only if every statement builds.
 */
@Test
public void testQueryFail93() throws FrontendException, ParseException {
    String[] setup = {
        "a = load 'one' as (name, age, gpa);",
        "b = group a by name;",
        "c = foreach b generate flatten(a);",
        "d = foreach c generate name;",
    };
    for (String statement : setup) {
        buildPlan(statement);
    }
    // the field projected as "name" is referenced here as a::name
    buildPlan("e = foreach d generate a::name;");
}
/**
 * Two inputs that both declare name and age are cogrouped and flattened.
 * The colliding field is addressed as a::name / b::name; the fields that
 * occur in only one input (gpa, somethingelse) are addressed both with and
 * without a qualifier. The test passes if no statement makes buildPlan throw.
 */
@Test
public void testQuery94() throws FrontendException, ParseException {
    String[] setup = {
        "a = load 'one' as (name, age, gpa);",
        "b = load 'two' as (name, age, somethingelse);",
        "c = cogroup a by name, b by name;",
        "d = foreach c generate flatten(a), flatten(b);",
    };
    for (String statement : setup) {
        buildPlan(statement);
    }

    // "name" exists on both sides, so it is referenced as a::name and b::name
    buildPlan("e = foreach d generate a::name, b::name;");

    // gpa and somethingelse can be referenced with or without the qualifier
    buildPlan("f = foreach d generate gpa, somethingelse, a::gpa, b::somethingelse;");
}
/**
 * After flattening two cogrouped inputs that both declare "name", an
 * unqualified reference to name must be rejected. The failure is expected as
 * an AssertionFailedError whose message contains "Found more than one match:".
 */
@Test
public void testQueryFail94() throws FrontendException, ParseException {
    buildPlan("a = load 'one' as (name, age, gpa);");
    buildPlan("b = load 'two' as (name, age, somethingelse);");
    buildPlan("c = cogroup a by name, b by name;");
    buildPlan("d = foreach c generate flatten(a), flatten(b);");

    // Without this flag the test would pass vacuously when no error is raised.
    boolean exceptionThrown = false;
    try {
        // "name" exists in both a and b, so the bare alias should not resolve
        buildPlan("e = foreach d generate name;");
    } catch (AssertionFailedError e) {
        assertTrue(e.getMessage().contains("Found more than one match:"));
        exceptionThrown = true;
    }
    assertTrue(exceptionThrown);
}
/**
 * Nested order inside a foreach over a group: checks the schema of the
 * cogroup feeding the foreach (group key plus bag "a") and the schema of the
 * foreach itself (flattened fields plus max_age as double).
 */
@Test
public void testQuery95() throws FrontendException, ParseException {
    buildPlan("a = load 'myfile' as (name, age, gpa);");
    buildPlan("b = group a by name;");
    LogicalPlan plan = buildPlan("c = foreach b {d = order a by $1; generate flatten(d), MAX(a.age) as max_age;};");
    LOForEach fe = (LOForEach) plan.getLeaves().get(0);
    LOCogroup groupOp = (LOCogroup) plan.getPredecessors(fe).get(0);

    // Expected cogroup output: (group: bytearray, a: {name, age, gpa})
    Schema bagContents = Util.getSchemaFromString("name: bytearray, age: bytearray, gpa: bytearray");
    Schema.FieldSchema bagField = new Schema.FieldSchema("a", bagContents, DataType.BAG);
    Schema.FieldSchema keyField = new Schema.FieldSchema("group", DataType.BYTEARRAY);
    Schema expectedGroupSchema = new Schema();
    expectedGroupSchema.add(keyField);
    expectedGroupSchema.add(bagField);
    assertTrue(Schema.equals(groupOp.getSchema(), expectedGroupSchema, false, false));

    Schema actualForeachSchema = fe.getSchema();
    Schema expectedForeachSchema = Util.getSchemaFromString(
        "name: bytearray, age: bytearray, gpa: bytearray, max_age: double");
    assertTrue(Schema.equals(actualForeachSchema, expectedForeachSchema, false, true));
}
/**
 * Nested filter -> distinct -> order inside a foreach. Walks the plan of the
 * second generate expression (flatten(co)) from its leaf towards its inputs
 * and checks the operator type found at every step.
 */
@Test
public void testQuery96() throws FrontendException, ParseException {
    buildPlan("a = load 'input' as (name, age, gpa);");
    buildPlan("b = filter a by age < 20;");
    buildPlan("c = group b by age;");
    String nestedQuery = "d = foreach c {"
        + "cf = filter b by gpa < 3.0;"
        + "cd = distinct cf.gpa;"
        + "co = order cd by $0;"
        + "generate group, flatten(co);"
        + "};";
    LogicalPlan plan = buildPlan(nestedQuery);
    LOForEach outerForeach = (LOForEach) plan.getLeaves().get(0);

    // Index 1 is the plan of the second generate expression, flatten(co).
    LogicalPlan flattenPlan = outerForeach.getForEachPlans().get(1);

    LogicalOperator leaf = flattenPlan.getLeaves().get(0);
    assertTrue(leaf instanceof LOProject);

    LogicalOperator orderOp = flattenPlan.getPredecessors(leaf).get(0);
    assertTrue(orderOp instanceof LOSort);

    LogicalOperator distinctOp = flattenPlan.getPredecessors(orderOp).get(0);
    assertTrue(distinctOp instanceof LODistinct);

    // the input of distinct must be a nested foreach
    LogicalOperator innerForeach = flattenPlan.getPredecessors(distinctOp).get(0);
    assertTrue(innerForeach instanceof LOForEach);
    LogicalPlan innerForeachPlan = ((LOForEach) innerForeach).getForEachPlans().get(0);
    LogicalOperator innerRoot = innerForeachPlan.getRoots().get(0);
    assertTrue(innerRoot instanceof LOProject);
    // column 2 is the position of gpa in the schema declared for a
    assertTrue(((LOProject) innerRoot).getCol() == 2);

    // the filter's comparison plan must not have a project feeding a project
    LogicalOperator filterOp = flattenPlan.getPredecessors(innerForeach).get(0);
    assertTrue(filterOp instanceof LOFilter);
    LogicalPlan condition = ((LOFilter) filterOp).getComparisonPlan();
    LOLesserThan lessThan = (LOLesserThan) condition.getLeaves().get(0);
    LOProject lhsProject = (LOProject) lessThan.getLhsOperand();
    assertTrue(condition.getPredecessors(lhsProject) == null);
}
/**
 * Generating a single scalar constant must yield a one-field foreach schema
 * of the matching type (int, long, double, float, chararray).
 */
@Test
public void testQuery97() throws FrontendException, ParseException {
    buildPlan("a = load 'one' as (name, age, gpa);");

    // Each row: { statement, expected foreach schema }
    String[][] cases = {
        {"b = foreach a generate 1;", "x: int"},
        {"b = foreach a generate 1L;", "x: long"},
        {"b = foreach a generate 1.0;", "x: double"},
        {"b = foreach a generate 1.0f;", "x: float"},
        {"b = foreach a generate 'hello';", "x: chararray"},
    };
    for (String[] constCase : cases) {
        LogicalPlan plan = buildPlan(constCase[0]);
        LOForEach fe = (LOForEach) plan.getLeaves().get(0);
        assertTrue(Schema.equals(fe.getSchema(), Util.getSchemaFromString(constCase[1]), false, true));
    }
}
/**
 * Generating a tuple constant must yield a tuple-typed field whose members
 * carry the types of the constants inside it, including a tuple that nests a
 * bag constant.
 */
@Test
public void testQuery98() throws FrontendException, ParseException {
    buildPlan("a = load 'one' as (name, age, gpa);");

    // Each row: { statement, expected foreach schema }
    String[][] cases = {
        {"b = foreach a generate (1);", "t:(x: int)"},
        {"b = foreach a generate (1L);", "t:(x: long)"},
        {"b = foreach a generate (1.0);", "t:(x: double)"},
        {"b = foreach a generate (1.0f);", "t:(x: float)"},
        {"b = foreach a generate ('hello');", "t:(x: chararray)"},
        {"b = foreach a generate ('hello', 1, 1L, 1.0f, 1.0);",
            "t:(x: chararray, y: int, z: long, a: float, b: double)"},
        // bag holding an int tuple and a double tuple: expected member type is double
        {"b = foreach a generate ('hello', {(1), (1.0)});",
            "t:(x: chararray, ib:{it:(d: double)})"},
    };
    for (String[] constCase : cases) {
        LogicalPlan plan = buildPlan(constCase[0]);
        LOForEach fe = (LOForEach) plan.getLeaves().get(0);
        assertTrue(Schema.equals(fe.getSchema(), Util.getSchemaFromString(constCase[1]), false, true));
    }
}
/**
 * Bag constants whose tuples disagree on the numeric type of the first
 * member: the expected schema carries the wider of the two types. When the
 * tuples differ in size, the expected tuple schema is empty.
 */
@Test
public void testQuery99() throws FrontendException, ParseException {
    buildPlan("a = load 'one' as (name, age, gpa);");

    // Each row: { statement, expected foreach schema }
    String[][] cases = {
        {"b = foreach a generate {(1, 'hello'), (2, 'world')};", "b:{t:(x: int, y: chararray)}"},
        {"b = foreach a generate {(1, 'hello'), (1L, 'world')};", "b:{t:(x: long, y: chararray)}"},
        {"b = foreach a generate {(1, 'hello'), (1.0f, 'world')};", "b:{t:(x: float, y: chararray)}"},
        {"b = foreach a generate {(1, 'hello'), (1.0, 'world')};", "b:{t:(x: double, y: chararray)}"},
        {"b = foreach a generate {(1L, 'hello'), (1.0f, 'world')};", "b:{t:(x: float, y: chararray)}"},
        {"b = foreach a generate {(1L, 'hello'), (1.0, 'world')};", "b:{t:(x: double, y: chararray)}"},
        {"b = foreach a generate {(1.0f, 'hello'), (1.0, 'world')};", "b:{t:(x: double, y: chararray)}"},
        {"b = foreach a generate {(1.0, 'hello'), (1.0f, 'world')};", "b:{t:(x: double, y: chararray)}"},
        // tuples of different sizes
        {"b = foreach a generate {(1.0, 'hello', 3.14), (1.0f, 'world')};", "b:{t:()}"},
    };
    for (String[] bagCase : cases) {
        LogicalPlan plan = buildPlan(bagCase[0]);
        LOForEach fe = (LOForEach) plan.getLeaves().get(0);
        assertTrue(Schema.equals(fe.getSchema(), Util.getSchemaFromString(bagCase[1]), false, true));
    }
}
/**
 * Defines FUNC as an alias for ARITY() and then calls it through that alias.
 * The test passes if neither statement makes buildPlan throw.
 */
@Test
public void testQuery101() {
    // define the alias
    buildPlan("define FUNC ARITY();");
    // use the alias in a generate clause
    buildPlan("foreach (load 'data') generate FUNC($0);");
}
/**
 * Basic store: load a relation and store it without naming a store function.
 * The test passes if neither statement makes buildPlan throw.
 */
@Test
public void testQuery102() {
    String[] statements = {
        "a = load 'a';",
        "store a into 'out';",
    };
    for (String statement : statements) {
        buildPlan(statement);
    }
}
/**
 * Store with an explicit store function: load a relation and store it
 * "using PigStorage()". The test passes if neither statement makes buildPlan
 * throw.
 */
@Test
public void testQuery103() {
    String[] statements = {
        "a = load 'a';",
        "store a into 'out' using PigStorage();",
    };
    for (String statement : statements) {
        buildPlan(statement);
    }
}
/**
 * After two rounds of group + flatten, the same field is referenced by its
 * unambiguous free-form alias (name), its fully qualified alias (d::a::name)
 * and a partially qualified unambiguous alias (a::name). The test passes if
 * no statement makes buildPlan throw.
 */
@Test
public void testQuery104() {
    String[] statements = {
        "a = load 'st10k' as (name, age, gpa);",
        "b = group a by name;",
        "c = foreach b generate flatten(a);",
        "d = filter c by name != 'fred';",
        "e = group d by name;",
        "f = foreach e generate flatten(d);",
        "g = foreach f generate name, d::a::name, a::name;",
    };
    for (String statement : statements) {
        buildPlan(statement);
    }
}
/**
 * The alias "group" must remain usable in a later statement after the group
 * key has been flattened with flatten(group). The test passes if no statement
 * makes buildPlan throw.
 */
@Test
public void testQuery105() {
    String[] statements = {
        "a = load 'st10k' as (name, age, gpa);",
        "b = group a by name;",
        "c = foreach b generate flatten(group), COUNT(a) as cnt;",
        "d = foreach c generate group;",
    };
    for (String statement : statements) {
        buildPlan(statement);
    }
}
/**
 * "generate *" over a load with a declared (untyped) schema must produce a
 * foreach schema with the same three fields, all bytearray.
 */
@Test
public void testQuery106() throws FrontendException, ParseException {
    buildPlan("a = load 'one' as (name, age, gpa);");
    LogicalPlan plan = buildPlan("b = foreach a generate *;");
    LOForEach fe = (LOForEach) plan.getLeaves().get(0);

    Schema actual = fe.getSchema();
    Schema expected = Util.getSchemaFromString("name: bytearray, age: bytearray, gpa: bytearray");
    assertTrue(Schema.equals(actual, expected, false, true));
}
/**
 * "generate *" over a load without a declared schema: the first foreach plan
 * must satisfy checkPlanForProjectStar.
 */
@Test
public void testQuery107() throws FrontendException, ParseException {
    buildPlan("a = load 'one';");
    LogicalPlan plan = buildPlan("b = foreach a generate *;");
    LOForEach fe = (LOForEach) plan.getLeaves().get(0);
    LogicalPlan firstGeneratePlan = fe.getForEachPlans().get(0);
    assertTrue(checkPlanForProjectStar(firstGeneratePlan));
}
/**
 * "group a by *": the expected cogroup schema has a group tuple listing all
 * of a's fields, followed by the bag "a" with the same fields.
 */
@Test
public void testQuery108() throws FrontendException, ParseException {
    final String fieldsOfA = "name: bytearray, age: bytearray, gpa: bytearray";

    buildPlan("a = load 'one' as (name, age, gpa);");
    LogicalPlan plan = buildPlan("b = group a by *;");
    LOCogroup groupOp = (LOCogroup) plan.getLeaves().get(0);

    // Two separate Schema instances are parsed, one for the key and one for the bag.
    Schema keySchema = Util.getSchemaFromString(fieldsOfA);
    Schema bagSchema = Util.getSchemaFromString(fieldsOfA);
    Schema.FieldSchema keyField = new Schema.FieldSchema("group", keySchema, DataType.TUPLE);
    Schema.FieldSchema bagField = new Schema.FieldSchema("a", bagSchema, DataType.BAG);

    Schema expected = new Schema(keyField);
    expected.add(bagField);
    assertTrue(Schema.equals(groupOp.getSchema(), expected, false, true));
}
/**
 * "group a by *, b by *" over two inputs with three fields each: the expected
 * group tuple uses the field names of the first input, followed by one bag
 * per input carrying that input's own field names.
 */
@Test
public void testQuery109() throws FrontendException, ParseException {
    final String fieldsOfA = "name: bytearray, age: bytearray, gpa: bytearray";
    final String fieldsOfB = "first_name: bytearray, enrol_age: bytearray, high_school_gpa: bytearray";

    buildPlan("a = load 'one' as (name, age, gpa);");
    buildPlan("b = load 'two' as (first_name, enrol_age, high_school_gpa);");
    LogicalPlan plan = buildPlan("c = group a by *, b by *;");
    LOCogroup groupOp = (LOCogroup) plan.getLeaves().get(0);

    // Separate Schema instances are parsed for the key and for each bag.
    Schema keySchema = Util.getSchemaFromString(fieldsOfA);
    Schema bagSchemaA = Util.getSchemaFromString(fieldsOfA);
    Schema bagSchemaB = Util.getSchemaFromString(fieldsOfB);
    Schema.FieldSchema keyField = new Schema.FieldSchema("group", keySchema, DataType.TUPLE);
    Schema.FieldSchema bagFieldA = new Schema.FieldSchema("a", bagSchemaA, DataType.BAG);
    Schema.FieldSchema bagFieldB = new Schema.FieldSchema("b", bagSchemaB, DataType.BAG);

    Schema expected = new Schema(keyField);
    expected.add(bagFieldA);
    expected.add(bagFieldB);
    assertTrue(Schema.equals(groupOp.getSchema(), expected, false, true));
}
/**
 * "cogroup ... b by *" must be rejected when b was loaded without a schema.
 * The failure is expected as an AssertionFailedError whose message contains
 * "Cogroup/Group by * is only allowed if the input has a schema".
 */
@Test
public void testQuery110Fail() throws FrontendException, ParseException {
    buildPlan("a = load 'one' as (name, age, gpa);");
    LogicalPlan lp = buildPlan("b = load 'two';");
    // Stated as an assertion; previously an otherwise unused cast played this role.
    assertTrue(lp.getLeaves().get(0) instanceof LOLoad);

    // Without this flag the test would pass vacuously when no error is raised.
    boolean exceptionThrown = false;
    try {
        buildPlan("c = cogroup a by $0, b by *;");
    } catch (AssertionFailedError e) {
        assertTrue(e.getMessage().contains("Cogroup/Group by * is only allowed if " +
            "the input has a schema"));
        exceptionThrown = true;
    }
    assertTrue(exceptionThrown);
}
/**
 * "order a by *" over a load with a declared schema: none of the resulting
 * sort-column plans may satisfy checkPlanForProjectStar.
 */
@Test
public void testQuery111() throws FrontendException, ParseException {
    buildPlan("a = load 'one' as (name, age, gpa);");
    LogicalPlan plan = buildPlan("b = order a by *;");
    LOSort orderOp = (LOSort) plan.getLeaves().get(0);

    for (LogicalPlan sortColPlan : orderOp.getSortColPlans()) {
        assertTrue(!checkPlanForProjectStar(sortColPlan));
    }
}
/**
 * "order a by *" nested inside a foreach over "group a by *": every foreach
 * plan must satisfy checkPlanForProjectStar, while none of the sort-column
 * plans of the nested sort may.
 */
@Test
public void testQuery112() throws FrontendException, ParseException {
    buildPlan("a = load 'one' as (name, age, gpa);");
    buildPlan("b = group a by *;");
    LogicalPlan plan = buildPlan("c = foreach b {a1 = order a by *; generate a1;};");
    LOForEach fe = (LOForEach) plan.getLeaves().get(0);

    for (LogicalPlan generatePlan : fe.getForEachPlans()) {
        assertTrue(checkPlanForProjectStar(generatePlan));
    }

    // The nested sort is the predecessor of the leaf of the first foreach plan.
    LogicalPlan firstGeneratePlan = fe.getForEachPlans().get(0);
    LogicalOperator leaf = firstGeneratePlan.getLeaves().get(0);
    LOSort nestedSort = (LOSort) firstGeneratePlan.getPredecessors(leaf).get(0);

    // project (*) operator here is translated to a list of projection
    // operators
    for (LogicalPlan sortColPlan : nestedSort.getSortColPlans()) {
        assertTrue(!checkPlanForProjectStar(sortColPlan));
    }
}
/**
 * Nested foreach with chained expression aliases (exp2 is defined in terms of
 * exp1): each foreach plan is printed and must not satisfy
 * checkPlanForProjectStar.
 */
@Test
public void testQuery113() throws FrontendException, ParseException {
    buildPlan("a = load 'one' as (name, age, gpa);");
    LogicalPlan lp = buildPlan("b = foreach a {exp1 = age + gpa; exp2 = exp1 + age; generate exp1, exp2;};");
    LOForEach foreach = (LOForEach) lp.getLeaves().get(0);

    for (LogicalPlan foreachPlan : foreach.getForEachPlans()) {
        printPlan(foreachPlan);
        assertTrue(checkPlanForProjectStar(foreachPlan) == false);
    }
}
/**
 * Calls the Identity UDF with two named fields: the expected foreach schema
 * is a single unnamed tuple holding name and age as bytearray.
 */
@Test
public void testQuery114() throws FrontendException, ParseException {
    buildPlan("a = load 'one' as (name, age, gpa);");
    LogicalPlan lp = buildPlan("b = foreach a generate " + Identity.class.getName() + "(name, age);");
    LOForEach foreach = (LOForEach) lp.getLeaves().get(0);

    // Expected: one tuple field (no alias) wrapping the two arguments.
    Schema s = new Schema();
    s.add(new Schema.FieldSchema("name", DataType.BYTEARRAY));
    s.add(new Schema.FieldSchema("age", DataType.BYTEARRAY));
    Schema.FieldSchema tupleFs = new Schema.FieldSchema(null, s, DataType.TUPLE);
    Schema expectedSchema = new Schema(tupleFs);
    assertTrue(Schema.equals(foreach.getSchema(), expectedSchema, false, true));
}
/**
 * Calls the Identity UDF with "*" on an input that has a declared schema: the
 * expected foreach schema is a single unnamed tuple holding name, age and gpa
 * as bytearray.
 */
@Test
public void testQuery115() throws FrontendException, ParseException {
    buildPlan("a = load 'one' as (name, age, gpa);");
    LogicalPlan lp = buildPlan("b = foreach a generate " + Identity.class.getName() + "(*);");
    LOForEach foreach = (LOForEach) lp.getLeaves().get(0);

    // Expected: one tuple field (no alias) wrapping every declared field.
    Schema s = new Schema();
    s.add(new Schema.FieldSchema("name", DataType.BYTEARRAY));
    s.add(new Schema.FieldSchema("age", DataType.BYTEARRAY));
    s.add(new Schema.FieldSchema("gpa", DataType.BYTEARRAY));
    Schema.FieldSchema tupleFs = new Schema.FieldSchema(null, s, DataType.TUPLE);
    Schema expectedSchema = new Schema(tupleFs);
    assertTrue(Schema.equals(foreach.getSchema(), expectedSchema, false, true));
}
/**
 * Calls the Identity UDF with two positional arguments on an input without a
 * declared schema: the expected foreach schema is a single unnamed tuple with
 * two unnamed bytearray members.
 */
@Test
public void testQuery116() throws FrontendException, ParseException {
    buildPlan("a = load 'one';");
    LogicalPlan lp = buildPlan("b = foreach a generate " + Identity.class.getName() + "($0, $1);");
    LOForEach foreach = (LOForEach) lp.getLeaves().get(0);

    // Expected: one tuple field (no alias) with two members that have no alias either.
    Schema s = new Schema();
    s.add(new Schema.FieldSchema(null, DataType.BYTEARRAY));
    s.add(new Schema.FieldSchema(null, DataType.BYTEARRAY));
    Schema.FieldSchema tupleFs = new Schema.FieldSchema(null, s, DataType.TUPLE);
    Schema expectedSchema = new Schema(tupleFs);
    assertTrue(Schema.equals(foreach.getSchema(), expectedSchema, false, true));
}
/**
 * Calls the Identity UDF with "*" on an input without a declared schema: the
 * expected foreach schema is a single unnamed tuple with no inner schema.
 */
@Test
public void testQuery117() throws FrontendException, ParseException {
    buildPlan("a = load 'one';");
    LogicalPlan lp = buildPlan("b = foreach a generate " + Identity.class.getName() + "(*);");
    LOForEach foreach = (LOForEach) lp.getLeaves().get(0);

    // Expected: one tuple field with neither an alias nor an inner schema.
    Schema.FieldSchema tupleFs = new Schema.FieldSchema(null, null, DataType.TUPLE);
    Schema expectedSchema = new Schema(tupleFs);
    assertTrue(Schema.equals(foreach.getSchema(), expectedSchema, false, true));
}
/**
 * The null constant as an operand of arithmetic operators on an int column
 * (+, *, /, -, %) and on a double column (+, *, /, -). The test passes if no
 * statement makes buildPlan throw.
 */
@Test
public void testNullConsArithExprs() {
    buildPlan("a = load 'a' as (x:int, y:double);");

    String intExpressions = "b = foreach a generate x + null, x * null, x / null, x - null, null % x, ";
    String doubleExpressions = "y + null, y * null, y / null, y - null;";
    buildPlan(intExpressions + doubleExpressions);
}
/**
 * The null constant as either branch of a bincond (cond ? x : y). The test
 * passes if no statement makes buildPlan throw.
 */
@Test
public void testNullConsBincond1() {
    buildPlan("a = load 'a' as (x:int, y:double);");

    String nullAsFirstBranch = "b = foreach a generate (2 > 1? null : 1), ( 2 < 1 ? null : 1), ";
    String nullAsSecondBranch = "(2 > 1 ? 1 : null), ( 2 < 1 ? 1 : null);";
    buildPlan(nullAsFirstBranch + nullAsSecondBranch);
}
/**
 * The null constant as the subject of "is null" / "is not null" inside a
 * bincond condition. The test passes if no statement makes buildPlan throw.
 */
@Test
public void testNullConsBincond2() {
    String[] statements = {
        "a = load 'a' as (x:int, y:double);",
        "b = foreach a generate (null is null ? 1 : 2), ( null is not null ? 2 : 1);",
    };
    for (String statement : statements) {
        buildPlan(statement);
    }
}
/**
 * The null constant as a stand-alone generate expression, interleaved with
 * ordinary column references. The test passes if no statement makes buildPlan
 * throw.
 */
@Test
public void testNullConsForEachGenerate() {
    String[] statements = {
        "a = load 'a' as (x:int, y:double);",
        "b = foreach a generate x, null, y, null;",
    };
    for (String statement : statements) {
        buildPlan(statement);
    }
}
/**
 * Outer-join idiom on top of cogroup: each bag is replaced by null when it is
 * empty and then flattened. The test passes if no statement makes buildPlan
 * throw.
 */
@Test
public void testNullConsOuterJoin() {
    buildPlan("a = load 'a' as (x:int, y:chararray);");
    buildPlan("b = load 'b' as (u:int, v:chararray);");
    buildPlan("c = cogroup a by x, b by u;");

    String flattenA = "d = foreach c generate flatten((SIZE(a) == 0 ? null : a)), ";
    String flattenB = "flatten((SIZE(b) == 0 ? null : b));";
    buildPlan(flattenA + flattenB);
}
/**
 * The null constant as an argument to SIZE and as either argument of CONCAT.
 * The test passes if no statement makes buildPlan throw.
 */
@Test
public void testNullConsConcatSize() {
    buildPlan("a = load 'a' as (x:int, y:double, str:chararray);");

    String sizeAndFirstConcat = "b = foreach a generate SIZE(null), CONCAT(str, null), ";
    String secondConcat = "CONCAT(null, str);";
    buildPlan(sizeAndFirstConcat + secondConcat);
}
/**
 * Defines "isempty" as an alias for IsEmpty() and uses the alias as a filter
 * condition. The test passes if no statement makes buildPlan throw.
 */
@Test
public void testFilterUdfDefine() {
    String[] statements = {
        "define isempty IsEmpty();",
        "a = load 'a' as (x:int, y:double, str:chararray);",
        "b = filter a by isempty(*);",
    };
    for (String statement : statements) {
        buildPlan(statement);
    }
}
/**
 * Defines "PS" as an alias for PigStorage() and uses the alias in a load's
 * "using" clause. The test passes if no statement makes buildPlan throw.
 */
@Test
public void testLoadUdfDefine() {
    String[] statements = {
        "define PS PigStorage();",
        "a = load 'a' using PS as (x:int, y:double, str:chararray);",
        "b = filter a by IsEmpty(*);",
    };
    for (String statement : statements) {
        buildPlan(statement);
    }
}
/**
 * Defines "PS" as an alias for PigStorage(':'), i.e. with a constructor
 * argument, and uses the alias in a load's "using" clause. The test passes if
 * no statement makes buildPlan throw.
 */
@Test
public void testLoadUdfConstructorArgDefine() {
    String[] statements = {
        "define PS PigStorage(':');",
        "a = load 'a' using PS as (x:int, y:double, str:chararray);",
        "b = filter a by IsEmpty(*);",
    };
    for (String statement : statements) {
        buildPlan(statement);
    }
}
/**
 * Defines "PS" as an alias for PigStorage() and uses the alias for both the
 * load and the store. The test passes if no statement makes buildPlan throw.
 */
@Test
public void testStoreUdfDefine() {
    String[] statements = {
        "define PS PigStorage();",
        "a = load 'a' using PS as (x:int, y:double, str:chararray);",
        "b = filter a by IsEmpty(*);",
        "store b into 'x' using PS;",
    };
    for (String statement : statements) {
        buildPlan(statement);
    }
}
/**
 * Defines "PS" as an alias for PigStorage(':'), i.e. with a constructor
 * argument, and uses the alias for both the load and the store. The test
 * passes if no statement makes buildPlan throw.
 */
@Test
public void testStoreUdfConstructorArgDefine() {
    // The leading blanks in three of the statements are part of the input.
    String[] statements = {
        "define PS PigStorage(':');",
        " a = load 'a' using PS as (x:int, y:double, str:chararray);",
        " b = filter a by IsEmpty(*);",
        " store b into 'x' using PS;",
    };
    for (String statement : statements) {
        buildPlan(statement);
    }
}
/**
 * Casts two fields referenced by alias and then groups by one of those
 * aliases. The test passes if no statement makes buildPlan throw.
 */
@Test
public void testCastAlias() {
    String[] statements = {
        "a = load 'one.txt' as (x,y); ",
        "b = foreach a generate (int)x, (double)y;",
        "c = group b by x;",
    };
    for (String statement : statements) {
        buildPlan(statement);
    }
}
/**
 * Casts two fields referenced by position and then groups by position. The
 * test passes if no statement makes buildPlan throw.
 */
@Test
public void testCast() {
    String[] statements = {
        "a = load 'one.txt' as (x,y); ",
        "b = foreach a generate (int)$0, (double)$1;",
        "c = group b by $0;",
    };
    for (String statement : statements) {
        buildPlan(statement);
    }
}
/**
 * A define statement may use a reserved word as a later part of a fully
 * qualified function name. For every keyword below, "define FUNC
 * org.apache.&lt;keyword&gt;();" is built and the first root of the resulting plan
 * must be an LODefine.
 */
@Test
public void testReservedWordsInFunctionNames() {
    // test that define can contain reserved words as later parts of
    // fully qualified function name
    String[] reservedWords = {
        "define", "load", "filter", "foreach", "matches",
        "order", "arrange", "distinct", "cogroup", "join",
        "cross", "union", "split", "into", "if",
        "all", "any", "as", "by", "using",
        "inner", "outer", "parallel", "partition", "group",
        "and", "or", "not", "generate", "flatten",
        "eval", "asc", "desc", "int", "long",
        "float", "double", "chararray", "bytearray", "bag",
        "tuple", "map", "is", "null", "stream",
        "through", "store", "ship", "cache", "input",
        "output", "stderr", "stdin", "stdout", "limit",
        "sample", "left", "right", "full", "eq",
        "gt", "lt", "gte", "lte", "neq"
    };

    for (int i = 0; i < reservedWords.length; i++) {
        String defineStatement = "define FUNC org.apache." + reservedWords[i] + "();";
        LogicalPlan plan = buildPlan(defineStatement);
        LogicalOperator firstRoot = plan.getRoots().get(0);
        assertTrue(firstRoot instanceof LODefine);
    }
}
/**
 * TOKENIZE on a chararray field: the expected foreach schema is a bag
 * "bag_of_tokenTuples" containing a tuple "tuple_of_tokens" with a single
 * chararray member "token".
 */
@Test
public void testTokenizeSchema() throws FrontendException, ParseException {
    buildPlan("a = load 'one' as (f1: chararray);");
    LogicalPlan plan = buildPlan("b = foreach a generate TOKENIZE(f1);");
    LOForEach fe = (LOForEach) plan.getLeaves().get(0);

    // innermost level: (token: chararray)
    Schema.FieldSchema tokenField = new Schema.FieldSchema("token", DataType.CHARARRAY);
    Schema tokenTupleSchema = new Schema(tokenField);

    // middle level: tuple_of_tokens wrapping the token schema
    Schema.FieldSchema tupleField =
        new Schema.FieldSchema("tuple_of_tokens", tokenTupleSchema, DataType.TUPLE);
    Schema bagContents = new Schema(tupleField);
    bagContents.setTwoLevelAccessRequired(true);

    // outermost level: the bag itself
    Schema.FieldSchema bagField =
        new Schema.FieldSchema("bag_of_tokenTuples", bagContents, DataType.BAG);

    assertTrue(Schema.equals(fe.getSchema(), new Schema(bagField), false, true));
}
/**
 * "generate ()" must produce an LOConst of type TUPLE whose value equals a
 * freshly created empty tuple, and a foreach schema of one unnamed tuple
 * field without an inner schema.
 */
@Test
public void testEmptyTupleConst() throws FrontendException{
    LogicalPlan plan = buildPlan("a = foreach (load 'b') generate ();");
    LOForEach fe = (LOForEach) plan.getLeaves().get(0);

    // the leaf of the first generate plan is the constant itself
    LogicalPlan generatePlan = fe.getForEachPlans().get(0);
    LogicalOperator leaf = generatePlan.getLeaves().get(0);
    assertTrue(leaf instanceof LOConst);

    LOConst constant = (LOConst) leaf;
    assertTrue(constant.getType() == DataType.TUPLE);
    assertTrue(constant.getValue() instanceof Tuple);
    assertTrue(constant.getValue().equals(TupleFactory.getInstance().newTuple()));

    Schema expected = new Schema(new Schema.FieldSchema(null, null, DataType.TUPLE));
    assertTrue(Schema.equals(fe.getSchema(), expected, false, true));
}
/**
 * "generate []" must produce an LOConst of type MAP whose value equals an
 * empty HashMap, and a foreach schema of one unnamed map field without an
 * inner schema.
 */
@Test
public void testEmptyMapConst() throws FrontendException{
    LogicalPlan plan = buildPlan("a = foreach (load 'b') generate [];");
    LOForEach fe = (LOForEach) plan.getLeaves().get(0);

    // the leaf of the first generate plan is the constant itself
    LogicalPlan generatePlan = fe.getForEachPlans().get(0);
    LogicalOperator leaf = generatePlan.getLeaves().get(0);
    assertTrue(leaf instanceof LOConst);

    LOConst constant = (LOConst) leaf;
    assertTrue(constant.getType() == DataType.MAP);
    assertTrue(constant.getValue() instanceof Map);
    assertTrue(constant.getValue().equals(new HashMap<String,Object>()));

    Schema expected = new Schema(new Schema.FieldSchema(null, null, DataType.MAP));
    assertTrue(Schema.equals(fe.getSchema(), expected, false, true));
}
/**
 * "generate {}" must produce an LOConst of type BAG whose value equals a
 * freshly created default bag, and a foreach schema of one unnamed bag field
 * without an inner schema.
 */
@Test
public void testEmptyBagConst() throws FrontendException{
    LogicalPlan plan = buildPlan("a = foreach (load 'b') generate {};");
    LOForEach fe = (LOForEach) plan.getLeaves().get(0);

    // the leaf of the first generate plan is the constant itself
    LogicalPlan generatePlan = fe.getForEachPlans().get(0);
    LogicalOperator leaf = generatePlan.getLeaves().get(0);
    assertTrue(leaf instanceof LOConst);

    LOConst constant = (LOConst) leaf;
    assertTrue(constant.getType() == DataType.BAG);
    assertTrue(constant.getValue() instanceof DataBag);
    assertTrue(constant.getValue().equals(BagFactory.getInstance().newDefaultBag()));

    Schema expected = new Schema(new Schema.FieldSchema(null, null, DataType.BAG));
    assertTrue(Schema.equals(fe.getSchema(), expected, false, true));
}
/**
 * "generate (())": an empty tuple nested in a tuple. The expected foreach
 * schema is an unnamed tuple whose inner schema holds one unnamed tuple
 * without an inner schema.
 */
@Test
public void testEmptyTupConstRecursive1() throws FrontendException{
    LogicalPlan plan = buildPlan("a = foreach (load 'b') generate (());");
    LOForEach fe = (LOForEach) plan.getLeaves().get(0);

    // built inside-out: inner empty tuple, then the tuple wrapping it
    Schema.FieldSchema innerTuple = new Schema.FieldSchema(null, null, DataType.TUPLE);
    Schema outerContents = new Schema(innerTuple);
    Schema.FieldSchema outerTuple = new Schema.FieldSchema(null, outerContents, DataType.TUPLE);

    Schema expected = new Schema(outerTuple);
    assertTrue(Schema.equals(fe.getSchema(), expected, false, true));
}
/**
 * "generate ([])": an empty map nested in a tuple. The expected foreach
 * schema is an unnamed tuple whose inner schema holds one unnamed map without
 * an inner schema.
 */
@Test
public void testEmptyTupConstRecursive2() throws FrontendException{
    LogicalPlan plan = buildPlan("a = foreach (load 'b') generate ([]);");
    LOForEach fe = (LOForEach) plan.getLeaves().get(0);

    // built inside-out: inner empty map, then the tuple wrapping it
    Schema.FieldSchema innerMap = new Schema.FieldSchema(null, null, DataType.MAP);
    Schema outerContents = new Schema(innerMap);
    Schema.FieldSchema outerTuple = new Schema.FieldSchema(null, outerContents, DataType.TUPLE);

    Schema expected = new Schema(outerTuple);
    assertTrue(Schema.equals(fe.getSchema(), expected, false, true));
}
/**
 * "generate ({})": an empty bag nested in a tuple. The expected foreach
 * schema is an unnamed tuple whose inner schema holds one unnamed bag without
 * an inner schema.
 */
@Test
public void testEmptyTupConstRecursive3() throws FrontendException{
    LogicalPlan plan = buildPlan("a = foreach (load 'b') generate ({});");
    LOForEach fe = (LOForEach) plan.getLeaves().get(0);

    // built inside-out: inner empty bag, then the tuple wrapping it
    Schema.FieldSchema innerBag = new Schema.FieldSchema(null, null, DataType.BAG);
    Schema outerContents = new Schema(innerBag);
    Schema.FieldSchema outerTuple = new Schema.FieldSchema(null, outerContents, DataType.TUPLE);

    Schema expected = new Schema(outerTuple);
    assertTrue(Schema.equals(fe.getSchema(), expected, false, true));
}
/**
 * Builds a plan generating <code>{()}</code> and compares the foreach schema with a
 * BAG field schema whose inner schema holds a single TUPLE field schema and has
 * two-level access marked as required.
 */
@Test
public void testEmptyBagConstRecursive() throws FrontendException{
    LogicalPlan plan = buildPlan("a = foreach (load 'b') generate {()};");
    LOForEach forEachOp = (LOForEach) plan.getLeaves().get(0);

    // Inner schema of the bag: one tuple field, flagged for two-level access.
    Schema tupleInBag = new Schema(new Schema.FieldSchema(null, null, DataType.TUPLE));
    tupleInBag.setTwoLevelAccessRequired(true);

    Schema expected = new Schema(new Schema.FieldSchema(null, tupleInBag, DataType.BAG));
    assertTrue(Schema.equals(forEachOp.getSchema(), expected, false, true));
}
/**
 * Feeds a series of scripts with nested empty constants through buildPlan, in order,
 * to exercise the recursive handling of empty constants by the parser.
 */
@Test
public void testRandomEmptyConst(){
    String[] queries = {
        "a = foreach (load 'b') generate {({})};",
        "a = foreach (load 'b') generate ({()});",
        "a = foreach (load 'b') generate {(),()};",
        "a = foreach (load 'b') generate ({},{});",
        "a = foreach (load 'b') generate ((),());",
        "a = foreach (load 'b') generate ([],[]);",
        "a = foreach (load 'b') generate {({},{})};",
        "a = foreach (load 'b') generate {([],[])};",
        "a = foreach (load 'b') generate (({},{}));",
        "a = foreach (load 'b') generate (([],[]));"
    };
    // buildPlan fails the test itself when a script cannot be parsed.
    for (String query : queries) {
        buildPlan(query);
    }
}
/**
 * See PIG-1024: a nested limit whose result feeds several generated expressions
 * shall not throw an exception while the plan is built.
 */
@Test
public void testLimitMultipleOutput() {
    String[] script = {
        " a = load '1.txt' as (a0:int, a1:int, a2:int);",
        " b = group a by a0;",
        " c = foreach b { c1 = limit a 10;c2 = (c1.a0/c1.a1);c3 = (c1.a0/c1.a2);generate c2, c3;};"
    };
    // Statements are submitted one at a time, in script order.
    for (String statement : script) {
        buildPlan(statement);
    }
}
/**
 * See PIG-644: a load schema that declares the same alias twice must make
 * SchemaAliasValidator throw a PlanValidationException whose cause mentions
 * "Duplicate schema"; the test fails if no such exception is thrown.
 */
@Test
public void testDuplicateSchema1() {
    boolean duplicateReported = false;
    try {
        LogicalPlan plan = buildPlan(" a = load '1.txt' as (a0:int, a0:int);");
        SchemaAliasValidator validator = new SchemaAliasValidator();
        CompilationMessageCollector messages = new CompilationMessageCollector();
        validator.validate(plan, messages);
    } catch (PlanValidationException pve) {
        String causeMessage = pve.getCause().getMessage();
        assertTrue(causeMessage.contains("Duplicate schema"));
        duplicateReported = true;
    }
    if (!duplicateReported) {
        fail();
    }
}
/**
 * See PIG-644: a foreach that re-uses an existing alias via "as" must make
 * SchemaAliasValidator throw a PlanValidationException whose cause mentions
 * "Duplicate schema"; the test fails if no such exception is thrown.
 */
@Test
public void testDuplicateSchema2() {
    boolean duplicateReported = false;
    try {
        buildPlan(" a = load '1.txt' as (a0:int, a1:int);");
        LogicalPlan plan = buildPlan(" b = foreach a generate a0, a1 as a0;");
        SchemaAliasValidator validator = new SchemaAliasValidator();
        CompilationMessageCollector messages = new CompilationMessageCollector();
        validator.validate(plan, messages);
    } catch (PlanValidationException pve) {
        String causeMessage = pve.getCause().getMessage();
        assertTrue(causeMessage.contains("Duplicate schema"));
        duplicateReported = true;
    }
    if (!duplicateReported) {
        fail();
    }
}
/**
 * Cogroup by * where the second input is loaded without a schema: buildPlan is
 * expected to fail with an AssertionFailedError carrying the
 * "only allowed if the input has a schema" message.
 */
@Test
public void testCogroupByStarFailure1() {
    boolean sawFailure = false;
    try {
        buildPlan(" a = load '1.txt' as (a0:int, a1:int);");
        buildPlan(" b = load '2.txt'; ");
        buildPlan("c = cogroup a by *, b by *;");
    } catch (AssertionFailedError failure) {
        String message = failure.getMessage();
        assertTrue(message.contains("Cogroup/Group by * is only allowed if the input has a schema"));
        sawFailure = true;
    }
    assertEquals("An exception was expected but did not occur", true, sawFailure);
}
/**
 * Cogroup by * where the first input is loaded without a schema: buildPlan is
 * expected to fail with an AssertionFailedError carrying the
 * "only allowed if the input has a schema" message.
 */
@Test
public void testCogroupByStarFailure2() {
    boolean sawFailure = false;
    try {
        buildPlan(" a = load '1.txt' ;");
        buildPlan(" b = load '2.txt' as (b0:int, b1:int); ");
        buildPlan("c = cogroup a by *, b by *;");
    } catch (AssertionFailedError failure) {
        String message = failure.getMessage();
        assertTrue(message.contains("Cogroup/Group by * is only allowed if the input has a schema"));
        sawFailure = true;
    }
    assertEquals("An exception was expected but did not occur", true, sawFailure);
}
/**
 * Cogroup on columns declared as int on one input and chararray on the other:
 * buildPlan is expected to fail with an AssertionFailedError whose message
 * mentions both "Cogroup column" and "has incompatible types".
 */
@Test
public void testCogroupByIncompatibleSchemaFailure() {
    boolean sawFailure = false;
    try {
        buildPlan(" a = load '1.txt' as (a0:int, a1:int);");
        buildPlan(" b = load '2.txt' as (a0:int, a1:chararray); ");
        buildPlan("c = cogroup a by (a0,a1), b by (a0,a1);");
    } catch (AssertionFailedError failure) {
        String message = failure.getMessage();
        assertTrue(message.contains("Cogroup column"));
        assertTrue(message.contains("has incompatible types"));
        sawFailure = true;
    }
    assertEquals("An exception was expected but did not occur", true, sawFailure);
}
/**
 * Checks that the load function of each load statement reports a UDF context
 * signature equal to the alias the statement assigns to ("a", then "b").
 */
@Test
public void testLoaderSignature() {
    LogicalPlan plan = buildPlan(" a = load '1.txt' using org.apache.pig.test.PigStorageWithSchema() as (a0:int, a1:int);");
    LOLoad load = (LOLoad) plan.getLeaves().get(0);
    PigStorageWithSchema loader = (PigStorageWithSchema) load.getLoadFunc();
    assertTrue(loader.getUDFContextSignature().equals("a"));

    plan = buildPlan(" b = load '1.txt' using org.apache.pig.test.PigStorageWithSchema();");
    load = (LOLoad) plan.getLeaves().get(0);
    loader = (PigStorageWithSchema) load.getLoadFunc();
    assertTrue(loader.getUDFContextSignature().equals("b"));
}
/**
 * Debugging aid: writes a header, the LOPrinter rendering of the given plan and a
 * trailing blank line to System.err. If the printer throws, only the exception
 * message is written and the method still completes normally.
 */
private void printPlan(LogicalPlan lp) {
    LOPrinter printer = new LOPrinter(System.err, lp);
    System.err.println("Printing the logical plan");
    try {
        printer.visit();
    } catch (Exception ex) {
        // Best effort: report the message and carry on.
        System.err.println(ex.getMessage());
    }
    System.err.println();
}
/**
 * Reports whether any leaf of the given plan is an LOProject for which
 * isStar() returns true. Only the leaves are inspected.
 */
private boolean checkPlanForProjectStar(LogicalPlan lp) {
    for (LogicalOperator leaf : lp.getLeaves()) {
        if (leaf instanceof LOProject && ((LOProject) leaf).isStar()) {
            return true;
        }
    }
    return false;
}
// Helper Functions
// =================
/**
 * Builds a logical plan for the query using the class loader that loaded
 * LogicalPlanBuilder; delegates to buildPlan(String, ClassLoader).
 */
public LogicalPlan buildPlan(String query) {
    ClassLoader builderLoader = LogicalPlanBuilder.class.getClassLoader();
    return buildPlan(query, builderLoader);
}
/**
 * Parses a single Pig Latin statement into a logical plan, using this test's
 * shared parser state (aliases, logicalOpTable, aliasOp, fileNameMap).
 *
 * Any exception raised while connecting, parsing or validating the roots is
 * turned into a test failure via fail(), so callers see an assertion failure
 * whose message carries the underlying error text.
 *
 * @param query the statement to parse
 * @param cldr  class loader installed on LogicalPlanBuilder.classloader before parsing
 * @return the parsed plan; never null on normal return
 */
public LogicalPlan buildPlan(String query, ClassLoader cldr) {
    LogicalPlanBuilder.classloader = cldr;
    try {
        pigContext.connect();
        LogicalPlanBuilder builder = new LogicalPlanBuilder(pigContext);
        LogicalPlan lp = builder.parse("Test-Plan-Builder",
                                       query,
                                       aliases,
                                       logicalOpTable,
                                       aliasOp,
                                       fileNameMap);
        // Assert on the plan itself, and before it is dereferenced. A boxed boolean
        // such as (lp != null) is never null, so asserting on it can never fail.
        assertNotNull(lp);

        // Every root of the plan must be a load or a define operator.
        for (LogicalOperator op : lp.getRoots()) {
            if (!(op instanceof LOLoad) && !(op instanceof LODefine)) {
                throw new Exception("Cannot have a root that is not the load or define operator. Found " + op.getClass().getName());
            }
        }
        return lp;
    } catch (IOException e) {
        // Prefer the PigException message when one can be extracted from the exception.
        PigException pe = LogUtils.getPigException(e);
        fail("IOException: " + (pe == null? e.getMessage(): pe.getMessage()));
    } catch (Exception e) {
        log.error(e);
        PigException pe = LogUtils.getPigException(e);
        fail(e.getClass().getName() + ": " + (pe == null? e.getMessage(): pe.getMessage()) + " -- " + query);
    }
    // Not reached in practice: fail() always throws. Kept so the method compiles.
    return null;
}
// Parser state passed to LogicalPlanBuilder.parse() on every buildPlan() call.
// These are instance fields, so the same maps are handed to each call made on
// this test instance; presumably this is what lets a later statement refer to
// an alias defined by an earlier buildPlan() call -- confirm against the parser.
Map<LogicalOperator, LogicalPlan> aliases = new HashMap<LogicalOperator, LogicalPlan>();
Map<OperatorKey, LogicalOperator> logicalOpTable = new HashMap<OperatorKey, LogicalOperator>();
Map<String, LogicalOperator> aliasOp = new HashMap<String, LogicalOperator>();
Map<String, String> fileNameMap = new HashMap<String, String>();
// Context used by buildPlan(): MAPREDUCE exec type, configured from the
// properties of `cluster` (declared outside this section).
PigContext pigContext = new PigContext(ExecType.MAPREDUCE, cluster.getProperties());
}