/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.pig.piggybank.test;
import static org.junit.Assert.assertEquals;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import org.apache.pig.ExecType;
import org.apache.pig.PigServer;
import org.apache.pig.ResourceSchema;
import org.apache.pig.data.DataType;
import org.apache.pig.data.Tuple;
import org.apache.pig.impl.PigContext;
import org.apache.pig.impl.io.FileLocalizer;
import org.apache.pig.impl.logicalLayer.LogicalOperator;
import org.apache.pig.impl.logicalLayer.LogicalPlan;
import org.apache.pig.impl.logicalLayer.schema.Schema;
import org.apache.pig.impl.plan.OperatorKey;
import org.apache.pig.test.MiniCluster;
import org.apache.pig.test.Util;
import org.apache.pig.test.utils.TypeCheckingTestUtil;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import junit.framework.TestCase;
/**
 * Integration tests for the piggybank {@code PigStorageSchema} load/store function.
 *
 * <p>Each test runs Pig Latin queries through a {@link PigServer} backed by a shared
 * {@link MiniCluster}. The tests check that a schema written by a {@code STORE} is
 * recovered by a later {@code LOAD} without an {@code AS} clause, that an explicit
 * {@code AS} clause takes precedence, and that grouped (bag-valued) relations survive
 * the round trip.
 *
 * <p>Every file or directory a test creates on the cluster is removed again, either in
 * {@link #tearDown()} or by the test itself, so that runs do not interfere with each other.
 */
public class TestPigStorageSchema extends TestCase {

    /** Fully-qualified name of the storage function under test, as written in Pig Latin. */
    private static final String STORAGE = "org.apache.pig.piggybank.storage.PigStorageSchema";

    /** Output locations written by the STORE statements of the individual tests. */
    private static final String[] OUTPUT_PATHS = {"aout", "bout", "cout"};

    protected ExecType execType = ExecType.MAPREDUCE;
    PigContext pigContext = new PigContext(ExecType.MAPREDUCE, new Properties());
    Map<LogicalOperator, LogicalPlan> aliases = new HashMap<LogicalOperator, LogicalPlan>();
    Map<OperatorKey, LogicalOperator> logicalOpTable = new HashMap<OperatorKey, LogicalOperator>();
    Map<String, LogicalOperator> aliasOp = new HashMap<String, LogicalOperator>();
    Map<String, String> fileNameMap = new HashMap<String, String>();
    MiniCluster cluster = MiniCluster.buildCluster();

    /** Pig server bound to {@link #cluster}; recreated for every test in {@link #setUp()}. */
    private PigServer pig;

    /**
     * Creates a fresh {@link PigServer} and (re)creates the {@code originput} input file,
     * which holds ten comma-separated {@code letter,number} records.
     *
     * @throws Exception if the server cannot be created or the input file cannot be written
     */
    @Before
    @Override
    protected void setUp() throws Exception {
        super.setUp();
        pig = new PigServer(ExecType.MAPREDUCE, cluster.getProperties());
        // A stale copy may be left over from an aborted run; remove it before recreating.
        deleteIfExists("originput");
        Util.createInputFile(cluster, "originput",
                new String[] {"A,1", "B,2", "C,3", "D,2",
                              "A,5", "B,5", "C,8", "A,8",
                              "D,8", "A,9"});
    }

    /**
     * Removes the shared input file and every output location any test may have written.
     *
     * @throws Exception if a cluster file operation fails
     */
    @After
    @Override
    protected void tearDown() throws Exception {
        try {
            Util.deleteFile(cluster, "originput");
            // Remove all STORE targets, not only 'aout', so nothing leaks between tests.
            for (String output : OUTPUT_PATHS) {
                deleteIfExists(output);
            }
        } finally {
            super.tearDown();
        }
    }

    /**
     * Deletes {@code name} from the file system of the current Pig context if it exists.
     *
     * @param name path of the file or directory, resolved via {@link FileLocalizer#fullPath}
     * @throws Exception if resolving, probing or deleting the path fails
     */
    private void deleteIfExists(String name) throws Exception {
        PigContext context = pig.getPigContext();
        String resolved = FileLocalizer.fullPath(name, context);
        if (FileLocalizer.fileExists(resolved, context)) {
            FileLocalizer.delete(resolved, context);
        }
    }

    /**
     * Asserts that iterating {@code alias} yields exactly the given tuples, in order.
     * Tuples are compared through their string representation.
     *
     * @param alias Pig alias whose contents are checked
     * @param expectedTuples expected rows written as constant tuple strings
     * @throws Exception if the alias cannot be evaluated
     */
    private void assertAliasContents(String alias, String[] expectedTuples) throws Exception {
        List<Tuple> expected = Util.getTuplesFromConstantTupleStrings(expectedTuples);
        Iterator<Tuple> actual = pig.openIterator(alias);
        int seen = 0;
        while (actual.hasNext()) {
            Tuple row = actual.next();
            // Fail with a readable message instead of an IndexOutOfBoundsException.
            assertTrue("unexpected extra row " + row + " in " + alias, seen < expected.size());
            assertEquals(expected.get(seen).toString(), row.toString());
            seen++;
        }
        assertEquals(expected.size(), seen);
    }

    /**
     * Stores a relation with a declared schema, then checks that reloading it without an
     * {@code AS} clause reproduces that schema and that an explicit {@code AS} clause
     * replaces it.
     */
    @Test
    public void testPigStorageSchema() throws Exception {
        // NOTE(review): this connects the stand-alone pigContext field, which is not the
        // context used by 'pig'; kept as in the original -- confirm whether it is needed.
        pigContext.connect();
        pig.registerQuery("a = LOAD 'originput' using " + STORAGE
                + "() as (f1:chararray, f2:int);");
        Schema origSchema = pig.dumpSchema("a");
        pig.registerQuery("STORE a into 'aout' using " + STORAGE + "();");

        // aout now has a schema.
        // Verify that loading aout with no given schema produces the original schema.
        pig.registerQuery("b = LOAD 'aout' using " + STORAGE + "();");
        Schema genSchema = pig.dumpSchema("b");
        assertTrue("generated schema equals original",
                Schema.equals(genSchema, origSchema, true, false));

        // Verify that giving our own schema works.
        String[] fieldNames = {"foo", "bar"};
        byte[] fieldTypes = {DataType.INTEGER, DataType.LONG};
        Schema newSchema = TypeCheckingTestUtil.genFlatSchema(fieldNames, fieldTypes);
        pig.registerQuery("c = LOAD 'aout' using " + STORAGE + "() as (foo:int, bar:long);");
        Schema newGenSchema = pig.dumpSchema("c");
        assertTrue("explicit schema overrides metadata",
                Schema.equals(newSchema, newGenSchema, true, false));
    }

    /**
     * Round-trips a relation grouped on a single int column and checks both the recovered
     * schema and the per-group sums computed from the stored data.
     */
    @Test
    public void testSchemaConversion() throws Exception {
        Util.createInputFile(cluster, "originput2",
                new String[] {"1", "2", "3", "2",
                              "5", "5", "8", "8",
                              "8", "9"});
        try {
            pig.registerQuery("A = LOAD 'originput2' using " + STORAGE + "() as (f:int);");
            pig.registerQuery("B = group A by f;");
            Schema origSchema = pig.dumpSchema("B");
            ResourceSchema rs1 = new ResourceSchema(origSchema);
            pig.registerQuery("STORE B into 'bout' using " + STORAGE + "();");

            pig.registerQuery("C = LOAD 'bout' using " + STORAGE + "();");
            Schema genSchema = pig.dumpSchema("C");
            ResourceSchema rs2 = new ResourceSchema(genSchema);
            assertTrue("generated schema equals original", ResourceSchema.equals(rs1, rs2));

            pig.registerQuery("C1 = LOAD 'bout' as (a0:int, A: {t: (f:int) } );");
            pig.registerQuery("D = foreach C1 generate a0, SUM(A);");
            assertAliasContents("D", new String[] {
                "(1,1L)",
                "(2,4L)",
                "(3,3L)",
                "(5,10L)",
                "(8,24L)",
                "(9,9L)"
            });
        } finally {
            // This input is private to this test, so it is removed here rather than in tearDown.
            Util.deleteFile(cluster, "originput2");
        }
    }

    /**
     * Round-trips a relation grouped on a chararray column of two-field tuples (loaded
     * with a {@code ','} delimiter) and checks both the recovered schema and the
     * per-group sums of the int field.
     */
    @Test
    public void testSchemaConversion2() throws Exception {
        pig.registerQuery("A = LOAD 'originput' using " + STORAGE
                + "(',') as (f1:chararray, f2:int);");
        pig.registerQuery("B = group A by f1;");
        Schema origSchema = pig.dumpSchema("B");
        ResourceSchema rs1 = new ResourceSchema(origSchema);
        pig.registerQuery("STORE B into 'cout' using " + STORAGE + "();");

        pig.registerQuery("C = LOAD 'cout' using " + STORAGE + "();");
        Schema genSchema = pig.dumpSchema("C");
        ResourceSchema rs2 = new ResourceSchema(genSchema);
        assertTrue("generated schema equals original", ResourceSchema.equals(rs1, rs2));

        pig.registerQuery("C1 = LOAD 'cout' as (a0:chararray, A: {t: (f1:chararray, f2:int) } );");
        pig.registerQuery("D = foreach C1 generate a0, SUM(A.f2);");
        assertAliasContents("D", new String[] {
            "('A',23L)",
            "('B',7L)",
            "('C',11L)",
            "('D',10L)"
        });
    }
}