/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.pig.test;
import static org.apache.pig.ExecType.MAPREDUCE;
import static org.junit.Assert.*;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.Iterator;
import java.util.Properties;
import java.util.Map.Entry;
import junit.framework.Assert;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.pig.ExecType;
import org.apache.pig.PigServer;
import org.apache.pig.backend.executionengine.ExecException;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.expressionOperators.POProject;
import org.apache.pig.data.Tuple;
import org.apache.pig.impl.io.FileLocalizer;
import org.junit.AfterClass;
import org.junit.Before;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;
@RunWith(JUnit4.class)
public class TestPigStorage {
protected final Log log = LogFactory.getLog(getClass());
private static MiniCluster cluster = MiniCluster.buildCluster();
@Before
public void setup() {
// some tests are in map-reduce mode and some in local - so before
// each test, we will de-initialize FileLocalizer so that temp files
// are created correctly depending on the ExecType in the test.
FileLocalizer.setInitialized(false);
}
@AfterClass
public static void shutdown() {
cluster.shutDown();
}
@Test
public void testBlockBoundary() throws ExecException {
// This tests PigStorage loader with records exectly
// on the boundary of the file blocks.
Properties props = new Properties();
for (Entry<Object, Object> entry : cluster.getProperties().entrySet()) {
props.put(entry.getKey(), entry.getValue());
}
props.setProperty("mapred.max.split.size", "20");
PigServer pigServer = new PigServer(MAPREDUCE, props);
String[] inputs = {
"abcdefgh1", "abcdefgh2", "abcdefgh3",
"abcdefgh4", "abcdefgh5", "abcdefgh6",
"abcdefgh7", "abcdefgh8", "abcdefgh9"
};
String[] expected = {
"(abcdefgh1)", "(abcdefgh2)", "(abcdefgh3)",
"(abcdefgh4)", "(abcdefgh5)", "(abcdefgh6)",
"(abcdefgh7)", "(abcdefgh8)", "(abcdefgh9)"
};
System.setProperty("pig.overrideBlockSize", "20");
String INPUT_FILE = "tmp.txt";
try {
PrintWriter w = new PrintWriter(new FileWriter(INPUT_FILE));
for (String s : inputs) {
w.println(s);
}
w.close();
Util.copyFromLocalToCluster(cluster, INPUT_FILE, INPUT_FILE);
pigServer.registerQuery("a = load '" + INPUT_FILE + "';");
Iterator<Tuple> iter = pigServer.openIterator("a");
int counter = 0;
while (iter.hasNext()){
assertEquals(expected[counter++].toString(), iter.next().toString());
}
assertEquals(expected.length, counter);
} catch (Exception e) {
e.printStackTrace();
Assert.fail();
} finally {
new File(INPUT_FILE).delete();
try {
Util.deleteFile(cluster, INPUT_FILE);
} catch (IOException e) {
e.printStackTrace();
Assert.fail();
}
}
}
/**
* Test to verify that PigStorage works fine in the following scenario:
* The column prune optimization determines only columns 2 and 3 are needed
* and there are records in the data which have only 1 column (malformed data).
* In this case, PigStorage should return an empty tuple to represent columns
* 2 and 3 and {@link POProject} would handle catching any
* {@link IndexOutOfBoundsException} resulting from accessing a field in the
* tuple and substitute a null.
*/
@Test
public void testPruneColumnsWithMissingFields() throws IOException {
String inputFileName = "TestPigStorage-testPruneColumnsWithMissingFields-input.txt";
Util.createLocalInputFile(
inputFileName,
new String[] {"1\t2\t3", "4", "5\t6\t7"});
PigServer ps = new PigServer(ExecType.LOCAL);
String script = "a = load '" + inputFileName + "' as (i:int, j:int, k:int);" +
"b = foreach a generate j, k;";
Util.registerMultiLineQuery(ps, script);
Iterator<Tuple> it = ps.openIterator("b");
assertEquals(Util.createTuple(new Integer[] { 2, 3}), it.next());
assertEquals(Util.createTuple(new Integer[] { null, null}), it.next());
assertEquals(Util.createTuple(new Integer[] { 6, 7}), it.next());
assertFalse(it.hasNext());
}
}