/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.pig.test;
import static org.junit.Assert.*;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Properties;
import java.util.Random;
import junit.framework.TestCase;
import org.apache.hadoop.mapred.FileAlreadyExistsException;
import org.apache.pig.ExecType;
import org.apache.pig.PigServer;
import org.apache.pig.data.Tuple;
import org.apache.pig.impl.PigContext;
import org.apache.pig.impl.io.FileLocalizer;
import org.junit.After;
import org.junit.AfterClass;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;
public class TestPigContext {
private static final String TMP_DIR_PROP = "/tmp/hadoop-hadoop";
private static final String FS_NAME = "file:///";
private static final String JOB_TRACKER = "local";
private File input;
private PigContext pigContext;
static MiniCluster cluster = null;
@BeforeClass
public static void oneTimeSetup(){
cluster = MiniCluster.buildCluster();
}
@Before
public void setUp() throws Exception {
pigContext = new PigContext(ExecType.LOCAL, getProperties());
input = File.createTempFile("PigContextTest-", ".txt");
}
/**
* Passing an already configured pigContext in PigServer constructor.
*/
@Test
public void testSetProperties_way_num01() throws Exception {
PigServer pigServer = new PigServer(pigContext);
registerAndStore(pigServer);
check_asserts(pigServer);
}
/**
* Setting properties through PigServer constructor directly.
*/
@Test
public void testSetProperties_way_num02() throws Exception {
PigServer pigServer = new PigServer(ExecType.LOCAL, getProperties());
registerAndStore(pigServer);
check_asserts(pigServer);
}
/**
* using connect() method.
*/
@Test
public void testSetProperties_way_num03() throws Exception {
pigContext.connect();
PigServer pigServer = new PigServer(pigContext);
registerAndStore(pigServer);
check_asserts(pigServer);
}
@Test
public void testHadoopExceptionCreation() throws Exception {
Object object = PigContext.instantiateFuncFromSpec("org.apache.hadoop.mapred.FileAlreadyExistsException");
assertTrue(object instanceof FileAlreadyExistsException);
}
@Test
// See PIG-832
public void testImportList() throws Exception {
String FILE_SEPARATOR = System.getProperty("file.separator");
File tmpDir = File.createTempFile("test", "");
tmpDir.delete();
tmpDir.mkdir();
File udf1Dir = new File(tmpDir.getAbsolutePath()+FILE_SEPARATOR+"com"+FILE_SEPARATOR+"xxx"+FILE_SEPARATOR+"udf1");
udf1Dir.mkdirs();
File udf2Dir = new File(tmpDir.getAbsolutePath()+FILE_SEPARATOR+"com"+FILE_SEPARATOR+"xxx"+FILE_SEPARATOR+"udf2");
udf2Dir.mkdirs();
File udf1JavaSrc = new File(udf1Dir.getAbsolutePath()+FILE_SEPARATOR+"TestUDF1.java");
File udf2JavaSrc = new File(udf2Dir.getAbsolutePath()+FILE_SEPARATOR+"TestUDF2.java");
String udf1Src = new String("package com.xxx.udf1;\n"+
"import java.io.IOException;\n"+
"import org.apache.pig.EvalFunc;\n"+
"import org.apache.pig.data.Tuple;\n"+
"public class TestUDF1 extends EvalFunc<Integer>{\n"+
"public Integer exec(Tuple input) throws IOException {\n"+
"return 1;}\n"+
"}");
String udf2Src = new String("package com.xxx.udf2;\n"+
"import org.apache.pig.builtin.PigStorage;\n" +
"public class TestUDF2 extends PigStorage { }\n");
// generate java file
FileOutputStream outStream1 =
new FileOutputStream(udf1JavaSrc);
OutputStreamWriter outWriter1 = new OutputStreamWriter(outStream1);
outWriter1.write(udf1Src);
outWriter1.close();
FileOutputStream outStream2 =
new FileOutputStream(udf2JavaSrc);
OutputStreamWriter outWriter2 = new OutputStreamWriter(outStream2);
outWriter2.write(udf2Src);
outWriter2.close();
// compile
int status;
status = Util.executeJavaCommand("javac -cp "+System.getProperty("java.class.path") + " " + udf1JavaSrc);
status = Util.executeJavaCommand("javac -cp "+System.getProperty("java.class.path") + " " + udf2JavaSrc);
// generate jar file
String jarName = "TestUDFJar.jar";
String jarFile = tmpDir.getAbsolutePath() + FILE_SEPARATOR + jarName;
status = Util.executeJavaCommand("jar -cf " + tmpDir.getAbsolutePath() + FILE_SEPARATOR + jarName +
" -C " + tmpDir.getAbsolutePath() + " " + "com");
assertTrue(status==0);
Properties properties = cluster.getProperties();
PigContext localPigContext = new PigContext(ExecType.MAPREDUCE, properties);
//register jar using properties
localPigContext.getProperties().setProperty("pig.additional.jars", jarFile);
PigServer pigServer = new PigServer(localPigContext);
PigContext.initializeImportList("com.xxx.udf1:com.xxx.udf2.");
ArrayList<String> importList = PigContext.getPackageImportList();
assertTrue(importList.size()==5);
assertTrue(importList.get(0).equals("com.xxx.udf1."));
assertTrue(importList.get(1).equals("com.xxx.udf2."));
assertTrue(importList.get(2).equals(""));
assertTrue(importList.get(3).equals("org.apache.pig.builtin."));
assertTrue(importList.get(4).equals("org.apache.pig.impl.builtin."));
Object udf = PigContext.instantiateFuncFromSpec("TestUDF1");
assertTrue(udf.getClass().toString().endsWith("com.xxx.udf1.TestUDF1"));
int LOOP_COUNT = 40;
File tmpFile = File.createTempFile("test", "txt");
tmpFile.deleteOnExit();
String localInput[] = new String[LOOP_COUNT];
Random r = new Random(1);
int rand;
for(int i = 0; i < LOOP_COUNT; i++) {
rand = r.nextInt(100);
localInput[i] = Integer.toString(rand);
}
Util.createInputFile(cluster, tmpFile.getCanonicalPath(), localInput);
FileLocalizer.deleteTempFiles();
pigServer.registerQuery("A = LOAD '" + tmpFile.getCanonicalPath() + "' using TestUDF2() AS (num:chararray);");
pigServer.registerQuery("B = foreach A generate TestUDF1(num);");
Iterator<Tuple> iter = pigServer.openIterator("B");
if(!iter.hasNext()) fail("No output found");
while(iter.hasNext()){
Tuple t = iter.next();
assertTrue(t.get(0) instanceof Integer);
assertTrue((Integer)t.get(0) == 1);
}
Util.deleteFile(cluster, tmpFile.getCanonicalPath());
Util.deleteDirectory(tmpDir);
}
// See PIG-1824
@SuppressWarnings("deprecation")
@Test
public void testScriptFiles() throws Exception {
PigContext pc = new PigContext(ExecType.LOCAL, getProperties());
final int n = pc.scriptFiles.size();
pc.addScriptFile("test/path-1824");
assertEquals("test/path-1824", pc.getScriptFiles().get("test/path-1824").toString());
assertEquals("script files should not be populated", n, pc.scriptFiles.size());
pc.addScriptFile("path-1824", "test/path-1824");
assertEquals("test/path-1824", pc.getScriptFiles().get("path-1824").toString());
assertEquals("script files should not be populated", n, pc.scriptFiles.size());
// last add wins when using an alias
pc.addScriptFile("path-1824", "test/some/other/path-1824");
assertEquals("test/some/other/path-1824", pc.getScriptFiles().get("path-1824").toString());
assertEquals("script files should not be populated", n, pc.scriptFiles.size());
// clean up
pc.getScriptFiles().remove("path-1824");
pc.getScriptFiles().remove("test/path-1824");
}
@After
public void tearDown() throws Exception {
input.delete();
}
@AfterClass
public static void oneTimeTearDown() throws Exception {
cluster.shutDown();
}
private static Properties getProperties() {
Properties props = new Properties();
props.put("mapred.job.tracker", JOB_TRACKER);
props.put("fs.default.name", FS_NAME);
props.put("hadoop.tmp.dir", TMP_DIR_PROP);
return props;
}
private List<String> getCommands() {
List<String> commands = new ArrayList<String>();
commands.add("my_input = LOAD '" + Util.encodeEscape(input.getAbsolutePath()) + "' USING PigStorage();");
commands.add("words = FOREACH my_input GENERATE FLATTEN(TOKENIZE($0));");
commands.add("grouped = GROUP words BY $0;");
commands.add("counts = FOREACH grouped GENERATE group, COUNT(words);");
return commands;
}
private void registerAndStore(PigServer pigServer) throws IOException {
// pigServer.debugOn();
List<String> commands = getCommands();
for (final String command : commands) {
pigServer.registerQuery(command);
}
String outFile = input.getAbsolutePath() + ".out";
pigServer.store("counts", outFile);
Util.deleteFile(cluster, outFile);
}
private void check_asserts(PigServer pigServer) {
assertEquals(JOB_TRACKER, pigServer.getPigContext().getProperties().getProperty("mapred.job.tracker"));
assertEquals(FS_NAME, pigServer.getPigContext().getProperties().getProperty("fs.default.name"));
assertEquals(TMP_DIR_PROP, pigServer.getPigContext().getProperties().getProperty("hadoop.tmp.dir"));
}
}