/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.zebra.pig;
import java.io.BufferedWriter;
import java.io.FileWriter;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.hadoop.zebra.BaseTestCase;
import org.apache.hadoop.zebra.mapreduce.BasicTableOutputFormat;
import org.apache.hadoop.zebra.mapreduce.ZebraOutputPartition;
import org.apache.hadoop.zebra.parser.ParseException;
import org.apache.pig.backend.executionengine.ExecJob;
import org.apache.pig.data.Tuple;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import java.util.Iterator;
import junit.framework.Assert;
/**
 * Assume the input files contain rows of word and count, separated by a tab:
 *
 * <pre>
 * us 2
 * japan 2
 * india 4
 * us 2
 * japan 1
 * india 3
 * nouse 5
 * nowhere 4
 * </pre>
 */
public class TestMultipleOutputs2 extends BaseTestCase implements Tool {
  static String inputPath;
  static String inputFileName = "multi-input.txt";
  public static String sortKey = null;

  /** Rows written as the test fixture; shared by local and cluster modes so they cannot drift. */
  private static final String[] FIXTURE_ROWS = {
      "us\t2\n",
      "japan\t2\n",
      "india\t4\n",
      "us\t2\n",
      "japan\t1\n",
      "india\t3\n",
      "nouse\t5\n",
      "nowhere\t4\n" };

  /** Initializes the test harness and writes the tab-separated fixture file. */
  @Before
  public void setUp() throws Exception {
    init();
    inputPath = getTableFullPath(inputFileName).toString();
    writeToFile(inputPath);
  }

  /** Shuts down the embedded Pig server; only started in local mode. */
  @After
  public void tearDown() throws Exception {
    if (mode == TestMode.local) {
      pigServer.shutdown();
    }
  }

  /**
   * Writes the fixture rows to {@code inputFile}: the local file system in local
   * mode, HDFS in cluster mode.
   *
   * @param inputFile destination path for the fixture data
   * @throws IOException if the file cannot be created or written
   */
  public static void writeToFile(String inputFile) throws IOException {
    if (mode == TestMode.local) {
      BufferedWriter out = new BufferedWriter(new FileWriter(inputFile));
      try {
        for (String row : FIXTURE_ROWS) {
          out.write(row);
        }
      } finally {
        // Close even when a write fails so the file handle is not leaked.
        out.close();
      }
    }
    if (mode == TestMode.cluster) {
      FSDataOutputStream fout = fs.create(new Path(inputFile));
      try {
        for (String row : FIXTURE_ROWS) {
          fout.writeBytes(row);
        }
      } finally {
        fout.close();
      }
    }
  }

  /**
   * End-to-end test: loads the fixture, stores it through Zebra's multiple-output
   * partitioner into three tables (us_0, india_1, japan_2), then reloads each
   * table and verifies its contents row by row.
   */
  @Test
  public void test1() throws ParseException, IOException,
      org.apache.hadoop.zebra.parser.ParseException, Exception {
    // Load data;
    String query = "records = LOAD '" + inputPath + "' as (word:chararray, count:int);";
    System.out.println("query = " + query);
    pigServer.registerQuery(query);
    Iterator<Tuple> it = pigServer.openIterator("records");
    while (it.hasNext()) {
      Tuple cur = it.next();
      System.out.println(cur);
    }

    // Store using multiple outputs; rows are routed to one of the three tables
    // by OutputPartitionerClass based on the word column.
    String outputPaths = "us_0,india_1,japan_2";
    removeDir(getTableFullPath("us_0"));
    removeDir(getTableFullPath("india_1"));
    removeDir(getTableFullPath("japan_2"));
    ExecJob pigJob = pigServer
        .store(
            "records",
            outputPaths,
            TableStorer.class.getCanonicalName() +
            "('[word,count]', 'org.apache.hadoop.zebra.pig.TestMultipleOutputs2$OutputPartitionerClass', 'us,india,japan')");
    Assert.assertNull(pigJob.getException());

    // Validate the "us" table: two 'us' rows plus the two unmatched words
    // ('nouse', 'nowhere'), which the partitioner defaults to partition 0.
    query = "records = LOAD '" + "us_0"
        + "' USING org.apache.hadoop.zebra.pig.TableLoader();";
    int count = 0;
    System.out.println(query);
    pigServer.registerQuery(query);
    it = pigServer.openIterator("records");
    while (it.hasNext()) {
      count++;
      Tuple RowValue = it.next();
      System.out.println(RowValue);
      if (count == 1) {
        Assert.assertEquals("us", RowValue.get(0));
        Assert.assertEquals(2, RowValue.get(1));
      } else if (count == 2) {
        Assert.assertEquals("us", RowValue.get(0));
        Assert.assertEquals(2, RowValue.get(1));
      } else if (count == 3) {
        Assert.assertEquals("nouse", RowValue.get(0));
        Assert.assertEquals(5, RowValue.get(1));
      } else if (count == 4) {
        Assert.assertEquals("nowhere", RowValue.get(0));
        Assert.assertEquals(4, RowValue.get(1));
      }
    }
    // JUnit convention: expected value first, actual second.
    Assert.assertEquals(4, count);

    // Validate the "india" table: exactly the two 'india' rows.
    query = "records = LOAD '" + "india_1"
        + "' USING org.apache.hadoop.zebra.pig.TableLoader();";
    count = 0;
    System.out.println(query);
    pigServer.registerQuery(query);
    it = pigServer.openIterator("records");
    while (it.hasNext()) {
      count++;
      Tuple RowValue = it.next();
      System.out.println(RowValue);
      if (count == 1) {
        Assert.assertEquals("india", RowValue.get(0));
        Assert.assertEquals(4, RowValue.get(1));
      } else if (count == 2) {
        Assert.assertEquals("india", RowValue.get(0));
        Assert.assertEquals(3, RowValue.get(1));
      }
    }
    Assert.assertEquals(2, count);

    // Validate the "japan" table: exactly the two 'japan' rows.
    query = "records = LOAD '" + "japan_2"
        + "' USING org.apache.hadoop.zebra.pig.TableLoader();";
    count = 0;
    System.out.println(query);
    pigServer.registerQuery(query);
    it = pigServer.openIterator("records");
    while (it.hasNext()) {
      count++;
      Tuple RowValue = it.next();
      System.out.println(RowValue);
      if (count == 1) {
        Assert.assertEquals("japan", RowValue.get(0));
        Assert.assertEquals(2, RowValue.get(1));
      } else if (count == 2) {
        Assert.assertEquals("japan", RowValue.get(0));
        Assert.assertEquals(1, RowValue.get(1));
      }
    }
    Assert.assertEquals(2, count);
  }

  /**
   * Routes each row to an output table by matching the word column against the
   * comma-separated argument list ('us,india,japan') passed to TableStorer.
   * Unmatched or unreadable rows fall back to partition 0.
   */
  public static class OutputPartitionerClass extends ZebraOutputPartition {
    @Override
    public int getOutputPartition(BytesWritable key, Tuple value) {
      String reg = null;
      try {
        reg = (String) (value.get(0));
      } catch (Exception e) {
        // Leave reg null; the row is routed to the default partition below.
      }
      if (reg == null) {
        // Guard against NPE on malformed tuples (the original dereferenced
        // reg unconditionally after swallowing the exception above).
        return 0;
      }
      String argumentsString = BasicTableOutputFormat.getOutputPartitionClassArguments(conf);
      String[] arguments = argumentsString.split(",");
      for (int i = 0; i < arguments.length && i < 3; i++) {
        if (reg.equals(arguments[i])) {
          return i;
        }
      }
      return 0;
    }
  }

  /**
   * Tool entry point: runs the full setUp/test1/tearDown cycle once.
   *
   * @return 0 on success (assertion failures propagate as exceptions)
   */
  @Override
  public int run(String[] args) throws Exception {
    TestMultipleOutputs2 test = new TestMultipleOutputs2();
    test.setUp();
    test.test1();
    test.tearDown();
    return 0;
  }

  public static void main(String[] args) throws Exception {
    conf = new Configuration();
    int res = ToolRunner.run(conf, new TestMultipleOutputs2(), args);
    System.out.println("PASS");
    System.exit(res);
  }
}