/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.hadoop.zebra.pig; import java.io.BufferedWriter; import java.io.FileWriter; import java.io.IOException; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.util.Tool; import org.apache.hadoop.util.ToolRunner; import org.apache.hadoop.zebra.BaseTestCase; import org.apache.hadoop.zebra.mapreduce.ZebraOutputPartition; import org.apache.hadoop.zebra.parser.ParseException; import org.apache.pig.backend.executionengine.ExecJob; import org.apache.pig.data.Tuple; import org.junit.After; import org.junit.Before; import org.junit.Test; import java.util.Iterator; import junit.framework.Assert; /** * Assume the input files contain rows of word and count, separated by a space: * * <pre> * us 2 * japan 2 * india 4 * us 2 * japan 1 * india 3 * nouse 5 * nowhere 4 * */ public class TestMultipleOutputs1 extends BaseTestCase implements Tool { static String inputPath; static String inputFileName = "multi-input.txt"; public static String sortKey = null; @Before public void setUp() throws Exception { init(); inputPath = getTableFullPath(inputFileName).toString(); writeToFile(inputPath); } @After public void tearDown() throws Exception { if (mode == TestMode.local) { pigServer.shutdown(); } } public static void writeToFile (String inputFile) throws IOException{ if (mode == TestMode.local) { FileWriter fstream = new FileWriter(inputFile); BufferedWriter out = new BufferedWriter(fstream); out.write("us\t2\n"); out.write("japan\t2\n"); out.write("india\t4\n"); out.write("us\t2\n"); out.write("japan\t1\n"); out.write("india\t3\n"); out.write("nouse\t5\n"); out.write("nowhere\t4\n"); out.close(); } if (mode == TestMode.cluster) { FSDataOutputStream fout = fs.create(new Path (inputFile)); fout.writeBytes("us\t2\n"); fout.writeBytes("japan\t2\n"); fout.writeBytes("india\t4\n"); fout.writeBytes("us\t2\n"); fout.writeBytes("japan\t1\n"); fout.writeBytes("india\t3\n"); fout.writeBytes("nouse\t5\n"); fout.writeBytes("nowhere\t4\n"); fout.close(); } } // test no sort key; @Test public void test1() throws ParseException, IOException, org.apache.hadoop.zebra.parser.ParseException, Exception { // Load data; String query = "records = LOAD '" + inputPath + "' as (word:chararray, count:int);"; System.out.println("query = " + query); pigServer.registerQuery(query); Iterator<Tuple> it = pigServer.openIterator("records"); while (it.hasNext()) { Tuple cur = it.next(); System.out.println(cur); } // Store using multiple outputs; String outputPaths = "us,india,japan"; removeDir(getTableFullPath("us")); removeDir(getTableFullPath("india")); removeDir(getTableFullPath("japan")); query = "store records into '" + outputPaths + "' using org.apache.hadoop.zebra.pig.TableStorer('[word,count]'," + "'org.apache.hadoop.zebra.pig.TestMultipleOutputs1$OutputPartitionerClass');"; System.out.println("query = " + query); pigServer.registerQuery(query); // Validate results; query = "records = LOAD '" + "us" + "' USING org.apache.hadoop.zebra.pig.TableLoader();"; int count = 0; System.out.println(query); pigServer.registerQuery(query); it = pigServer.openIterator("records"); while (it.hasNext()) { count ++; Tuple RowValue = it.next(); System.out.println(RowValue); if (count == 1) { Assert.assertEquals("us", RowValue.get(0)); Assert.assertEquals(2, RowValue.get(1)); } else if (count == 2) { Assert.assertEquals("us", RowValue.get(0)); Assert.assertEquals(2, RowValue.get(1)); } else if (count == 3) { Assert.assertEquals("nouse", RowValue.get(0)); Assert.assertEquals(5, RowValue.get(1)); } else if (count == 4) { Assert.assertEquals("nowhere", RowValue.get(0)); Assert.assertEquals(4, RowValue.get(1)); } } Assert.assertEquals(count, 4); query = "records = LOAD '" + "india" + "' USING org.apache.hadoop.zebra.pig.TableLoader();"; count = 0; System.out.println(query); pigServer.registerQuery(query); it = pigServer.openIterator("records"); while (it.hasNext()) { count ++; Tuple RowValue = it.next(); System.out.println(RowValue); if (count == 1) { Assert.assertEquals("india", RowValue.get(0)); Assert.assertEquals(4, RowValue.get(1)); } else if (count == 2) { Assert.assertEquals("india", RowValue.get(0)); Assert.assertEquals(3, RowValue.get(1)); } } Assert.assertEquals(count, 2); query = "records = LOAD '" + "japan" + "' USING org.apache.hadoop.zebra.pig.TableLoader();"; count = 0; System.out.println(query); pigServer.registerQuery(query); it = pigServer.openIterator("records"); while (it.hasNext()) { count ++; Tuple RowValue = it.next(); System.out.println(RowValue); if (count == 1) { Assert.assertEquals("japan", RowValue.get(0)); Assert.assertEquals(2, RowValue.get(1)); } else if (count == 2) { Assert.assertEquals("japan", RowValue.get(0)); Assert.assertEquals(1, RowValue.get(1)); } } Assert.assertEquals(count, 2); } //Test sort key on word; @Test public void test2() throws ParseException, IOException, org.apache.hadoop.zebra.parser.ParseException, Exception { // Load data; String query = "a = LOAD '" + inputPath + "' as (word:chararray, count:int);"; System.out.println("query = " + query); pigServer.registerQuery(query); query = "records = order a by word;"; System.out.println("query = " + query); pigServer.registerQuery(query); Iterator<Tuple> it = pigServer.openIterator("records"); while (it.hasNext()) { Tuple cur = it.next(); System.out.println(cur); } // Store using multiple outputs; String outputPaths = "us,india,japan"; removeDir(getTableFullPath("us")); removeDir(getTableFullPath("india")); removeDir(getTableFullPath("japan")); ExecJob pigJob = pigServer .store( "records", outputPaths, TableStorer.class.getCanonicalName() + "('[word,count]', 'org.apache.hadoop.zebra.pig.TestMultipleOutputs1$OutputPartitionerClass')"); Assert.assertNull(pigJob.getException()); // Validate results; query = "records = LOAD '" + "us" + "' USING org.apache.hadoop.zebra.pig.TableLoader();"; int count = 0; System.out.println(query); pigServer.registerQuery(query); it = pigServer.openIterator("records"); while (it.hasNext()) { count ++; Tuple RowValue = it.next(); System.out.println(RowValue); if (count == 1) { Assert.assertEquals("nouse", RowValue.get(0)); Assert.assertEquals(5, RowValue.get(1)); } else if (count == 2) { Assert.assertEquals("nowhere", RowValue.get(0)); Assert.assertEquals(4, RowValue.get(1)); } else if (count == 3) { Assert.assertEquals("us", RowValue.get(0)); Assert.assertEquals(2, RowValue.get(1)); } else if (count == 4) { Assert.assertEquals("us", RowValue.get(0)); Assert.assertEquals(2, RowValue.get(1)); } } Assert.assertEquals(count, 4); query = "records = LOAD '" + "india" + "' USING org.apache.hadoop.zebra.pig.TableLoader();"; count = 0; System.out.println(query); pigServer.registerQuery(query); it = pigServer.openIterator("records"); while (it.hasNext()) { count ++; Tuple RowValue = it.next(); System.out.println(RowValue); if (count == 1) { Assert.assertEquals("india", RowValue.get(0)); Assert.assertEquals(4, RowValue.get(1)); } else if (count == 2) { Assert.assertEquals("india", RowValue.get(0)); Assert.assertEquals(3, RowValue.get(1)); } } Assert.assertEquals(count, 2); query = "records = LOAD '" + "japan" + "' USING org.apache.hadoop.zebra.pig.TableLoader();"; count = 0; System.out.println(query); pigServer.registerQuery(query); it = pigServer.openIterator("records"); while (it.hasNext()) { count ++; Tuple RowValue = it.next(); System.out.println(RowValue); if (count == 1) { Assert.assertEquals("japan", RowValue.get(0)); Assert.assertEquals(2, RowValue.get(1)); } else if (count == 2) { Assert.assertEquals("japan", RowValue.get(0)); Assert.assertEquals(1, RowValue.get(1)); } } Assert.assertEquals(count, 2); } //Test sort key on word and count; @Test public void test3() throws ParseException, IOException, org.apache.hadoop.zebra.parser.ParseException, Exception { // Load data; String query = "a = LOAD '" + inputPath + "' as (word:chararray, count:int);"; System.out.println("query = " + query); pigServer.registerQuery(query); query = "records = order a by word, count;"; System.out.println("query = " + query); pigServer.registerQuery(query); Iterator<Tuple> it = pigServer.openIterator("records"); while (it.hasNext()) { Tuple cur = it.next(); System.out.println(cur); } // Store using multiple outputs; String outputPaths = "us,india,japan"; removeDir(getTableFullPath("us")); removeDir(getTableFullPath("india")); removeDir(getTableFullPath("japan")); ExecJob pigJob = pigServer .store( "records", outputPaths, TableStorer.class.getCanonicalName() + "('[word,count]', 'org.apache.hadoop.zebra.pig.TestMultipleOutputs1$OutputPartitionerClass')"); Assert.assertNull(pigJob.getException()); // Validate results; query = "records = LOAD '" + "us" + "' USING org.apache.hadoop.zebra.pig.TableLoader();"; int count = 0; System.out.println(query); pigServer.registerQuery(query); it = pigServer.openIterator("records"); while (it.hasNext()) { count ++; Tuple RowValue = it.next(); System.out.println(RowValue); if (count == 1) { Assert.assertEquals("nouse", RowValue.get(0)); Assert.assertEquals(5, RowValue.get(1)); } else if (count == 2) { Assert.assertEquals("nowhere", RowValue.get(0)); Assert.assertEquals(4, RowValue.get(1)); } else if (count == 3) { Assert.assertEquals("us", RowValue.get(0)); Assert.assertEquals(2, RowValue.get(1)); } else if (count == 4) { Assert.assertEquals("us", RowValue.get(0)); Assert.assertEquals(2, RowValue.get(1)); } } Assert.assertEquals(count, 4); query = "records = LOAD '" + "india" + "' USING org.apache.hadoop.zebra.pig.TableLoader();"; count = 0; System.out.println(query); pigServer.registerQuery(query); it = pigServer.openIterator("records"); while (it.hasNext()) { count ++; Tuple RowValue = it.next(); System.out.println(RowValue); if (count == 1) { Assert.assertEquals("india", RowValue.get(0)); Assert.assertEquals(3, RowValue.get(1)); } else if (count == 2) { Assert.assertEquals("india", RowValue.get(0)); Assert.assertEquals(4, RowValue.get(1)); } } Assert.assertEquals(count, 2); query = "records = LOAD '" + "japan" + "' USING org.apache.hadoop.zebra.pig.TableLoader();"; count = 0; System.out.println(query); pigServer.registerQuery(query); it = pigServer.openIterator("records"); while (it.hasNext()) { count ++; Tuple RowValue = it.next(); System.out.println(RowValue); if (count == 1) { Assert.assertEquals("japan", RowValue.get(0)); Assert.assertEquals(1, RowValue.get(1)); } else if (count == 2) { Assert.assertEquals("japan", RowValue.get(0)); Assert.assertEquals(2, RowValue.get(1)); } } Assert.assertEquals(count, 2); } //Negative test case: invalid partition class; @Test (expected = IOException.class) public void testNegative1() throws ParseException, IOException, org.apache.hadoop.zebra.parser.ParseException, Exception { // Load data; String query = "a = LOAD '" + inputPath + "' as (word:chararray, count:int);"; System.out.println("query = " + query); pigServer.registerQuery(query); query = "records = order a by word, count;"; System.out.println("query = " + query); pigServer.registerQuery(query); Iterator<Tuple> it = pigServer.openIterator("records"); while (it.hasNext()) { Tuple cur = it.next(); System.out.println(cur); } // Store using multiple outputs; String outputPaths = "us,india,japan"; removeDir(getTableFullPath("us")); removeDir(getTableFullPath("india")); removeDir(getTableFullPath("japan")); pigServer .store( "records", outputPaths, TableStorer.class.getCanonicalName() + "('[word,count]', 'org.apache.hadoop.zebra.pig.notexistingclass')"); } public static class OutputPartitionerClass extends ZebraOutputPartition { @Override public int getOutputPartition(BytesWritable key, Tuple value) { String reg = null; try { reg = (String) (value.get(0)); } catch (Exception e) { // } if (reg.equals("us")) return 0; if (reg.equals("india")) return 1; if (reg.equals("japan")) return 2; return 0; } } @Override public int run(String[] args) throws Exception { TestMultipleOutputs1 test = new TestMultipleOutputs1(); test.setUp(); test.test1(); test.tearDown(); test.setUp(); test.test2(); test.tearDown(); test.setUp(); test.test3(); test.tearDown(); test.setUp(); test.testNegative1(); test.tearDown(); return 0; } public static void main(String[] args) throws Exception { conf = new Configuration(); int res = ToolRunner.run(conf, new TestMultipleOutputs1(), args); System.out.println("PASS"); System.exit(res); } }