/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with this
 * work for additional information regarding copyright ownership. The ASF
 * licenses this file to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations
 * under the License.
 */

package org.apache.hadoop.zebra.pig;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.Iterator;
import java.util.StringTokenizer;

import junit.framework.Assert;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.zebra.BaseTestCase;
import org.apache.hadoop.zebra.io.BasicTable;
import org.apache.hadoop.zebra.io.TableInserter;
import org.apache.hadoop.zebra.parser.ParseException;
import org.apache.hadoop.zebra.schema.Schema;
import org.apache.hadoop.zebra.types.TypesUtils;
import org.apache.pig.backend.executionengine.ExecException;
import org.apache.pig.backend.executionengine.ExecJob;
import org.apache.pig.data.DataByteArray;
import org.apache.pig.data.Tuple;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;

/**
 * Test projections on simple column types.
 */
public class TestSimpleType extends BaseTestCase {
  final static String STR_SCHEMA =
      "s1:bool, s2:int, s3:long, s4:double, s5:string, s6:bytes";
  final static String STR_STORAGE = "[s1, s2]; [s3, s4]; [s5, s6]";

  private static Path path;

  @BeforeClass
  public static void setUpOnce() throws Exception {
    init();
    path = getTableFullPath("TestSimpleType");
    removeDir(path);

    // Create the table (schema plus storage hint), then reopen it for
    // insertion.
    BasicTable.Writer writer =
        new BasicTable.Writer(path, STR_SCHEMA, STR_STORAGE, conf);
    Schema schema = writer.getSchema();

    BasicTable.Writer writer1 = new BasicTable.Writer(path, conf);
    int part = 0;
    TableInserter inserter = writer1.getInserter("part" + part, true);
    Tuple tuple = TypesUtils.createTuple(schema);
    TypesUtils.resetTuple(tuple);

    // Insert row 1.
    int row = 0;
    tuple.set(0, true); // bool
    tuple.set(1, 1); // int
    tuple.set(2, 1001L); // long
    tuple.set(3, 1.1); // double
    tuple.set(4, "hello world 1"); // string
    tuple.set(5, new DataByteArray("hello byte 1")); // bytes
    inserter.insert(new BytesWritable(
        String.format("k%d%d", part + 1, row + 1).getBytes()), tuple);

    // Insert row 2.
    row++;
    TypesUtils.resetTuple(tuple);
    tuple.set(0, false); // bool
    tuple.set(1, 2); // int
    tuple.set(2, 1002L); // long
    tuple.set(3, 3.1); // double
    tuple.set(4, "hello world 2"); // string
    tuple.set(5, new DataByteArray("hello byte 2")); // bytes
    inserter.insert(new BytesWritable(
        String.format("k%d%d", part + 1, row + 1).getBytes()), tuple);

    inserter.close();
    writer1.finish();
    writer.close();
  }

  @AfterClass
  public static void tearDownOnce() throws IOException {
    BasicTable.drop(path, conf);
  }
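
  /*
   * Illustrative sketch only (not used by the tests, and not part of the
   * original file): the stack-trace string parsing in getCurrentMethodName()
   * below predates convenient JDK APIs; on Java 5+ the same caller lookup can
   * be done with Thread.getStackTrace(). The method name
   * callerMethodNameSketch is our own invention.
   */
  @SuppressWarnings("unused")
  private static String callerMethodNameSketch() {
    // Frame 0 is getStackTrace itself, frame 1 is this method, frame 2 is
    // our caller. (Some VMs may omit frames, so this is best-effort.)
    StackTraceElement caller = Thread.currentThread().getStackTrace()[2];
    return caller.getClassName() + "." + caller.getMethodName();
  }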

  /**
   * Returns the fully qualified name of the method that called
   * getCurrentMethodName, recovered by parsing a printed stack trace.
   */
  public String getCurrentMethodName() {
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    PrintWriter pw = new PrintWriter(baos);
    (new Throwable()).printStackTrace(pw);
    pw.flush();
    String stackTrace = baos.toString();
    pw.close();

    StringTokenizer tok = new StringTokenizer(stackTrace, "\n");
    tok.nextToken(); // 'java.lang.Throwable'
    tok.nextToken(); // 'at ...getCurrentMethodName'
    String l = tok.nextToken(); // 'at ...<caller of getCurrentMethodName>'
    // Parse the third line of the stack trace.
    tok = new StringTokenizer(l.trim(), " <(");
    String t = tok.nextToken(); // 'at'
    t = tok.nextToken(); // '...<caller of getCurrentMethodName>'
    return t;
  }

  // @Test
  public void testReadSimpleStitch() throws IOException, ParseException {
    String query = "records = LOAD '" + path.toString()
        + "' USING org.apache.hadoop.zebra.pig.TableLoader('s5,s1');";
    System.out.println(query);
    pigServer.registerQuery(query);

    Iterator<Tuple> it = pigServer.openIterator("records");
    int row = 0;
    while (it.hasNext()) {
      Tuple rowValue = it.next();
      System.out.println(rowValue);
      row++;
      if (row == 1) {
        Assert.assertEquals("hello world 1", rowValue.get(0));
        Assert.assertEquals(true, rowValue.get(1));
      }
      if (row == 2) {
        Assert.assertEquals("hello world 2", rowValue.get(0));
        Assert.assertEquals(false, rowValue.get(1));
      }
    }
  }

  // @Test
  // Test reader: project all columns, in reverse schema order.
  public void testReadSimple1() throws IOException, ParseException {
    String query = "records = LOAD '" + path.toString()
        + "' USING org.apache.hadoop.zebra.pig.TableLoader('s6,s5,s4,s3,s2,s1');";
    System.out.println(query);
    pigServer.registerQuery(query);

    Iterator<Tuple> it = pigServer.openIterator("records");
    int row = 0;
    while (it.hasNext()) {
      Tuple rowValue = it.next();
      System.out.println(rowValue);
      row++;
      if (row == 1) {
        Assert.assertEquals(true, rowValue.get(5));
        Assert.assertEquals(1, rowValue.get(4));
        Assert.assertEquals(1001L, rowValue.get(3));
        Assert.assertEquals(1.1, rowValue.get(2));
        Assert.assertEquals("hello world 1", rowValue.get(1));
        Assert.assertEquals("hello byte 1", rowValue.get(0).toString());
      }
      if (row == 2) {
        Assert.assertEquals(false, rowValue.get(5));
        Assert.assertEquals(2, rowValue.get(4));
        Assert.assertEquals(1002L, rowValue.get(3));
        Assert.assertEquals(3.1, rowValue.get(2));
        Assert.assertEquals("hello world 2", rowValue.get(1));
        Assert.assertEquals("hello byte 2", rowValue.get(0).toString());
      }
    }
  }
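
  /*
   * Note on projection order: TableLoader's string argument is a
   * comma-separated projection, and the fields of each loaded tuple follow
   * the projection order rather than the table schema order. That is why
   * testReadSimple1 above, which projects 's6,s5,s4,s3,s2,s1', reads s6 at
   * index 0 and s1 at index 5.
   */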

  // @Test
  // Test reader, negative: project a field that does not exist in the table.
  public void testRead2() throws IOException, ParseException {
    try {
      String query = "records = LOAD '" + path.toString()
          + "' USING org.apache.hadoop.zebra.pig.TableLoader('s7');";
      // The invalid projection ('s7' is not in the schema) should be
      // rejected when the query is registered.
      pigServer.registerQuery(query);
      Assert.fail("Projection should not accept non-existent fields");
    } catch (Exception e) {
      System.out.println(e);
    }
  }

  @Test
  // Store the same table.
  public void testStorer() throws ExecException, IOException {
    //
    // Use Pig LOAD to load testing data for the store.
    //
    String query = "records = LOAD '" + path.toString()
        + "' USING org.apache.hadoop.zebra.pig.TableLoader() as (s1,s2,s3,s4,s5,s6);";
    pigServer.registerQuery(query);
    Iterator<Tuple> it = pigServer.openIterator("records");
    while (it.hasNext()) {
      Tuple rowValue = it.next();
      System.out.println(rowValue);
    }

    //
    // Use Pig STORE to store the testing data.
    //
    Path newPath = new Path(getCurrentMethodName());
    pigServer.store("records", new Path(newPath, "store").toString(),
        TableStorer.class.getCanonicalName() + "('[s1, s2]; [s3, s4]')");
  }

  @Test
  // Store different records: only the second row of the previous table.
  public void testStorer2() throws ExecException, IOException {
    // Load the original table.
    String query = "records = LOAD '" + path.toString()
        + "' USING org.apache.hadoop.zebra.pig.TableLoader() as (s1,s2,s3,s4,s5,s6);";
    System.out.println(query);
    pigServer.registerQuery(query);

    // Filter the original table.
    String query2 = "newRecord = FILTER records BY (s2 >= 2);";
    pigServer.registerQuery(query2);

    // Store the new records to a new table.
    Path newPath = new Path(getCurrentMethodName());
    pigServer.store("newRecord", newPath.toString(),
        TableStorer.class.getCanonicalName() + "('[s1, s2]; [s3, s4]')");

    // Check the new table's content.
    String query3 = "newRecords = LOAD '" + newPath.toString()
        + "' USING org.apache.hadoop.zebra.pig.TableLoader('s6,s5,s4,s3,s2,s1');";
    System.out.println(query3);
    pigServer.registerQuery(query3);

    Iterator<Tuple> it3 = pigServer.openIterator("newRecords");
    int row = 0;
    Tuple rowValue2 = null;
    while (it3.hasNext()) {
      rowValue2 = it3.next();
      row++;
      if (row == 1) {
        Assert.assertEquals(false, rowValue2.get(5));
        Assert.assertEquals(2, rowValue2.get(4));
        Assert.assertEquals(1002L, rowValue2.get(3));
        Assert.assertEquals(3.1, rowValue2.get(2));
        Assert.assertEquals("hello world 2", rowValue2.get(1));
        Assert.assertEquals("hello byte 2", rowValue2.get(0).toString());
      }
    }
    Assert.assertEquals(1, row);
  }
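
  /*
   * Note on storage hints: the single string argument to TableStorer (e.g.
   * '[s1, s2]; [s3, s4]') partitions columns into column groups, one
   * bracketed list per group. Judging from the tests below, an empty hint
   * ('', testStorer3) and an empty group ('[]', testStorer4) are both
   * accepted; columns not named in any group (s5, s6 here) are presumably
   * placed in a default column group.
   */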

  @Test
  // Store different records, with an empty storage hint.
  public void testStorer3() throws ExecException, IOException {
    // Load the original table.
    String query = "records = LOAD '" + path.toString()
        + "' USING org.apache.hadoop.zebra.pig.TableLoader() as (s1,s2,s3,s4,s5,s6);";
    System.out.println(query);
    pigServer.registerQuery(query);

    // Filter the original table.
    String query2 = "newRecord = FILTER records BY (s2 >= 2);";
    pigServer.registerQuery(query2);

    // Store the new records to a new table.
    Path newPath = new Path(getCurrentMethodName());
    pigServer.store("newRecord", newPath.toString(),
        TableStorer.class.getCanonicalName() + "('')");

    // Check the new table's content.
    String query3 = "newRecords = LOAD '" + newPath.toString()
        + "' USING org.apache.hadoop.zebra.pig.TableLoader('s6,s5,s4,s3,s2,s1');";
    System.out.println(query3);
    pigServer.registerQuery(query3);

    Iterator<Tuple> it3 = pigServer.openIterator("newRecords");
    int row = 0;
    Tuple rowValue2 = null;
    while (it3.hasNext()) {
      rowValue2 = it3.next();
      row++;
      if (row == 1) {
        Assert.assertEquals(false, rowValue2.get(5));
        Assert.assertEquals(2, rowValue2.get(4));
        Assert.assertEquals(1002L, rowValue2.get(3));
        Assert.assertEquals(3.1, rowValue2.get(2));
        Assert.assertEquals("hello world 2", rowValue2.get(1));
        Assert.assertEquals("hello byte 2", rowValue2.get(0).toString());
      }
    }
    Assert.assertEquals(1, row);
  }

  @Test
  // Store different records, with an empty column group.
  public void testStorer4() throws ExecException, IOException {
    // Load the original table.
    String query = "records = LOAD '" + path.toString()
        + "' USING org.apache.hadoop.zebra.pig.TableLoader() as (s1,s2,s3,s4,s5,s6);";
    System.out.println(query);
    pigServer.registerQuery(query);

    // Filter the original table.
    String query2 = "newRecord = FILTER records BY (s2 >= 2);";
    pigServer.registerQuery(query2);

    // Store the new records to a new table.
    Path newPath = new Path(getCurrentMethodName());
    pigServer.store("newRecord", newPath.toString(),
        TableStorer.class.getCanonicalName() + "('[]')");

    // Check the new table's content.
    String query3 = "newRecords = LOAD '" + newPath.toString()
        + "' USING org.apache.hadoop.zebra.pig.TableLoader('s6,s5,s4,s3,s2,s1');";
    System.out.println(query3);
    pigServer.registerQuery(query3);

    Iterator<Tuple> it3 = pigServer.openIterator("newRecords");
    int row = 0;
    Tuple rowValue2 = null;
    while (it3.hasNext()) {
      rowValue2 = it3.next();
      row++;
      if (row == 1) {
        Assert.assertEquals(false, rowValue2.get(5));
        Assert.assertEquals(2, rowValue2.get(4));
        Assert.assertEquals(1002L, rowValue2.get(3));
        Assert.assertEquals(3.1, rowValue2.get(2));
        Assert.assertEquals("hello world 2", rowValue2.get(1));
        Assert.assertEquals("hello byte 2", rowValue2.get(0).toString());
      }
    }
    Assert.assertEquals(1, row);
  }

  @Test
  // Negative: the storage hint names a column (s7) that does not exist in
  // the input schema.
  public void testStorerNegative1() throws ExecException, IOException {
    String query = "records = LOAD '" + path.toString()
        + "' USING org.apache.hadoop.zebra.pig.TableLoader();";
    pigServer.registerQuery(query);
    Iterator<Tuple> it = pigServer.openIterator("records");
    while (it.hasNext()) {
      Tuple rowValue = it.next();
      System.out.println(rowValue);
    }

    try {
      Path newPath = new Path(getCurrentMethodName());
      pigServer.store("records", new Path(newPath, "store").toString(),
          TableStorer.class.getCanonicalName() + "('[s7, s2]; [s3, s4]')");
    } catch (Exception e) {
      System.out.println(e);
      return;
    }
    Assert.fail("Exception expected");
  }
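
  /*
   * Note on failure checking: the negative store tests use two styles.
   * testStorerNegative1 above and testStorerNegative2/3 and testStorer5
   * below expect pigServer.store(...) to throw, so they catch the exception
   * and return, reaching Assert.fail() only if the store succeeded.
   * testStorerNegative4 instead inspects the returned ExecJob via
   * getException(), which also covers failures reported by the job itself
   * rather than thrown at submission time.
   */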

  @Test
  // Negative: the storage hint lists the same column (s1) in two column
  // groups.
  public void testStorerNegative2() throws ExecException, IOException {
    String query = "records = LOAD '" + path.toString()
        + "' USING org.apache.hadoop.zebra.pig.TableLoader();";
    pigServer.registerQuery(query);
    Iterator<Tuple> it = pigServer.openIterator("records");
    while (it.hasNext()) {
      Tuple rowValue = it.next();
      System.out.println(rowValue);
    }

    try {
      Path newPath = new Path(getCurrentMethodName());
      pigServer.store("records", new Path(newPath, "store").toString(),
          TableStorer.class.getCanonicalName() + "('[s1, s2]; [s1, s4]')");
    } catch (Exception e) {
      System.out.println(e);
      return;
    }
    Assert.fail("Exception expected");
  }

  @Test
  // Negative: the storage hint contains duplicate column groups.
  public void testStorerNegative3() throws ExecException, IOException {
    String query = "records = LOAD '" + path.toString()
        + "' USING org.apache.hadoop.zebra.pig.TableLoader();";
    pigServer.registerQuery(query);
    Iterator<Tuple> it = pigServer.openIterator("records");
    while (it.hasNext()) {
      Tuple rowValue = it.next();
      System.out.println(rowValue);
    }

    try {
      Path newPath = new Path(getCurrentMethodName());
      pigServer.store("records", new Path(newPath, "store").toString(),
          TableStorer.class.getCanonicalName() + "('[s1]; [s1]')");
    } catch (Exception e) {
      System.out.println(e);
      return;
    }
    Assert.fail("Exception expected");
  }

  // @Test
  // Negative: the schema description differs from the input tuple (different
  // data types for columns).
  public void testStorerNegative4() throws ExecException, IOException {
    String query = "records = LOAD '" + path.toString()
        + "' USING org.apache.hadoop.zebra.pig.TableLoader();";
    pigServer.registerQuery(query);
    Iterator<Tuple> it = pigServer.openIterator("records");
    while (it.hasNext()) {
      Tuple rowValue = it.next();
      System.out.println(rowValue);
    }

    Path newPath = new Path(getCurrentMethodName());
    ExecJob pigJob = pigServer.store("records",
        new Path(newPath, "store").toString(),
        TableStorer.class.getCanonicalName() + "('[s1, s2]; [s3, s4]')");
    Assert.assertNotNull(pigJob.getException());
    System.out.println(pigJob.getException());
  }

  @Test
  // Negative: store to the same path as the source table; the store should
  // fail.
  public void testStorer5() throws ExecException, IOException {
    //
    // Use Pig LOAD to load testing data for the store.
    //
    String query = "records = LOAD '" + path.toString()
        + "' USING org.apache.hadoop.zebra.pig.TableLoader() as (s1,s2,s3,s4,s5,s6);";
    pigServer.registerQuery(query);

    //
    // Use Pig STORE to store the testing data back to the source path.
    //
    System.out.println("path = " + path);
    try {
      pigServer.store("records", path.toString(),
          TableStorer.class.getCanonicalName() + "('[s1, s2]; [s3, s4]')");
    } catch (Exception e) {
      System.out.println(e);
      return;
    }
    Assert.fail("Exception expected");
  }
}