/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ package org.apache.hive.hcatalog.pig; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; import java.io.BufferedReader; import java.io.File; import java.io.FileReader; import java.io.IOException; import java.math.BigDecimal; import java.util.ArrayList; import java.util.Collections; import java.util.Iterator; import java.util.List; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.ql.CommandNeedRetryException; import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse; import org.apache.hive.hcatalog.HcatTestUtils; import org.apache.hive.hcatalog.mapreduce.HCatBaseTest; import org.apache.pig.EvalFunc; import org.apache.pig.ExecType; import org.apache.pig.PigException; import org.apache.pig.PigServer; import org.apache.pig.data.DataByteArray; import org.apache.pig.data.Tuple; import org.apache.pig.impl.logicalLayer.FrontendException; import org.apache.pig.impl.util.LogUtils; import org.joda.time.DateTime; import org.joda.time.DateTimeZone; import org.junit.Test; import org.slf4j.Logger; import 
org.slf4j.LoggerFactory; public abstract class AbstractHCatStorerTest extends HCatBaseTest { static Logger LOG = LoggerFactory.getLogger(AbstractHCatStorerTest.class); static final String INPUT_FILE_NAME = TEST_DATA_DIR + "/input.data"; String storageFormat; public AbstractHCatStorerTest() { storageFormat = getStorageFormat(); } // Start: tests that check values from Pig that are out of range for target column @Test public void testWriteTinyint() throws Exception { pigValueRangeTest("junitTypeTest1", "tinyint", "int", null, Integer.toString(1), Integer.toString(1)); pigValueRangeTestOverflow("junitTypeTest1", "tinyint", "int", null, Integer.toString(300)); pigValueRangeTestOverflow("junitTypeTest2", "tinyint", "int", HCatBaseStorer.OOR_VALUE_OPT_VALUES.Null, Integer.toString(300)); pigValueRangeTestOverflow("junitTypeTest3", "tinyint", "int", HCatBaseStorer.OOR_VALUE_OPT_VALUES.Throw, Integer.toString(300)); } @Test public void testWriteSmallint() throws Exception { pigValueRangeTest("junitTypeTest1", "smallint", "int", null, Integer.toString(Short.MIN_VALUE), Integer.toString(Short.MIN_VALUE)); pigValueRangeTestOverflow("junitTypeTest2", "smallint", "int", HCatBaseStorer.OOR_VALUE_OPT_VALUES.Null, Integer.toString(Short.MAX_VALUE + 1)); pigValueRangeTestOverflow("junitTypeTest3", "smallint", "int", HCatBaseStorer.OOR_VALUE_OPT_VALUES.Throw, Integer.toString(Short.MAX_VALUE + 1)); } @Test public void testWriteChar() throws Exception { pigValueRangeTest("junitTypeTest1", "char(5)", "chararray", null, "xxx", "xxx "); pigValueRangeTestOverflow("junitTypeTest1", "char(5)", "chararray", null, "too_long"); pigValueRangeTestOverflow("junitTypeTest2", "char(5)", "chararray", HCatBaseStorer.OOR_VALUE_OPT_VALUES.Null, "too_long"); pigValueRangeTestOverflow("junitTypeTest3", "char(5)", "chararray", HCatBaseStorer.OOR_VALUE_OPT_VALUES.Throw, "too_long2"); } @Test public void testWriteVarchar() throws Exception { pigValueRangeTest("junitTypeTest1", "varchar(5)", "chararray", 
null, "xxx", "xxx"); pigValueRangeTestOverflow("junitTypeTest1", "varchar(5)", "chararray", null, "too_long"); pigValueRangeTestOverflow("junitTypeTest2", "varchar(5)", "chararray", HCatBaseStorer.OOR_VALUE_OPT_VALUES.Null, "too_long"); pigValueRangeTestOverflow("junitTypeTest3", "varchar(5)", "chararray", HCatBaseStorer.OOR_VALUE_OPT_VALUES.Throw, "too_long2"); } @Test public void testWriteDecimalXY() throws Exception { pigValueRangeTest("junitTypeTest1", "decimal(5,2)", "bigdecimal", null, BigDecimal.valueOf(1.2) .toString(), BigDecimal.valueOf(1.2).toString()); pigValueRangeTestOverflow("junitTypeTest1", "decimal(5,2)", "bigdecimal", null, BigDecimal .valueOf(12345.12).toString()); pigValueRangeTestOverflow("junitTypeTest2", "decimal(5,2)", "bigdecimal", HCatBaseStorer.OOR_VALUE_OPT_VALUES.Null, BigDecimal.valueOf(500.123).toString()); pigValueRangeTestOverflow("junitTypeTest3", "decimal(5,2)", "bigdecimal", HCatBaseStorer.OOR_VALUE_OPT_VALUES.Throw, BigDecimal.valueOf(500.123).toString()); } @Test public void testWriteDecimalX() throws Exception { // interestingly decimal(2) means decimal(2,0) pigValueRangeTest("junitTypeTest1", "decimal(2)", "bigdecimal", null, BigDecimal.valueOf(12) .toString(), BigDecimal.valueOf(12).toString()); pigValueRangeTestOverflow("junitTypeTest2", "decimal(2)", "bigdecimal", HCatBaseStorer.OOR_VALUE_OPT_VALUES.Null, BigDecimal.valueOf(50.123).toString()); pigValueRangeTestOverflow("junitTypeTest3", "decimal(2)", "bigdecimal", HCatBaseStorer.OOR_VALUE_OPT_VALUES.Throw, BigDecimal.valueOf(50.123).toString()); } @Test public void testWriteDecimal() throws Exception { // decimal means decimal(10,0) pigValueRangeTest("junitTypeTest1", "decimal", "bigdecimal", null, BigDecimal.valueOf(1234567890).toString(), BigDecimal.valueOf(1234567890).toString()); pigValueRangeTestOverflow("junitTypeTest2", "decimal", "bigdecimal", HCatBaseStorer.OOR_VALUE_OPT_VALUES.Null, BigDecimal.valueOf(12345678900L).toString()); 
pigValueRangeTestOverflow("junitTypeTest3", "decimal", "bigdecimal", HCatBaseStorer.OOR_VALUE_OPT_VALUES.Throw, BigDecimal.valueOf(12345678900L).toString()); } /** * because we want to ignore TZ which is included in toString() include time to make sure it's 0 */ private static final String FORMAT_4_DATE = "yyyy-MM-dd HH:mm:ss"; @Test public void testWriteDate() throws Exception { DateTime d = new DateTime(1991, 10, 11, 0, 0); pigValueRangeTest("junitTypeTest1", "date", "datetime", null, d.toString(), d.toString(FORMAT_4_DATE), FORMAT_4_DATE); pigValueRangeTestOverflow("junitTypeTest2", "date", "datetime", HCatBaseStorer.OOR_VALUE_OPT_VALUES.Null, d.plusHours(2).toString(), FORMAT_4_DATE);// time // != 0 pigValueRangeTestOverflow("junitTypeTest3", "date", "datetime", HCatBaseStorer.OOR_VALUE_OPT_VALUES.Throw, d.plusMinutes(1).toString(), FORMAT_4_DATE);// time // != // 0 d = new DateTime(1991, 10, 11, 0, 0, DateTimeZone.forOffsetHours(-11)); pigValueRangeTest("junitTypeTest4", "date", "datetime", null, d.toString(), d.toString(FORMAT_4_DATE), FORMAT_4_DATE); pigValueRangeTestOverflow("junitTypeTest5", "date", "datetime", HCatBaseStorer.OOR_VALUE_OPT_VALUES.Null, d.plusHours(2).toString(), FORMAT_4_DATE);// date // out // of // range // due // to // time // != 0 pigValueRangeTestOverflow("junitTypeTest6", "date", "datetime", HCatBaseStorer.OOR_VALUE_OPT_VALUES.Throw, d.plusMinutes(1).toString(), FORMAT_4_DATE);// date // out // of // range // due // to // time!=0 } @Test public void testWriteDate3() throws Exception { DateTime d = new DateTime(1991, 10, 11, 23, 10, DateTimeZone.forOffsetHours(-11)); FrontendException fe = null; // expect to fail since the time component is not 0 pigValueRangeTestOverflow("junitTypeTest4", "date", "datetime", HCatBaseStorer.OOR_VALUE_OPT_VALUES.Throw, d.toString(), FORMAT_4_DATE); pigValueRangeTestOverflow("junitTypeTest5", "date", "datetime", HCatBaseStorer.OOR_VALUE_OPT_VALUES.Null, d.plusHours(2).toString(), FORMAT_4_DATE); 
pigValueRangeTestOverflow("junitTypeTest6", "date", "datetime", HCatBaseStorer.OOR_VALUE_OPT_VALUES.Throw, d.plusMinutes(1).toString(), FORMAT_4_DATE); } @Test public void testWriteDate2() throws Exception { DateTime d = new DateTime(1991, 11, 12, 0, 0, DateTimeZone.forID("US/Eastern")); pigValueRangeTest("junitTypeTest1", "date", "datetime", null, d.toString(), d.toString(FORMAT_4_DATE), FORMAT_4_DATE); pigValueRangeTestOverflow("junitTypeTest2", "date", "datetime", HCatBaseStorer.OOR_VALUE_OPT_VALUES.Null, d.plusHours(2).toString(), FORMAT_4_DATE); pigValueRangeTestOverflow("junitTypeTest2", "date", "datetime", HCatBaseStorer.OOR_VALUE_OPT_VALUES.Null, d.plusMillis(20).toString(), FORMAT_4_DATE); pigValueRangeTestOverflow("junitTypeTest2", "date", "datetime", HCatBaseStorer.OOR_VALUE_OPT_VALUES.Throw, d.plusMillis(12).toString(), FORMAT_4_DATE); pigValueRangeTestOverflow("junitTypeTest3", "date", "datetime", HCatBaseStorer.OOR_VALUE_OPT_VALUES.Throw, d.plusMinutes(1).toString(), FORMAT_4_DATE); } /** * Note that the value that comes back from Hive will have local TZ on it. Using local is * arbitrary but DateTime needs TZ (or will assume default) and Hive does not have TZ. So if you * start with Pig value in TZ=x and write to Hive, when you read it back the TZ may be different. * The millis value should match, of course. 
* * @throws Exception */ @Test public void testWriteTimestamp() throws Exception { DateTime d = new DateTime(1991, 10, 11, 14, 23, 30, 10);// uses default TZ pigValueRangeTest("junitTypeTest1", "timestamp", "datetime", null, d.toString(), d.toDateTime(DateTimeZone.getDefault()).toString()); d = d.plusHours(2); pigValueRangeTest("junitTypeTest2", "timestamp", "datetime", HCatBaseStorer.OOR_VALUE_OPT_VALUES.Null, d.toString(), d.toDateTime(DateTimeZone.getDefault()).toString()); d = d.toDateTime(DateTimeZone.UTC); pigValueRangeTest("junitTypeTest3", "timestamp", "datetime", null, d.toString(), d.toDateTime(DateTimeZone.getDefault()).toString()); d = new DateTime(1991, 10, 11, 23, 24, 25, 26); pigValueRangeTest("junitTypeTest1", "timestamp", "datetime", null, d.toString(), d.toDateTime(DateTimeZone.getDefault()).toString()); d = d.toDateTime(DateTimeZone.UTC); pigValueRangeTest("junitTypeTest3", "timestamp", "datetime", null, d.toString(), d.toDateTime(DateTimeZone.getDefault()).toString()); } // End: tests that check values from Pig that are out of range for target column void pigValueRangeTestOverflow(String tblName, String hiveType, String pigType, HCatBaseStorer.OOR_VALUE_OPT_VALUES goal, String inputValue, String format) throws Exception { pigValueRangeTest(tblName, hiveType, pigType, goal, inputValue, null, format); } void pigValueRangeTestOverflow(String tblName, String hiveType, String pigType, HCatBaseStorer.OOR_VALUE_OPT_VALUES goal, String inputValue) throws Exception { pigValueRangeTest(tblName, hiveType, pigType, goal, inputValue, null, null); } void pigValueRangeTest(String tblName, String hiveType, String pigType, HCatBaseStorer.OOR_VALUE_OPT_VALUES goal, String inputValue, String expectedValue) throws Exception { pigValueRangeTest(tblName, hiveType, pigType, goal, inputValue, expectedValue, null); } /** * This is used to test how Pig values of various data types which are out of range for Hive * target column are handled. 
Currently the options are to raise an error or write NULL. 1. create * a data file with 1 column, 1 row 2. load into pig 3. use pig to store into Hive table 4. read * from Hive table using Pig 5. check that read value is what is expected * * @param tblName Hive table name to create * @param hiveType datatype to use for the single column in table * @param pigType corresponding Pig type when loading file into Pig * @param goal how out-of-range values from Pig are handled by HCat, may be {@code null} * @param inputValue written to file which is read by Pig, thus must be something Pig can read * (e.g. DateTime.toString(), rather than java.sql.Date) * @param expectedValue what Pig should see when reading Hive table * @param format date format to use for comparison of values since default DateTime.toString() * includes TZ which is meaningless for Hive DATE type */ void pigValueRangeTest(String tblName, String hiveType, String pigType, HCatBaseStorer.OOR_VALUE_OPT_VALUES goal, String inputValue, String expectedValue, String format) throws Exception { AbstractHCatLoaderTest.dropTable(tblName, driver); final String field = "f1"; AbstractHCatLoaderTest.createTable(tblName, field + " " + hiveType, null, driver, storageFormat); HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, new String[] { inputValue }); LOG.debug("File=" + INPUT_FILE_NAME); dumpFile(INPUT_FILE_NAME); PigServer server = createPigServer(true); int queryNumber = 1; logAndRegister(server, "A = load '" + INPUT_FILE_NAME + "' as (" + field + ":" + pigType + ");", queryNumber++); Iterator<Tuple> firstLoad = server.openIterator("A"); if (goal == null) { logAndRegister(server, "store A into '" + tblName + "' using " + HCatStorer.class.getName() + "();", queryNumber++); } else { FrontendException fe = null; try { logAndRegister(server, "store A into '" + tblName + "' using " + HCatStorer.class.getName() + "('','','-" + HCatStorer.ON_OOR_VALUE_OPT + " " + goal + "');", queryNumber++); } catch (FrontendException e) { 
fe = e; } switch (goal) { case Null: // do nothing, fall through and verify the data break; case Throw: assertTrue("Expected a FrontendException", fe != null); assertEquals("Expected a different FrontendException.", fe.getMessage(), "Unable to store alias A"); return;// this test is done default: assertFalse("Unexpected goal: " + goal, 1 == 1); } } logAndRegister(server, "B = load '" + tblName + "' using " + HCatLoader.class.getName() + "();", queryNumber); CommandProcessorResponse cpr = driver.run("select * from " + tblName); LOG.debug("cpr.respCode=" + cpr.getResponseCode() + " cpr.errMsg=" + cpr.getErrorMessage() + " for table " + tblName); List l = new ArrayList(); driver.getResults(l); LOG.debug("Dumping rows via SQL from " + tblName); for (Object t : l) { LOG.debug(t == null ? null : t.toString() + " t.class=" + t.getClass()); } Iterator<Tuple> itr = server.openIterator("B"); int numRowsRead = 0; while (itr.hasNext()) { Tuple t = itr.next(); if ("date".equals(hiveType)) { DateTime dateTime = (DateTime) t.get(0); assertTrue(format != null); assertEquals("Comparing Pig to Raw data for table " + tblName, expectedValue, dateTime == null ? null : dateTime.toString(format)); } else { assertEquals("Comparing Pig to Raw data for table " + tblName, expectedValue, t.isNull(0) ? null : t.get(0).toString()); } // see comment at "Dumping rows via SQL..." for why this doesn't work // assertEquals("Comparing Pig to Hive", t.get(0), l.get(0)); numRowsRead++; } assertEquals("Expected " + 1 + " rows; got " + numRowsRead + " file=" + INPUT_FILE_NAME + "; table " + tblName, 1, numRowsRead); /* * Misc notes: Unfortunately Timestamp.toString() adjusts the value for local TZ and 't' is a * String thus the timestamp in 't' doesn't match rawData */ } abstract String getStorageFormat(); /** * Create a data file with datatypes added in 0.13. Read it with Pig and use Pig + HCatStorer to * write to a Hive table. Then read it using Pig and Hive and make sure results match. 
*/ @Test public void testDateCharTypes() throws Exception { final String tblName = "junit_date_char"; AbstractHCatLoaderTest.dropTable(tblName, driver); AbstractHCatLoaderTest.createTable(tblName, "id int, char5 char(5), varchar10 varchar(10), dec52 decimal(5,2)", null, driver, storageFormat); int NUM_ROWS = 5; String[] rows = new String[NUM_ROWS]; for (int i = 0; i < NUM_ROWS; i++) { // since the file is read by Pig, we need to make sure the values are in format that Pig // understands // otherwise it will turn the value to NULL on read rows[i] = i + "\txxxxx\tyyy\t" + 5.2; } HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, rows); LOG.debug("File=" + INPUT_FILE_NAME); // dumpFile(INPUT_FILE_NAME); PigServer server = createPigServer(true); int queryNumber = 1; logAndRegister(server, "A = load '" + INPUT_FILE_NAME + "' as (id:int, char5:chararray, varchar10:chararray, dec52:bigdecimal);", queryNumber++); logAndRegister(server, "store A into '" + tblName + "' using " + HCatStorer.class.getName() + "();", queryNumber++); logAndRegister(server, "B = load '" + tblName + "' using " + HCatLoader.class.getName() + "();", queryNumber); CommandProcessorResponse cpr = driver.run("select * from " + tblName); LOG.debug("cpr.respCode=" + cpr.getResponseCode() + " cpr.errMsg=" + cpr.getErrorMessage()); List l = new ArrayList(); driver.getResults(l); LOG.debug("Dumping rows via SQL from " + tblName); /* * Unfortunately Timestamp.toString() adjusts the value for local TZ and 't' is a String thus * the timestamp in 't' doesn't match rawData */ for (Object t : l) { LOG.debug(t == null ? 
null : t.toString()); } Iterator<Tuple> itr = server.openIterator("B"); int numRowsRead = 0; while (itr.hasNext()) { Tuple t = itr.next(); StringBuilder rowFromPig = new StringBuilder(); for (int i = 0; i < t.size(); i++) { rowFromPig.append(t.get(i)).append("\t"); } rowFromPig.setLength(rowFromPig.length() - 1); assertEquals("Comparing Pig to Raw data", rows[numRowsRead], rowFromPig.toString()); // see comment at "Dumping rows via SQL..." for why this doesn't work (for all types) // assertEquals("Comparing Pig to Hive", rowFromPig.toString(), l.get(numRowsRead)); numRowsRead++; } assertEquals("Expected " + NUM_ROWS + " rows; got " + numRowsRead + " file=" + INPUT_FILE_NAME, NUM_ROWS, numRowsRead); } static void dumpFile(String fileName) throws Exception { File file = new File(fileName); BufferedReader reader = new BufferedReader(new FileReader(file)); String line = null; LOG.debug("Dumping raw file: " + fileName); while ((line = reader.readLine()) != null) { LOG.debug(line); } reader.close(); } @Test public void testPartColsInData() throws IOException, CommandNeedRetryException { driver.run("drop table junit_unparted"); String createTable = "create table junit_unparted(a int) partitioned by (b string) stored as " + storageFormat; int retCode = driver.run(createTable).getResponseCode(); if (retCode != 0) { throw new IOException("Failed to create table."); } int LOOP_SIZE = 11; String[] input = new String[LOOP_SIZE]; for (int i = 0; i < LOOP_SIZE; i++) { input[i] = i + "\t1"; } HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, input); PigServer server = new PigServer(ExecType.LOCAL); server.registerQuery("A = load '" + INPUT_FILE_NAME + "' as (a:int, b:chararray);"); server.registerQuery("store A into 'default.junit_unparted' using " + HCatStorer.class.getName() + "('b=1');"); server.registerQuery("B = load 'default.junit_unparted' using " + HCatLoader.class.getName() + "();"); Iterator<Tuple> itr = server.openIterator("B"); int i = 0; while (itr.hasNext()) { Tuple 
t = itr.next(); assertEquals(2, t.size()); assertEquals(t.get(0), i); assertEquals(t.get(1), "1"); i++; } assertFalse(itr.hasNext()); assertEquals(LOOP_SIZE, i); } @Test public void testMultiPartColsInData() throws Exception { driver.run("drop table employee"); String createTable = "CREATE TABLE employee (emp_id INT, emp_name STRING, emp_start_date STRING , emp_gender STRING ) " + " PARTITIONED BY (emp_country STRING , emp_state STRING ) STORED AS " + storageFormat; int retCode = driver.run(createTable).getResponseCode(); if (retCode != 0) { throw new IOException("Failed to create table."); } String[] inputData = { "111237\tKrishna\t01/01/1990\tM\tIN\tTN", "111238\tKalpana\t01/01/2000\tF\tIN\tKA", "111239\tSatya\t01/01/2001\tM\tIN\tKL", "111240\tKavya\t01/01/2002\tF\tIN\tAP" }; HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, inputData); PigServer pig = new PigServer(ExecType.LOCAL); pig.setBatchOn(); pig.registerQuery("A = LOAD '" + INPUT_FILE_NAME + "' USING PigStorage() AS (emp_id:int,emp_name:chararray,emp_start_date:chararray," + "emp_gender:chararray,emp_country:chararray,emp_state:chararray);"); pig.registerQuery("TN = FILTER A BY emp_state == 'TN';"); pig.registerQuery("KA = FILTER A BY emp_state == 'KA';"); pig.registerQuery("KL = FILTER A BY emp_state == 'KL';"); pig.registerQuery("AP = FILTER A BY emp_state == 'AP';"); pig.registerQuery("STORE TN INTO 'employee' USING " + HCatStorer.class.getName() + "('emp_country=IN,emp_state=TN');"); pig.registerQuery("STORE KA INTO 'employee' USING " + HCatStorer.class.getName() + "('emp_country=IN,emp_state=KA');"); pig.registerQuery("STORE KL INTO 'employee' USING " + HCatStorer.class.getName() + "('emp_country=IN,emp_state=KL');"); pig.registerQuery("STORE AP INTO 'employee' USING " + HCatStorer.class.getName() + "('emp_country=IN,emp_state=AP');"); pig.executeBatch(); driver.run("select * from employee"); ArrayList<String> results = new ArrayList<String>(); driver.getResults(results); assertEquals(4, 
results.size()); Collections.sort(results); assertEquals(inputData[0], results.get(0)); assertEquals(inputData[1], results.get(1)); assertEquals(inputData[2], results.get(2)); assertEquals(inputData[3], results.get(3)); // verify the directories in table location Path path = new Path(client.getTable("default", "employee").getSd().getLocation()); FileSystem fs = path.getFileSystem(hiveConf); assertEquals(1, fs.listStatus(path).length); assertEquals( 4, fs.listStatus(new Path(client.getTable("default", "employee").getSd().getLocation() + File.separator + "emp_country=IN")).length); driver.run("drop table employee"); } @Test public void testStoreInPartiitonedTbl() throws Exception { driver.run("drop table junit_unparted"); String createTable = "create table junit_unparted(a int) partitioned by (b string) stored as " + storageFormat; int retCode = driver.run(createTable).getResponseCode(); if (retCode != 0) { throw new IOException("Failed to create table."); } int LOOP_SIZE = 11; String[] input = new String[LOOP_SIZE]; for (int i = 0; i < LOOP_SIZE; i++) { input[i] = i + ""; } HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, input); PigServer server = new PigServer(ExecType.LOCAL); server.registerQuery("A = load '" + INPUT_FILE_NAME + "' as (a:int);"); server.registerQuery("store A into 'default.junit_unparted' using " + HCatStorer.class.getName() + "('b=1');"); server.registerQuery("B = load 'default.junit_unparted' using " + HCatLoader.class.getName() + "();"); Iterator<Tuple> itr = server.openIterator("B"); int i = 0; while (itr.hasNext()) { Tuple t = itr.next(); assertEquals(2, t.size()); assertEquals(t.get(0), i); assertEquals(t.get(1), "1"); i++; } assertFalse(itr.hasNext()); assertEquals(11, i); // verify the scratch directories has been cleaned up Path path = new Path(client.getTable("default", "junit_unparted").getSd().getLocation()); FileSystem fs = path.getFileSystem(hiveConf); assertEquals(1, fs.listStatus(path).length); } @Test public void testNoAlias() 
throws IOException, CommandNeedRetryException {
    // Checks two schema errors reported when storing into junit_parted: a projected field
    // without a column name, and a column name that is not lowercase.
    driver.run("drop table junit_parted");
    String createTable =
        "create table junit_parted(a int, b string) partitioned by (ds string) stored as "
            + storageFormat;
    int retCode = driver.run(createTable).getResponseCode();
    if (retCode != 0) {
      throw new IOException("Failed to create table.");
    }
    PigServer server = new PigServer(ExecType.LOCAL);
    boolean errCaught = false;
    try {
      server.setBatchOn();
      server.registerQuery("A = load '" + INPUT_FILE_NAME + "' as (a:int, b:chararray);");
      // "a+10" has no alias, so the first projected field carries no column name
      server.registerQuery("B = foreach A generate a+10, b;");
      server.registerQuery("store B into 'junit_parted' using " + HCatStorer.class.getName()
          + "('ds=20100101');");
      server.executeBatch();
    } catch (PigException fe) {
      PigException pe = LogUtils.getPigException(fe);
      assertTrue(pe instanceof FrontendException);
      assertEquals(PigHCatUtil.PIG_EXCEPTION_CODE, pe.getErrorCode());
      assertTrue(pe
          .getMessage()
          .contains(
              "Column name for a field is not specified. Please provide the full schema as an argument to HCatStorer."));
      errCaught = true;
    }
    assertTrue(errCaught);
    errCaught = false;
    try {
      server.setBatchOn();
      // second column is deliberately named with an upper-case "B"
      server.registerQuery("A = load '" + INPUT_FILE_NAME + "' as (a:int, B:chararray);");
      server.registerQuery("B = foreach A generate a, B;");
      server.registerQuery("store B into 'junit_parted' using " + HCatStorer.class.getName()
          + "('ds=20100101');");
      server.executeBatch();
    } catch (PigException fe) {
      PigException pe = LogUtils.getPigException(fe);
      assertTrue(pe instanceof FrontendException);
      assertEquals(PigHCatUtil.PIG_EXCEPTION_CODE, pe.getErrorCode());
      assertTrue(pe.getMessage().contains(
          "Column names should all be in lowercase. Invalid name found: B"));
      errCaught = true;
    }
    driver.run("drop table junit_parted");
    assertTrue(errCaught);
  }

  /**
   * Splits one input relation with two filters and stores each part into a different table in a
   * single Pig batch; the concatenated Hive results must equal the input rows in order.
   */
  @Test
  public void testStoreMultiTables() throws IOException, CommandNeedRetryException {
    driver.run("drop table junit_unparted");
    String createTable =
        "create table junit_unparted(a int, b string) stored as " + storageFormat;
    int retCode = driver.run(createTable).getResponseCode();
    if (retCode != 0) {
      throw new IOException("Failed to create table.");
    }
    driver.run("drop table junit_unparted2");
    // NOTE(review): the second table is always RCFILE rather than storageFormat — confirm this
    // is intentional.
    createTable = "create table junit_unparted2(a int, b string) stored as RCFILE";
    retCode = driver.run(createTable).getResponseCode();
    if (retCode != 0) {
      throw new IOException("Failed to create table.");
    }

    // rows "i<TAB>j" for i, j in 1..LOOP_SIZE
    int LOOP_SIZE = 3;
    String[] input = new String[LOOP_SIZE * LOOP_SIZE];
    int k = 0;
    for (int i = 1; i <= LOOP_SIZE; i++) {
      String si = i + "";
      for (int j = 1; j <= LOOP_SIZE; j++) {
        input[k++] = si + "\t" + j;
      }
    }
    HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, input);
    PigServer server = new PigServer(ExecType.LOCAL);
    server.setBatchOn();
    server.registerQuery("A = load '" + INPUT_FILE_NAME + "' as (a:int, b:chararray);");
    server.registerQuery("B = filter A by a < 2;");
    server.registerQuery("store B into 'junit_unparted' using " + HCatStorer.class.getName()
        + "();");
    server.registerQuery("C = filter A by a >= 2;");
    server.registerQuery("store C into 'junit_unparted2' using " + HCatStorer.class.getName()
        + "();");
    server.executeBatch();

    driver.run("select * from junit_unparted");
    ArrayList<String> res = new ArrayList<String>();
    driver.getResults(res);
    driver.run("select * from junit_unparted2");
    ArrayList<String> res2 = new ArrayList<String>();
    driver.getResults(res2);

    res.addAll(res2);
    driver.run("drop table junit_unparted");
    driver.run("drop table junit_unparted2");

    Iterator<String> itr = res.iterator();
    for (int i = 0; i < LOOP_SIZE * LOOP_SIZE; i++) {
      assertEquals(input[i], itr.next());
    }

    assertFalse(itr.hasNext());
  }

  @Test
  public void testStoreWithNoSchema()
throws IOException, CommandNeedRetryException {
    // Stores with HCatStorer('') — an empty first argument and no schema argument — and expects
    // Hive to return the input rows unchanged and in order.
    driver.run("drop table junit_unparted");
    String createTable =
        "create table junit_unparted(a int, b string) stored as " + storageFormat;
    int retCode = driver.run(createTable).getResponseCode();
    if (retCode != 0) {
      throw new IOException("Failed to create table.");
    }

    // rows "i<TAB>j" for i, j in 1..LOOP_SIZE
    int LOOP_SIZE = 3;
    String[] input = new String[LOOP_SIZE * LOOP_SIZE];
    int k = 0;
    for (int i = 1; i <= LOOP_SIZE; i++) {
      String si = i + "";
      for (int j = 1; j <= LOOP_SIZE; j++) {
        input[k++] = si + "\t" + j;
      }
    }
    HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, input);
    PigServer server = new PigServer(ExecType.LOCAL);
    server.setBatchOn();
    server.registerQuery("A = load '" + INPUT_FILE_NAME + "' as (a:int, b:chararray);");
    server.registerQuery("store A into 'default.junit_unparted' using "
        + HCatStorer.class.getName() + "('');");
    server.executeBatch();

    driver.run("select * from junit_unparted");
    ArrayList<String> res = new ArrayList<String>();
    driver.getResults(res);
    driver.run("drop table junit_unparted");
    Iterator<String> itr = res.iterator();
    for (int i = 0; i < LOOP_SIZE * LOOP_SIZE; i++) {
      assertEquals(input[i], itr.next());
    }

    assertFalse(itr.hasNext());
  }

  /**
   * Stores with HCatStorer() — no constructor arguments, unqualified table name — and expects
   * Hive to return the input rows unchanged and in order.
   */
  @Test
  public void testStoreWithNoCtorArgs() throws IOException, CommandNeedRetryException {
    driver.run("drop table junit_unparted");
    String createTable =
        "create table junit_unparted(a int, b string) stored as " + storageFormat;
    int retCode = driver.run(createTable).getResponseCode();
    if (retCode != 0) {
      throw new IOException("Failed to create table.");
    }

    // rows "i<TAB>j" for i, j in 1..LOOP_SIZE
    int LOOP_SIZE = 3;
    String[] input = new String[LOOP_SIZE * LOOP_SIZE];
    int k = 0;
    for (int i = 1; i <= LOOP_SIZE; i++) {
      String si = i + "";
      for (int j = 1; j <= LOOP_SIZE; j++) {
        input[k++] = si + "\t" + j;
      }
    }
    HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, input);
    PigServer server = new PigServer(ExecType.LOCAL);
    server.setBatchOn();
    server.registerQuery("A = load '" + INPUT_FILE_NAME + "' as (a:int, b:chararray);");
    server.registerQuery("store A into 'junit_unparted' using " + HCatStorer.class.getName()
        + "();");
    server.executeBatch();

    driver.run("select * from junit_unparted");
    ArrayList<String> res = new ArrayList<String>();
    driver.getResults(res);
    driver.run("drop table junit_unparted");
    Iterator<String> itr = res.iterator();
    for (int i = 0; i < LOOP_SIZE * LOOP_SIZE; i++) {
      assertEquals(input[i], itr.next());
    }

    assertFalse(itr.hasNext());
  }

  /**
   * Stores a relation that the filter {@code a > 100} leaves empty (input values of {@code a}
   * are 1..3) and expects the table to contain no rows.
   */
  @Test
  public void testEmptyStore() throws IOException, CommandNeedRetryException {
    driver.run("drop table junit_unparted");
    String createTable =
        "create table junit_unparted(a int, b string) stored as " + storageFormat;
    int retCode = driver.run(createTable).getResponseCode();
    if (retCode != 0) {
      throw new IOException("Failed to create table.");
    }

    // rows "i<TAB>j" for i, j in 1..LOOP_SIZE
    int LOOP_SIZE = 3;
    String[] input = new String[LOOP_SIZE * LOOP_SIZE];
    int k = 0;
    for (int i = 1; i <= LOOP_SIZE; i++) {
      String si = i + "";
      for (int j = 1; j <= LOOP_SIZE; j++) {
        input[k++] = si + "\t" + j;
      }
    }
    HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, input);
    PigServer server = new PigServer(ExecType.LOCAL);
    server.setBatchOn();
    server.registerQuery("A = load '" + INPUT_FILE_NAME + "' as (a:int, b:chararray);");
    server.registerQuery("B = filter A by a > 100;");
    server.registerQuery("store B into 'default.junit_unparted' using "
        + HCatStorer.class.getName() + "('','a:int,b:chararray');");
    server.executeBatch();

    driver.run("select * from junit_unparted");
    ArrayList<String> res = new ArrayList<String>();
    driver.getResults(res);
    driver.run("drop table junit_unparted");
    Iterator<String> itr = res.iterator();
    assertFalse(itr.hasNext());
  }

  /**
   * Stores Pig tuples and bags into Hive struct and array columns and compares the rows Hive
   * returns with their expected textual form.
   */
  @Test
  public void testBagNStruct() throws IOException, CommandNeedRetryException {
    driver.run("drop table junit_unparted");
    String createTable =
        "create table junit_unparted(b string,a struct<a1:int>, arr_of_struct array<string>, "
            + "arr_of_struct2 array<struct<s1:string,s2:string>>, arr_of_struct3 array<struct<s3:string>>) stored as "
            + storageFormat;
    int retCode =
driver.run(createTable).getResponseCode();
    if (retCode != 0) {
      throw new IOException("Failed to create table.");
    }

    String[] inputData =
        new String[] { "zookeeper\t(2)\t{(pig)}\t{(pnuts,hdfs)}\t{(hadoop),(hcat)}",
            "chubby\t(2)\t{(sawzall)}\t{(bigtable,gfs)}\t{(mapreduce),(hcat)}" };

    HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, inputData);

    PigServer server = new PigServer(ExecType.LOCAL);
    server.setBatchOn();
    server
        .registerQuery("A = load '"
            + INPUT_FILE_NAME
            + "' as (b:chararray, a:tuple(a1:int), arr_of_struct:bag{mytup:tuple(s1:chararray)}, arr_of_struct2:bag{mytup:tuple(s1:chararray,s2:chararray)}, arr_of_struct3:bag{t3:tuple(s3:chararray)});");
    server
        .registerQuery("store A into 'default.junit_unparted' using "
            + HCatStorer.class.getName()
            + "('','b:chararray, a:tuple(a1:int),"
            + " arr_of_struct:bag{mytup:tuple(s1:chararray)}, arr_of_struct2:bag{mytup:tuple(s1:chararray,s2:chararray)}, arr_of_struct3:bag{t3:tuple(s3:chararray)}');");
    server.executeBatch();

    driver.run("select * from junit_unparted");
    ArrayList<String> res = new ArrayList<String>();
    driver.getResults(res);
    driver.run("drop table junit_unparted");
    Iterator<String> itr = res.iterator();
    assertEquals(
        "zookeeper\t{\"a1\":2}\t[\"pig\"]\t[{\"s1\":\"pnuts\",\"s2\":\"hdfs\"}]\t[{\"s3\":\"hadoop\"},{\"s3\":\"hcat\"}]",
        itr.next());
    assertEquals(
        "chubby\t{\"a1\":2}\t[\"sawzall\"]\t[{\"s1\":\"bigtable\",\"s2\":\"gfs\"}]\t[{\"s3\":\"mapreduce\"},{\"s3\":\"hcat\"}]",
        itr.next());
    assertFalse(itr.hasNext());
  }

  /**
   * Stores int, float, double, long, chararray, boolean and bytearray fields (with some empty
   * values) into a table that has one more binary column than the Pig schema, then checks both
   * the rows Hive returns and the tuples HCatLoader returns.
   */
  @Test
  public void testStoreFuncAllSimpleTypes() throws IOException, CommandNeedRetryException {
    driver.run("drop table junit_unparted");
    String createTable =
        "create table junit_unparted(a int, b float, c double, d bigint, e string, h boolean, f binary, g binary) stored as "
            + storageFormat;
    int retCode = driver.run(createTable).getResponseCode();
    if (retCode != 0) {
      throw new IOException("Failed to create table.");
    }

    // i is incremented by the array index expression before the right-hand side is evaluated,
    // so the second row is built with i == 2 and the third with i == 3.
    int i = 0;
    String[] input = new String[3];
    input[i++] = "0\t\t\t\t\t\t\t"; // Empty values except first column
    input[i++] =
        "\t" + i * 2.1f + "\t" + i * 1.1d + "\t" + i * 2L + "\t" + "lets hcat" + "\t" + "true"
            + "\tbinary-data"; // First column empty
    input[i++] =
        i + "\t" + i * 2.1f + "\t" + i * 1.1d + "\t" + i * 2L + "\t" + "lets hcat" + "\t"
            + "false" + "\tbinary-data";

    HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, input);
    PigServer server = new PigServer(ExecType.LOCAL);
    server.setBatchOn();
    server.registerQuery("A = load '" + INPUT_FILE_NAME
        + "' as (a:int, b:float, c:double, d:long, e:chararray, h:boolean, f:bytearray);");
    // null gets stored into column g which is a binary field.
    server.registerQuery("store A into 'default.junit_unparted' using "
        + HCatStorer.class.getName()
        + "('','a:int, b:float, c:double, d:long, e:chararray, h:boolean, f:bytearray');");
    server.executeBatch();

    driver.run("select * from junit_unparted");
    ArrayList<String> res = new ArrayList<String>();
    driver.getResults(res);

    Iterator<String> itr = res.iterator();
    String next = itr.next();
    assertEquals("0\tNULL\tNULL\tNULL\tNULL\tNULL\tNULL\tNULL", next);
    assertEquals("NULL\t4.2\t2.2\t4\tlets hcat\ttrue\tbinary-data\tNULL", itr.next());
    assertEquals("3\t6.2999997\t3.3000000000000003\t6\tlets hcat\tfalse\tbinary-data\tNULL",
        itr.next());
    assertFalse(itr.hasNext());

    server.registerQuery("B = load 'junit_unparted' using " + HCatLoader.class.getName() + ";");
    Iterator<Tuple> iter = server.openIterator("B");
    int count = 0;
    // counts tuples whose field at index 6 (column f) is null; only the first input row has it
    // empty
    int num5nulls = 0;
    while (iter.hasNext()) {
      Tuple t = iter.next();
      if (t.get(6) == null) {
        num5nulls++;
      } else {
        assertTrue(t.get(6) instanceof DataByteArray);
      }
      // index 7 is column g, which the Pig schema never supplies
      assertNull(t.get(7));
      count++;
    }
    assertEquals(3, count);
    assertEquals(1, num5nulls);
    driver.run("drop table junit_unparted");
  }

  /**
   * Stores a two-column relation with an explicit schema argument and expects Hive to return the
   * same rows in the same order.
   */
  @Test
  public void testStoreFuncSimple() throws IOException, CommandNeedRetryException {
    driver.run("drop table junit_unparted");
    String createTable =
        "create table junit_unparted(a int, b string) stored as " + storageFormat;
    int retCode =
driver.run(createTable).getResponseCode(); if (retCode != 0) { throw new IOException("Failed to create table."); } int LOOP_SIZE = 3; String[] inputData = new String[LOOP_SIZE * LOOP_SIZE]; int k = 0; for (int i = 1; i <= LOOP_SIZE; i++) { String si = i + ""; for (int j = 1; j <= LOOP_SIZE; j++) { inputData[k++] = si + "\t" + j; } } HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, inputData); PigServer server = new PigServer(ExecType.LOCAL); server.setBatchOn(); server.registerQuery("A = load '" + INPUT_FILE_NAME + "' as (a:int, b:chararray);"); server.registerQuery("store A into 'default.junit_unparted' using " + HCatStorer.class.getName() + "('','a:int,b:chararray');"); server.executeBatch(); driver.run("select * from junit_unparted"); ArrayList<String> res = new ArrayList<String>(); driver.getResults(res); driver.run("drop table junit_unparted"); Iterator<String> itr = res.iterator(); for (int i = 1; i <= LOOP_SIZE; i++) { String si = i + ""; for (int j = 1; j <= LOOP_SIZE; j++) { assertEquals(si + "\t" + j, itr.next()); } } assertFalse(itr.hasNext()); } @Test public void testDynamicPartitioningMultiPartColsInDataPartialSpec() throws IOException, CommandNeedRetryException { driver.run("drop table if exists employee"); String createTable = "CREATE TABLE employee (emp_id INT, emp_name STRING, emp_start_date STRING , emp_gender STRING ) " + " PARTITIONED BY (emp_country STRING , emp_state STRING ) STORED AS " + storageFormat; int retCode = driver.run(createTable).getResponseCode(); if (retCode != 0) { throw new IOException("Failed to create table."); } String[] inputData = { "111237\tKrishna\t01/01/1990\tM\tIN\tTN", "111238\tKalpana\t01/01/2000\tF\tIN\tKA", "111239\tSatya\t01/01/2001\tM\tIN\tKL", "111240\tKavya\t01/01/2002\tF\tIN\tAP" }; HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, inputData); PigServer pig = new PigServer(ExecType.LOCAL); pig.setBatchOn(); pig.registerQuery("A = LOAD '" + INPUT_FILE_NAME + "' USING PigStorage() AS 
(emp_id:int,emp_name:chararray,emp_start_date:chararray," + "emp_gender:chararray,emp_country:chararray,emp_state:chararray);"); pig.registerQuery("IN = FILTER A BY emp_country == 'IN';"); pig.registerQuery("STORE IN INTO 'employee' USING " + HCatStorer.class.getName() + "('emp_country=IN');"); pig.executeBatch(); driver.run("select * from employee"); ArrayList<String> results = new ArrayList<String>(); driver.getResults(results); assertEquals(4, results.size()); Collections.sort(results); assertEquals(inputData[0], results.get(0)); assertEquals(inputData[1], results.get(1)); assertEquals(inputData[2], results.get(2)); assertEquals(inputData[3], results.get(3)); driver.run("drop table employee"); } @Test public void testDynamicPartitioningMultiPartColsInDataNoSpec() throws IOException, CommandNeedRetryException { driver.run("drop table if exists employee"); String createTable = "CREATE TABLE employee (emp_id INT, emp_name STRING, emp_start_date STRING , emp_gender STRING ) " + " PARTITIONED BY (emp_country STRING , emp_state STRING ) STORED AS " + storageFormat; int retCode = driver.run(createTable).getResponseCode(); if (retCode != 0) { throw new IOException("Failed to create table."); } String[] inputData = { "111237\tKrishna\t01/01/1990\tM\tIN\tTN", "111238\tKalpana\t01/01/2000\tF\tIN\tKA", "111239\tSatya\t01/01/2001\tM\tIN\tKL", "111240\tKavya\t01/01/2002\tF\tIN\tAP" }; HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, inputData); PigServer pig = new PigServer(ExecType.LOCAL); pig.setBatchOn(); pig.registerQuery("A = LOAD '" + INPUT_FILE_NAME + "' USING PigStorage() AS (emp_id:int,emp_name:chararray,emp_start_date:chararray," + "emp_gender:chararray,emp_country:chararray,emp_state:chararray);"); pig.registerQuery("IN = FILTER A BY emp_country == 'IN';"); pig.registerQuery("STORE IN INTO 'employee' USING " + HCatStorer.class.getName() + "();"); pig.executeBatch(); driver.run("select * from employee"); ArrayList<String> results = new ArrayList<String>(); 
driver.getResults(results); assertEquals(4, results.size()); Collections.sort(results); assertEquals(inputData[0], results.get(0)); assertEquals(inputData[1], results.get(1)); assertEquals(inputData[2], results.get(2)); assertEquals(inputData[3], results.get(3)); driver.run("drop table employee"); } @Test public void testDynamicPartitioningMultiPartColsNoDataInDataNoSpec() throws IOException, CommandNeedRetryException { driver.run("drop table if exists employee"); String createTable = "CREATE TABLE employee (emp_id INT, emp_name STRING, emp_start_date STRING , emp_gender STRING ) " + " PARTITIONED BY (emp_country STRING , emp_state STRING ) STORED AS " + storageFormat; int retCode = driver.run(createTable).getResponseCode(); if (retCode != 0) { throw new IOException("Failed to create table."); } String[] inputData = {}; HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, inputData); PigServer pig = new PigServer(ExecType.LOCAL); pig.setBatchOn(); pig.registerQuery("A = LOAD '" + INPUT_FILE_NAME + "' USING PigStorage() AS (emp_id:int,emp_name:chararray,emp_start_date:chararray," + "emp_gender:chararray,emp_country:chararray,emp_state:chararray);"); pig.registerQuery("IN = FILTER A BY emp_country == 'IN';"); pig.registerQuery("STORE IN INTO 'employee' USING " + HCatStorer.class.getName() + "();"); pig.executeBatch(); driver.run("select * from employee"); ArrayList<String> results = new ArrayList<String>(); driver.getResults(results); assertEquals(0, results.size()); driver.run("drop table employee"); } @Test public void testPartitionPublish() throws IOException, CommandNeedRetryException { driver.run("drop table ptn_fail"); String createTable = "create table ptn_fail(a int, c string) partitioned by (b string) stored as " + storageFormat; int retCode = driver.run(createTable).getResponseCode(); if (retCode != 0) { throw new IOException("Failed to create table."); } int LOOP_SIZE = 11; String[] input = new String[LOOP_SIZE]; for (int i = 0; i < LOOP_SIZE; i++) { input[i] 
= i + "\tmath"; } HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, input); PigServer server = new PigServer(ExecType.LOCAL); server.setBatchOn(); server.registerQuery("A = load '" + INPUT_FILE_NAME + "' as (a:int, c:chararray);"); server.registerQuery("B = filter A by " + FailEvalFunc.class.getName() + "($0);"); server.registerQuery("store B into 'ptn_fail' using " + HCatStorer.class.getName() + "('b=math');"); server.executeBatch(); String query = "show partitions ptn_fail"; retCode = driver.run(query).getResponseCode(); if (retCode != 0) { throw new IOException("Error " + retCode + " running query " + query); } ArrayList<String> res = new ArrayList<String>(); driver.getResults(res); assertEquals(0, res.size()); // Make sure the partitions directory is not in hdfs. assertTrue((new File(TEST_WAREHOUSE_DIR + "/ptn_fail")).exists()); assertFalse((new File(TEST_WAREHOUSE_DIR + "/ptn_fail/b=math")).exists()); } static public class FailEvalFunc extends EvalFunc<Boolean> { /* * @param Tuple /* @return null /* @throws IOException * * @see org.apache.pig.EvalFunc#exec(org.apache.pig.data.Tuple) */ @Override public Boolean exec(Tuple tuple) throws IOException { throw new IOException("Eval Func to mimic Failure."); } } }