/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.pig.piggybank.test.storage; import java.io.File; import java.io.IOException; import java.util.Iterator; import junit.framework.TestCase; import org.apache.commons.lang.StringUtils; import org.apache.pig.ExecType; import org.apache.pig.data.Tuple; import org.apache.pig.impl.logicalLayer.FrontendException; import org.apache.pig.pigunit.pig.PigServer; import org.apache.pig.test.Util; import org.apache.pig.tools.parameters.ParseException; import org.junit.After; import org.junit.Assert; import org.junit.Before; import org.junit.Test; public class TestFixedWidthLoader { private static final String dataDir = "build/test/tmpdata/"; private static final String testFile = "fixed_width_data"; private PigServer pig; @Before public void setup() throws IOException { pig = new PigServer(ExecType.LOCAL); Util.deleteDirectory(new File(dataDir)); try { pig.mkdirs(dataDir); Util.createLocalInputFile(dataDir + testFile, new String[] { " int long float double bit boolean datetime string string extra", "12345 1234567890000 2.718 3.141593 0 true 2007-04-05T14:30:10Z avertwolowolo", "12345 1234567890000 2.718 3.141593 1 false 2007-04-05T14:30:10Z avertwolowolo moose", " 1234567890000 3.141593 true avert ", " 1234567890000 3.141593 false", " 1234567890000 cerulean true" }); } catch (IOException e) {}; } @After public void cleanup() throws IOException { Util.deleteDirectory(new File(dataDir)); pig.shutdown(); } @Test public void defaultSchema() throws IOException, ParseException { pig.registerQuery( "data = load '" + dataDir + testFile + "' " + "using org.apache.pig.piggybank.storage.FixedWidthLoader('-5, 9-21, 25-29 , 33 - 40, 44-44, 48-52, 55-74, 78-82, 83-90', 'USE_HEADER');" ); Iterator<Tuple> data = pig.openIterator("data"); String[] expected = { "(int,long,float,double,t,olean,datetime,tring,string)", // "bit", "boolean", and first "string" fields cut off properly "(12345,1234567890000,2.718,3.141593,0,true,2007-04-05T14:30:10Z,avert,wolowolo)", "(12345,1234567890000,2.718,3.141593,1,false,2007-04-05T14:30:10Z,avert,wolowolo)", "(,1234567890000,,3.141593,,true,,avert,)", "(,1234567890000,,3.141593,,false,,,)", "(,1234567890000,,cerulean,,true,,,)" // no problem with this since loaded as a bytearray }; Assert.assertEquals(StringUtils.join(expected, "\n"), StringUtils.join(data, "\n")); } @Test public void userSchema() throws IOException, ParseException { pig.registerQuery( "data = load '" + dataDir + testFile + "' " + "using org.apache.pig.piggybank.storage.FixedWidthLoader(" + "'-5, 9-21, 25-29, 33-40, 44, 48-52, 55-74, 78-82, 83-90', " + "'SKIP_HEADER', " + "'i: int, l: long, f: float, d: double, bit: int, b: boolean, dt: datetime, c_arr: chararray, b_arr: bytearray'" + ");" ); Iterator<Tuple> data = pig.openIterator("data"); String[] expected = { // Header skipped "(12345,1234567890000,2.718,3.141593,0,true,2007-04-05T14:30:10.000Z,avert,wolowolo)", // scalar types "(12345,1234567890000,2.718,3.141593,1,false,2007-04-05T14:30:10.000Z,avert,wolowolo)", // ignore extra field "moose" after beryl "(,1234567890000,,3.141593,,true,,avert,)", // nulls fields (all spaces) "(,1234567890000,,3.141593,,false,,,)", // missing fields (line break earlier than expected) "(,1234567890000,,,,true,,,)" // invalid double field "cerulean" turns to null }; Assert.assertEquals(StringUtils.join(expected, "\n"), StringUtils.join(data, "\n")); } @Test public void userSchemaFewerFieldsThanColumns() throws IOException, ParseException { pig.registerQuery( "data = load '" + dataDir + testFile + "' " + "using org.apache.pig.piggybank.storage.FixedWidthLoader(" + "'-5, 9-21, 25-29, 33-40, 44, 48-52, 55-74, 78-82, 83-90', " + "'SKIP_HEADER', " + "'i: int, l: long, f: float, d: double'" + ");" ); Iterator<Tuple> data = pig.openIterator("data"); String[] expected = { "(12345,1234567890000,2.718,3.141593)", "(12345,1234567890000,2.718,3.141593)", "(,1234567890000,,3.141593)", "(,1234567890000,,3.141593)", "(,1234567890000,,)" }; Assert.assertEquals(StringUtils.join(expected, "\n"), StringUtils.join(data, "\n")); } @Test(expected=FrontendException.class) public void doesNotSupportObjectTypes() throws IOException, ParseException { pig.registerQuery( "data = load '" + dataDir + testFile + "' " + "using org.apache.pig.piggybank.storage.FixedWidthLoader(" + "'-5, 9-21, 25-29, 33-40, 44, 48-52, 55-74, 78-82, 83-90', 'SKIP_HEADER', 'i: (j: int, k: int)'" + ");" ); Iterator<Tuple> data = pig.openIterator("data"); } @Test(expected=FrontendException.class) public void fewerColumnsThanSchemaFields() throws IOException, ParseException { pig.registerQuery( "data = load '" + dataDir + testFile + "' " + "using org.apache.pig.piggybank.storage.FixedWidthLoader(" + "'1-5, 9-21, 25-29, 33-40', 'SKIP_HEADER', 'i: int, l: long, f: float, d: double, bit: int, c: chararray, b: bytearray') " + ");" ); Iterator<Tuple> data = pig.openIterator("data"); } @Test(expected=FrontendException.class) public void columnStartsAtZero() throws IOException, ParseException { pig.registerQuery( "data = load '" + dataDir + testFile + "' " + "using org.apache.pig.piggybank.storage.FixedWidthLoader(" + "'0-5', 'SKIP_HEADER', 'i: int'" + ");" ); Iterator<Tuple> data = pig.openIterator("data"); } @Test(expected=FrontendException.class) public void columnEndLessThanStart() throws IOException, ParseException { pig.registerQuery( "data = load '" + dataDir + testFile + "' " + "using org.apache.pig.piggybank.storage.FixedWidthLoader(" + "'5-0', 'SKIP_HEADER', 'i: int'" + ");" ); Iterator<Tuple> data = pig.openIterator("data"); } @Test public void pushProjection() throws IOException, ParseException { pig.registerQuery( "data = load '" + dataDir + testFile + "' " + "using org.apache.pig.piggybank.storage.FixedWidthLoader(" + "'-5, 9-21, 25-29 , 33 - 40, 44-44, 48-52, 55-74, 78-82, 83-90', " + "'SKIP_HEADER', " + "'i: int, l: long, f: float, d: double, bit: int, b: boolean, dt: datetime, c_arr: chararray, b_arr: bytearray'" + ");" ); pig.registerQuery( "projection = foreach data generate $1, $3, $7;" ); Iterator<Tuple> projection = pig.openIterator("projection"); String[] expected = { "(1234567890000,3.141593,avert)", "(1234567890000,3.141593,avert)", "(1234567890000,3.141593,avert)", "(1234567890000,3.141593,)", "(1234567890000,,)" }; Assert.assertEquals(StringUtils.join(expected, "\n"), StringUtils.join(projection, "\n")); } }