package com.twitter.elephantbird.pig.util;

import java.io.File;
import java.io.IOException;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;

import org.apache.commons.cli.ParseException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DataInputBuffer;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.pig.PigServer;
import org.apache.pig.backend.executionengine.ExecException;
import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigSplit;
import org.apache.pig.data.Tuple;
import org.apache.pig.impl.plan.OperatorKey;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;

import com.twitter.elephantbird.mapreduce.input.RawSequenceFileRecordReader;
import com.twitter.elephantbird.pig.load.SequenceFileLoader;
import com.twitter.elephantbird.pig.store.SequenceFileStorage;
import com.twitter.elephantbird.util.HadoopCompat;

/**
 * Base class which facilitates creation of unit tests for {@link WritableConverter}
 * implementations.
 *
 * @author Andy Schlaikjer
 */
public abstract class AbstractTestWritableConverter<W extends Writable, C extends WritableConverter<W>> {
  private final Class<? extends W> writableClass;
  private final Class<? extends C> writableConverterClass;
  private final String writableConverterArguments;
  private final W[] data;
  private final String[] expected;
  private final String valueSchema;
  protected PigServer pigServer;
  protected String tempFilename;

  public AbstractTestWritableConverter(final Class<? extends W> writableClass,
      final Class<? extends C> writableConverterClass, final String writableConverterArguments,
      final W[] data, final String[] expected, final String valueSchema) {
    this.writableClass = writableClass;
    this.writableConverterClass = writableConverterClass;
    this.writableConverterArguments =
        writableConverterArguments == null ? "" : writableConverterArguments;
    this.data = data;
    this.expected = expected;
    this.valueSchema = valueSchema;
  }

  protected void registerReadQuery(String filename, String writableConverterArguments,
      String valueSchema) throws IOException {
    pigServer.registerQuery(String.format("A = LOAD 'file:%s' USING %s('-c %s', '-c %s %s')%s;",
        filename, SequenceFileLoader.class.getName(), IntWritableConverter.class.getName(),
        writableConverterClass.getName(), writableConverterArguments,
        valueSchema == null || valueSchema.isEmpty()
            ? "" : String.format(" AS (key: int, value: %s)", valueSchema)));
  }

  protected void registerReadQuery(String writableConverterArguments, String valueSchema)
      throws IOException {
    registerReadQuery(tempFilename, writableConverterArguments, valueSchema);
  }

  protected void registerReadQuery(String filename) throws IOException {
    registerReadQuery(filename, writableConverterArguments, valueSchema);
  }

  protected void registerReadQuery() throws IOException {
    registerReadQuery(tempFilename, writableConverterArguments, valueSchema);
  }

  protected void registerWriteQuery(String filename, String writableConverterArguments)
      throws IOException {
    pigServer.registerQuery(String.format(
        "STORE A INTO 'file:%s' USING %s('-c %s', '-c %s -t %s -- %s');", filename,
        SequenceFileStorage.class.getName(), IntWritableConverter.class.getName(),
        writableConverterClass.getName(), writableClass.getName(), writableConverterArguments));
  }

  protected void registerWriteQuery(String filename) throws IOException {
    registerWriteQuery(filename, writableConverterArguments);
  }

  @Before
  public void setup() throws IOException {
    // create local Pig server
    pigServer = PigTestUtil.makePigServer();

    // create temp SequenceFile
    final File tempFile = File.createTempFile("test", ".txt");
    tempFilename = tempFile.getAbsolutePath();
    final Path path = new Path("file:///" + tempFilename);
    final Configuration conf = new Configuration();
    final FileSystem fs = path.getFileSystem(conf);
    final IntWritable key = new IntWritable();
    SequenceFile.Writer writer = null;
    try {
      writer = SequenceFile.createWriter(fs, conf, path, key.getClass(), writableClass);
      for (int i = 0; i < data.length; ++i) {
        key.set(i);
        writer.append(key, data[i]);
      }
    } finally {
      IOUtils.closeStream(writer);
    }
  }

  @Test
  public void readOutsidePig() throws ClassCastException, ParseException, ClassNotFoundException,
      InstantiationException, IllegalAccessException, IOException, InterruptedException {
    // simulate Pig front-end runtime
    final SequenceFileLoader<IntWritable, Text> loader =
        new SequenceFileLoader<IntWritable, Text>(
            String.format("-c %s", IntWritableConverter.class.getName()),
            String.format("-c %s %s", writableConverterClass.getName(), writableConverterArguments));
    Job job = new Job();
    loader.setUDFContextSignature("12345");
    loader.setLocation(tempFilename, job);

    // simulate Pig back-end runtime
    final RecordReader<DataInputBuffer, DataInputBuffer> reader = new RawSequenceFileRecordReader();
    final FileSplit fileSplit =
        new FileSplit(new Path(tempFilename), 0, new File(tempFilename).length(),
            new String[] { "localhost" });
    final TaskAttemptContext context =
        HadoopCompat.newTaskAttemptContext(HadoopCompat.getConfiguration(job), new TaskAttemptID());
    reader.initialize(fileSplit, context);
    final InputSplit[] wrappedSplits = new InputSplit[] { fileSplit };
    final int inputIndex = 0;
    final List<OperatorKey> targetOps = Arrays.asList(new OperatorKey("54321", 0));
    final int splitIndex = 0;
    final PigSplit split = new PigSplit(wrappedSplits, inputIndex, targetOps, splitIndex);
    split.setConf(HadoopCompat.getConfiguration(job));
    loader.prepareToRead(reader, split);

    // read tuples and validate
    validate(new LoadFuncTupleIterator(loader));
  }

  @Test
  public void read() throws IOException {
    registerReadQuery();
    validate(pigServer.openIterator("A"));
  }

  @Test
  public void readWriteRead() throws IOException {
    registerReadQuery();
    registerWriteQuery(tempFilename + "-2");
    registerReadQuery(tempFilename + "-2");
    validate(pigServer.openIterator("A"));
  }

  protected void validate(String[] expected, Iterator<Tuple> it) throws ExecException {
    int tupleCount = 0;
    for (; it.hasNext(); ++tupleCount) {
      final Tuple tuple = it.next();
      Assert.assertNotNull(tuple);
      Assert.assertEquals(2, tuple.size());
      Object value = tuple.get(1);
      Assert.assertNotNull(value);
      Assert.assertEquals(expected[tupleCount], value.toString());
    }
    Assert.assertEquals(data.length, tupleCount);
  }

  protected void validate(Iterator<Tuple> it) throws ExecException {
    validate(expected, it);
  }
}
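
// Usage sketch (illustrative only, not part of this class): a concrete test supplies the
// Writable type, the WritableConverter under test, any converter arguments, sample values,
// their expected Pig string renderings, and the Pig schema of the value column. The
// TextConverter/chararray pairing and the class name below are assumptions chosen purely
// as an example; substitute the converter you are actually testing.
//
// public class TestTextConverter extends AbstractTestWritableConverter<Text, TextConverter> {
//   private static final Text[] DATA = { new Text("one"), new Text("two"), new Text("three") };
//   private static final String[] EXPECTED = { "one", "two", "three" };
//
//   public TestTextConverter() {
//     super(Text.class, TextConverter.class, "", DATA, EXPECTED, "chararray");
//   }
// }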