package hip.ch3.seqfile.writable;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.*;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.DefaultCodec;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.OutputFormat;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
import org.apache.pig.StoreFunc;
import org.apache.pig.backend.BackendException;
import org.apache.pig.data.DataByteArray;
import org.apache.pig.data.DataType;
import org.apache.pig.data.Tuple;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;

import static org.apache.pig.data.DataType.*;

/**
 * A basic SequenceFile StoreFunc that handles storing the built-in
 * Writable types. The key and value classes, and optionally the
 * compression type and codec, are supplied as constructor arguments
 * from the Pig script.
 */
public class SequenceFileStoreFunc extends StoreFunc {

  private static final Logger LOG =
      LoggerFactory.getLogger(SequenceFileStoreFunc.class);

  @SuppressWarnings("rawtypes")
  protected RecordWriter writer;

  private final Class keyClass;
  private final Class valueClass;
  private final String compressionType;
  private final String compressionCodecClass;

  public SequenceFileStoreFunc(String keyClass, String valueClass)
      throws ClassNotFoundException {
    this(keyClass, valueClass, null, null);
  }

  public SequenceFileStoreFunc(String keyClass, String valueClass,
                               String compressionType,
                               String compressionCodecClass)
      throws ClassNotFoundException {
    this.compressionType = compressionType;
    this.compressionCodecClass = compressionCodecClass;
    this.keyClass = Class.forName(keyClass);
    this.valueClass = Class.forName(valueClass);
  }

  @Override
  public OutputFormat getOutputFormat() throws IOException {
    return new SequenceFileOutputFormat();
  }

  @Override
  public void setStoreLocation(String location, Job job)
      throws IOException {
    job.setOutputKeyClass(keyClass);
    job.setOutputValueClass(valueClass);
    if (compressionType != null && compressionCodecClass != null) {
      // Use the codec class named in the constructor; fall back to
      // DefaultCodec if it can't be loaded.
      Class<? extends CompressionCodec> codecClass;
      try {
        codecClass = Class.forName(compressionCodecClass)
            .asSubclass(CompressionCodec.class);
      } catch (ClassNotFoundException e) {
        LOG.warn("Unable to load codec class '{}'; falling back to DefaultCodec",
            compressionCodecClass, e);
        codecClass = DefaultCodec.class;
      }
      SequenceFileOutputFormat.setOutputCompressorClass(job, codecClass);
      SequenceFileOutputFormat.setOutputCompressionType(job,
          SequenceFile.CompressionType.valueOf(compressionType));
    }
    FileOutputFormat.setOutputPath(job, new Path(location));
  }

  @Override
  public void prepareToWrite(RecordWriter writer) throws IOException {
    this.writer = writer;
  }

  @Override
  public void putNext(Tuple tuple) throws IOException {
    // Only tuples with exactly two fields (key, value) are written;
    // anything else is silently ignored.
    if (tuple != null && tuple.size() == 2) {
      try {
        writer.write(inferWritable(tuple.get(0)),
            inferWritable(tuple.get(1)));
      } catch (InterruptedException e) {
        // The RecordWriter contract declares InterruptedException;
        // rewrap it so it propagates as an IOException.
        throw new IOException(e);
      }
    }
  }

  protected Object inferWritable(Object o) throws BackendException {
    if (LOG.isDebugEnabled()) {
      LOG.debug("Converting object '{}' of type {}", o, o.getClass());
    }
    switch (DataType.findType(o)) {
      case BYTEARRAY:
        return new BytesWritable(((DataByteArray) o).get());
      case CHARARRAY:
        return new Text(o.toString());
      case INTEGER:
        return new IntWritable((Integer) o);
      case LONG:
        return new LongWritable((Long) o);
      case FLOAT:
        return new FloatWritable((Float) o);
      case DOUBLE:
        return new DoubleWritable((Double) o);
      case BOOLEAN:
        return new BooleanWritable((Boolean) o);
      case BYTE:
        return new ByteWritable((Byte) o);
    }
    throw new BackendException("Unable to translate " + o.getClass()
        + " to a Writable datatype");
  }
}
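
/**
 * Usage sketch (an assumption, not part of the original listing): the
 * StoreFunc is normally invoked from a Pig script, which can also be
 * embedded in Java via PigServer. The jar name "hip.jar" and the
 * input/output paths below are hypothetical placeholders.
 */
class SequenceFileStoreFuncExample {
  public static void main(String[] args) throws Exception {
    org.apache.pig.PigServer pig =
        new org.apache.pig.PigServer(org.apache.pig.ExecType.LOCAL);
    // Register the jar containing SequenceFileStoreFunc (hypothetical name).
    pig.registerJar("hip.jar");
    pig.registerQuery(
        "records = LOAD '/tmp/input' AS (key:chararray, value:chararray);");
    // The STORE statement triggers execution; chararray fields are
    // converted to Text, producing a block-compressed SequenceFile
    // of Text key/value pairs.
    pig.registerQuery("STORE records INTO '/tmp/output' USING "
        + "hip.ch3.seqfile.writable.SequenceFileStoreFunc("
        + "'org.apache.hadoop.io.Text', 'org.apache.hadoop.io.Text', "
        + "'BLOCK', 'org.apache.hadoop.io.compress.DefaultCodec');");
  }
}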