package org.archive.hadoop.pig;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.lib.input.NLineInputFormat;
import org.apache.pig.CollectableLoadFunc;
import org.apache.pig.FileSplitComparable;
import org.apache.pig.IndexableLoadFunc;
import org.apache.pig.OrderedLoadFunc;
import org.apache.pig.builtin.TextLoader;
import org.apache.pig.data.Tuple;
import org.apache.pig.data.TupleFactory;
/**
 * Pig load function that reads text lines (via {@link TextLoader}) using
 * {@link ZipNumInputFormat} as its input format.
 *
 * <p>Besides plain loading it implements {@link IndexableLoadFunc} (seek to a
 * key through a {@link ZipNumRecordReader}), {@link OrderedLoadFunc} (splits
 * are ordered by file path and start offset) and {@link CollectableLoadFunc}.
 */
public class ZipNumLoader extends TextLoader
        implements IndexableLoadFunc, CollectableLoadFunc, OrderedLoadFunc {

    // Configuration key names. None of them is referenced inside this class.
    // NOTE(review): presumably consumed by ZipNumInputFormat / ZipNumRecordReader - confirm.
    protected static final String ZIPNUM_SUMMARY_URI = "zipnum.summaryUri";
    protected static final String ZIPNUM_NUM_SPLITS = "zipnum.numSplits";
    protected static final String ZIPNUM_NUM_LINES_PER_SPLIT = "zipnum.numLinesPerSplit";
    protected static final String ZIPNUM_NUM_TOTAL_LINES = "zipnum.numTotalLines";
    protected static final String ZIPNUM_URL_START = "zipnum.url.start";
    protected static final String ZIPNUM_URL_END = "zipnum.url.end";

    /** Not assigned or read within this class; kept for subclasses. */
    protected TupleFactory factory;

    /** Not assigned or read within this class; kept for subclasses. */
    protected String clusterUriOrLoc;

    /** Lines per input split; values {@code <= 0} mean "do not configure it on the job". */
    protected int numLinesPerSplit = 0;

    /** Not read within this class; kept for subclasses. */
    protected int numSplits = 0;

    /** Reader used for the indexed (seek) access path; {@code null} until {@link #initialize}. */
    protected ZipNumRecordReader mergingReader;

    /** Creates a loader that does not set a lines-per-split value on the job. */
    public ZipNumLoader() {
    }

    /**
     * Creates a loader with an explicit number of lines per split.
     *
     * @param numLinesPerSplit decimal integer, parsed with {@link Integer#parseInt(String)}
     * @throws NumberFormatException if the argument is not a parsable integer
     */
    public ZipNumLoader(String numLinesPerSplit) {
        this.numLinesPerSplit = Integer.parseInt(numLinesPerSplit);
    }

    /**
     * Delegates to {@link TextLoader#setLocation} and, when a positive
     * lines-per-split value was given, records it on the job through
     * {@link NLineInputFormat#setNumLinesPerSplit}.
     *
     * @param location input location as passed in by Pig
     * @param job      job whose configuration is updated
     * @throws IOException if the superclass fails to set the location
     */
    @Override
    public void setLocation(String location, Job job) throws IOException {
        super.setLocation(location, job);
        if (numLinesPerSplit > 0) {
            NLineInputFormat.setNumLinesPerSplit(job, numLinesPerSplit);
        }
    }

    /**
     * @return a new {@link ZipNumInputFormat} instance
     */
    @Override
    @SuppressWarnings("rawtypes") // the overridden method declares the raw InputFormat type
    public InputFormat getInputFormat() {
        return new ZipNumInputFormat();
    }

    /**
     * Returns the next tuple from the superclass loader.
     *
     * <p>While a merging reader exists (between {@link #initialize} and
     * {@link #close}), the superclass is re-pointed at that reader before every
     * read, so tuples come from the seekable reader.
     *
     * @return the next tuple as produced by {@link TextLoader#getNext()}
     * @throws IOException if reading fails
     */
    @Override
    public Tuple getNext() throws IOException {
        if (mergingReader != null) {
            super.prepareToRead(mergingReader, null);
        }
        return super.getNext();
    }

    /**
     * Creates the reader used by {@link #seekNear} and {@link #getNext}.
     *
     * <p>NOTE(review): {@code conf} is not used and the new reader is not given a
     * split or context here - confirm that {@link ZipNumRecordReader} sets itself
     * up lazily.
     *
     * @param conf job configuration (currently unused)
     * @throws IOException declared by the interface; not thrown by this implementation
     */
    @Override
    public void initialize(Configuration conf) throws IOException {
        mergingReader = new ZipNumRecordReader();
    }

    /**
     * Positions the merging reader near the key held in the first field of
     * {@code tuple}.
     *
     * <p>Does nothing when the tuple is {@code null}, flagged null, or empty, or
     * when no reader has been created. A key that is not already a
     * {@code String} (for example a Pig bytearray) is converted with
     * {@code toString()} instead of failing with a {@code ClassCastException};
     * a {@code null} key is forwarded unchanged.
     *
     * @param tuple tuple whose first field is the key to seek to
     * @throws IOException if the field cannot be read or the reader fails to seek
     */
    @Override
    public void seekNear(Tuple tuple) throws IOException {
        if (tuple == null || tuple.isNull() || tuple.size() < 1) {
            return;
        }
        if (mergingReader == null) {
            return;
        }
        Object key = tuple.get(0);
        String theKey = (key == null) ? null : key.toString();
        mergingReader.seekNear(theKey);
    }

    /**
     * Closes the merging reader, if any, and clears the reference so that
     * {@link #getNext} falls back to the reader set by the framework.
     *
     * @throws IOException if closing the reader fails
     */
    @Override
    public void close() throws IOException {
        if (mergingReader != null) {
            mergingReader.close();
            mergingReader = null;
        }
    }

    /**
     * Builds a comparable for a split from its file path and start offset.
     *
     * @param split the split to describe; must be a {@link FileSplit}
     * @return a {@link FileSplitComparable} for the split's path and start
     * @throws IOException if {@code split} is {@code null} or not a {@link FileSplit}
     */
    @Override
    public WritableComparable<?> getSplitComparable(InputSplit split)
            throws IOException {
        if (!(split instanceof FileSplit)) {
            String actual = (split == null) ? "null" : split.getClass().getName();
            throw new IOException("ZipNumLoader expected a FileSplit but got " + actual);
        }
        FileSplit fileSplit = (FileSplit) split;
        return new FileSplitComparable(fileSplit.getPath().toString(), fileSplit.getStart());
    }

    /**
     * No-op: this loader performs no action for this request.
     *
     * <p>NOTE(review): confirm that the splits produced by
     * {@link ZipNumInputFormat} never divide the instances of one key across
     * splits; otherwise this needs a real implementation.
     *
     * @throws IOException declared by the interface; not thrown by this implementation
     */
    @Override
    public void ensureAllKeyInstancesInSameSplit() throws IOException {
        // Intentionally empty.
    }
}