package org.archive.hadoop.pig;
import java.io.IOException;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.NLineInputFormat;
import org.apache.pig.builtin.TextLoader;
import org.apache.pig.data.TupleFactory;
/**
 * Pig loader that extends {@link TextLoader} and reads its input through an
 * {@link NLineInputFormat} whose record reader is a {@code ZipNumRecordReader}.
 *
 * <p>When constructed with a positive lines-per-split value, that value is
 * applied to the job via
 * {@link NLineInputFormat#setNumLinesPerSplit(Job, int)} each time
 * {@link #setLocation(String, Job)} is called.
 */
public class ZipNumLoader extends TextLoader {

    // String keys under the "zipnum." prefix. The active code in this class
    // does not read or write any of them; some appear only in the disabled
    // legacy code kept further down.
    protected static final String ZIPNUM_SUMMARY_URI = "zipnum.summaryUri";
    protected static final String ZIPNUM_NUM_SPLITS = "zipnum.numSplits";
    protected static final String ZIPNUM_NUM_LINES_PER_SPLIT = "zipnum.numLinesPerSplit";
    protected static final String ZIPNUM_NUM_TOTAL_LINES = "zipnum.numTotalLines";
    protected static final String ZIPNUM_URL_START = "zipnum.url.start";
    protected static final String ZIPNUM_URL_END = "zipnum.url.end";

    // The following three fields are never assigned by the active code in
    // this class; they stay available to subclasses.
    protected TupleFactory factory;
    protected String clusterUriOrLoc;
    protected ZipNumRecordReader lastReader;

    /** Lines per input split; only applied to the job when greater than zero. */
    protected int numLinesPerSplit = 0;

    /** Split count; not consulted by the active code in this class. */
    protected int numSplits = 0;

    /**
     * Creates a loader that leaves the job's lines-per-split setting untouched.
     */
    public ZipNumLoader() {
        super();
    }

    /**
     * Creates a loader with an explicit lines-per-split value.
     *
     * @param numLinesPerSplit decimal integer text, parsed with
     *        {@link Integer#parseInt(String)}
     * @throws NumberFormatException if the text is not a parsable integer
     */
    public ZipNumLoader(String numLinesPerSplit) {
        super();
        this.numLinesPerSplit = Integer.parseInt(numLinesPerSplit);
    }

    // ------------------------------------------------------------------
    // Disabled legacy code, retained for reference only.
    // ------------------------------------------------------------------
    // public ZipNumLoader(String param, String clusterUriOrLoc)
    // {
    //     this();
    //     this.numSplits = 0;
    //     this.numLinesPerSplit = Integer.parseInt(param);
    //     this.clusterUriOrLoc = clusterUriOrLoc;
    // }
    //
    // @Override
    // public String relativeToAbsolutePath(String location, Path curDir)
    //         throws IOException {
    //
    //     if (GeneralURIStreamFactory.isHttp(location)) {
    //         return URLDecoder.decode(location, "UTF-8");
    //     }
    //
    //     return super.relativeToAbsolutePath(location, curDir);
    // }
    //
    // @Override
    // public void setLocation(String location, Job job) throws IOException {
    //     Configuration conf = job.getConfiguration();
    //
    //     conf.set(ZIPNUM_SUMMARY_URI, location);
    //
    //     if (numLinesPerSplit > 0) {
    //         conf.setInt(ZIPNUM_NUM_LINES_PER_SPLIT, numLinesPerSplit);
    //     }
    //
    //     if (numSplits > 0) {
    //         conf.setInt(ZIPNUM_NUM_SPLITS, numSplits);
    //     }
    //
    //     super.setLocation(location, job);
    // }

    /**
     * Delegates to {@link TextLoader#setLocation(String, Job)} and then, when a
     * positive lines-per-split value was configured, applies it to the job.
     *
     * @param location input location, passed through to the superclass
     * @param job      job being configured
     * @throws IOException if the superclass call fails
     */
    @Override
    public void setLocation(String location, Job job) throws IOException {
        super.setLocation(location, job);

        // Zero or negative means "not configured": leave the job as it is.
        if (numLinesPerSplit <= 0) {
            return;
        }
        NLineInputFormat.setNumLinesPerSplit(job, numLinesPerSplit);
    }

    /**
     * Builds the input format used by this loader.
     *
     * @return a new {@link NLineInputFormat} whose
     *         {@code createRecordReader} hands out a fresh
     *         {@code ZipNumRecordReader} on every call
     */
    @Override
    public InputFormat getInputFormat() {
        final NLineInputFormat zipNumFormat = new NLineInputFormat() {
            @Override
            public RecordReader createRecordReader(
                    InputSplit genericSplit, TaskAttemptContext context)
                    throws IOException {
                // Disabled alternatives, retained for reference only:
                //Path path = ((FileSplit)genericSplit).getPath();
                //in = lastReader = new ZipNumRecordReader();
                //return lastReader;
                return new ZipNumRecordReader();
            }
        };
        return zipNumFormat;
    }
}