package org.archive.hadoop;
import java.io.IOException;
import java.util.ArrayList;
import java.util.logging.Logger;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.pig.FileInputLoadFunc;
import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigSplit;
import org.apache.pig.data.Tuple;
import org.apache.pig.data.TupleFactory;
import org.archive.resource.MetaData;
/**
 * Pig storage loader for Archive metadata: emits one 3-field tuple
 * (resource name, byte offset, top-level metadata JSON string) per record
 * read from a {@code ResourceRecordReader}.
 *
 * @author brad
 *
 */
public class ArchiveMetadataLoader extends FileInputLoadFunc {
    private static final Logger LOG =
            Logger.getLogger(ArchiveMetadataLoader.class.getName());

    /** Set by prepareToRead(); produces (ResourceContext, MetaData) pairs. */
    ResourceRecordReader reader;

    /** Factory used to materialize output tuples. */
    protected TupleFactory mTupleFactory = TupleFactory.getInstance();

    /** Scratch list reused across getNext() calls to avoid reallocation. */
    private final ArrayList<Object> mProtoTuple;

    private ResourceContext key;
    private MetaData value;

    public ArchiveMetadataLoader() {
        // Pre-sized for the 3 fields emitted per tuple: name, offset, metadata.
        mProtoTuple = new ArrayList<Object>(3);
    }

    /**
     * Returns the InputFormat Pig should use to create splits and readers.
     *
     * @return a new {@link ResourceInputFormat}
     */
    @SuppressWarnings("rawtypes")
    @Override
    public InputFormat getInputFormat() throws IOException {
        return new ResourceInputFormat();
    }

    /**
     * Reads the next record and converts it into a 3-field tuple:
     * (key.name, key.offset, top-level metadata as a String).
     *
     * @return the next tuple, or {@code null} when input is exhausted
     * @throws IOException on read failure, or if the reader thread is
     *         interrupted (the interrupt status is restored before rethrow)
     */
    @Override
    public Tuple getNext() throws IOException {
        boolean next = false;
        try {
            next = reader.nextKeyValue();
        } catch (InterruptedException e) {
            // Restore the interrupt flag so callers up the stack can still
            // observe it; Pig's LoadFunc contract only allows IOException here.
            Thread.currentThread().interrupt();
            throw new IOException(e);
        }
        if (!next) {
            return null;
        }
        try {
            key = reader.getCurrentKey();
            LOG.info(String.format("Loaded key-offset %d\n", key.offset));
            value = reader.getCurrentValue();
        } catch (InterruptedException e) {
            // Same contract as above: preserve interrupt status, wrap as IOE.
            Thread.currentThread().interrupt();
            throw new IOException(e);
        }
        mProtoTuple.add(key.name);
        mProtoTuple.add(key.offset);
        mProtoTuple.add(value.getTopMetaData().toString());
        // newTuple(List) copies the contents, so the scratch list can be
        // cleared and reused on the next call.
        Tuple t = mTupleFactory.newTuple(mProtoTuple);
        mProtoTuple.clear();
        return t;
    }

    /**
     * Captures the RecordReader Pig hands us for this split.
     *
     * @param reader expected to be a {@link ResourceRecordReader}
     * @param arg1   the Pig-wrapped input split (unused)
     */
    @SuppressWarnings("rawtypes")
    @Override
    public void prepareToRead(RecordReader reader, PigSplit arg1)
            throws IOException {
        this.reader = (ResourceRecordReader) reader;
    }

    /**
     * Points the job at the input location supplied in the Pig LOAD statement.
     *
     * @param location input path (or comma-separated paths)
     * @param job      job whose input paths are configured
     */
    @Override
    public void setLocation(String location, Job job) throws IOException {
        FileInputFormat.setInputPaths(job, location);
    }
}