package org.apache.pig.impl.io;
import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import org.apache.pig.SLoadFunc;
import org.apache.pig.backend.stratosphere.executionengine.contractsLayer.PigDataSource;
import org.apache.pig.builtin.SPigStorage;
import org.apache.pig.data.Tuple;
import eu.stratosphere.nephele.configuration.Configuration;
import eu.stratosphere.nephele.fs.FileInputSplit;
import eu.stratosphere.nephele.io.RecordReader;
import eu.stratosphere.nephele.io.library.FileLineWriter;
import eu.stratosphere.nephele.template.InputSplit;
import eu.stratosphere.nephele.types.Record;
import eu.stratosphere.pact.common.contract.FileDataSource;
import eu.stratosphere.pact.common.generic.io.InputFormat;
import eu.stratosphere.pact.common.io.FileInputFormat;
import eu.stratosphere.pact.common.io.TextInputFormat;
/**
* Wrapper Loader which wraps a real SLoadFunc underneath and allows
* to read a file completely starting a given split
*
* Based on org.apache.pig.impl.io.ReadToEndLoader
*
* The call sequence to use this is:
* 1) construct an object using the constructor
* 2) Call getNext() in a loop till it returns null
*/
public class SReadToEndLoader extends SLoadFunc {
/**
* the wrapped LoadFunc which will do the actual reading
*/
private SLoadFunc wrappedLoadFunc;
private Configuration conf;
/**
* the input location string (typically input file/dir name )
*/
private String inputLocation;
/**
* If the splits to be read are not in increasing sequence of integers
* this array can be used
*/
private int[] toReadSplits = null;
/**
* index into toReadSplits
*/
private int toReadSplitsIdx = 0;
/**
* the index of the split the loader is currently reading from
*/
private int curSplitIndex;
/**
* the input splits returned by underlying {@link InputFormat#getSplits(JobContext)}
*/
private List<FileInputSplit> inpSplits = null;
/**
* underlying RecordReader
*/
private RecordReader reader = null;
/**
* underlying InputFormat
*/
private SPigTextInputFormat inputFormat = null;
public SReadToEndLoader(String inputLocation, int splitIndex) throws IOException {
this.inputLocation = inputLocation;
this.curSplitIndex = splitIndex;
init();
}
/**
* @param wrappedLoadFunc
* @param conf
* @param inputLocation
* @param splitIndex
* @throws IOException
* @throws InterruptedException
*/
public SReadToEndLoader(SLoadFunc wrappedLoadFunc, Configuration conf,
String inputLocation, int splitIndex) throws IOException {
this.wrappedLoadFunc = wrappedLoadFunc;
this.inputLocation = inputLocation;
this.conf = conf;
this.curSplitIndex = splitIndex;
init();
}
/**
* @param wrappedLoadFunc
* @param inputLocation
* @param splitIndex
* @throws IOException
* @throws InterruptedException
*/
public SReadToEndLoader(SLoadFunc wrappedLoadFunc, String inputLocation, int splitIndex) throws IOException {
this.wrappedLoadFunc = wrappedLoadFunc;
this.inputLocation = inputLocation;
this.curSplitIndex = splitIndex;
init();
}
/**
* This constructor takes an array of split indexes (toReadSplitIdxs) of the
* splits to be read.
* @param wrappedLoadFunc
* @param conf
* @param inputLocation
* @param toReadSplitIdxs
* @throws IOException
* @throws InterruptedException
*/
public SReadToEndLoader(SLoadFunc wrappedLoadFunc, Configuration conf,
String inputLocation, int[] toReadSplitIdxs) throws IOException {
this.wrappedLoadFunc = wrappedLoadFunc;
this.inputLocation = inputLocation;
this.toReadSplits = toReadSplitIdxs;
this.conf = conf;
this.curSplitIndex =
toReadSplitIdxs.length > 0 ? toReadSplitIdxs[0] : Integer.MAX_VALUE;
init();
}
/**
* Data is read by the Stratosphere DataSource PigDataSource
* @throws IOException
*/
@SuppressWarnings("unchecked")
private void init() throws IOException{
}
@Override
public FileInputFormat getInputFormat() throws IOException {
throw new UnsupportedOperationException();
}
@Override
public Tuple getNext() throws IOException {
Tuple t = wrappedLoadFunc.getNext();
if(t != null)
return t;
else
throw new IOException("[AVK] SReadToEndLoader: read empty tuple");
}
/*
private Tuple getNextHelper() throws IOException, InterruptedException {
Tuple t = null;
while(initializeReader()) {
t = wrappedLoadFunc.getNext();
if(t == null) {
// try next split
updateCurSplitIndex();
} else {
return t;
}
}
return null;
}*/
/**
* Updates curSplitIndex , just increment if splitIndexes is null,
* else get next split in splitIndexes
*/
/*private void updateCurSplitIndex() {
if(toReadSplits == null){
++curSplitIndex;
}else{
++toReadSplitsIdx;
if(toReadSplitsIdx >= toReadSplits.length){
// finished all the splits in splitIndexes array
curSplitIndex = Integer.MAX_VALUE;
}else{
curSplitIndex = toReadSplits[toReadSplitsIdx];
}
}
}*/
/*private boolean initializeReader() throws IOException,
InterruptedException {
if(curSplitIndex > inpSplits.size() - 1) {
// past the last split, we are done
return false;
}
InputSplit curSplit = inpSplits.get(curSplitIndex);
FileLineWriter fw = new FileLineWriter();
reader = new RecordReader(fw, Record.class);
wrappedLoadFunc.prepareToRead(reader, curSplit);
return true;
}*/
@Override
public void prepareToRead(RecordReader reader, InputSplit split)
throws IOException {
throw new UnsupportedOperationException();
}
@Override
public void setLocation(String location, PigDataSource fds) throws IOException {
// do nothing
}
@Override
public PigDataSource getDataSource(){
return this.wrappedLoadFunc.getDataSource();
}
}