package scs.demos.mapreduce.user;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.util.Properties;
import org.omg.CORBA.AnyHolder;
import org.omg.CORBA.Any;
import org.omg.CORBA.ORB;
import scs.demos.mapreduce.FileSplit;
import scs.demos.mapreduce.IOMapReduceException;
import scs.demos.mapreduce.RecordReaderPOA;
import scs.demos.mapreduce.Reporter;
import scs.demos.mapreduce.schedule.LogError;
/**
* Classe que implementa a interface RecordReader, retornando os token lidos de um fileSplit.
* Deve ser inicializado com o nome do arquivo de configuracão e o FileSplit associado
* @author Sand Luz Correa
*/
public class TokenRecordReader extends RecordReaderPOA {
private FileSplit fileSplit = null;
private int bufferSize;
private final int BUFFER_SIZE = 100000; //100K;
private final char SEPARATOR = ' ';
private final String KEY = "none";
private Properties config = null;
private char[] buff = null;
private char[] buffWord = new char[1000];
private String word = null;
private int pos = 0;
private int index = 0;
private int size;
private InputStream in = null;
private BufferedReader br = null;
private boolean lastReturned = false;
private Reporter reporter = null;
private ORB orb;
private boolean opened = false;
private String exception;
private Any keyAux = null;
private Any valueAux = null;
/* retorna -1 se eof
* retorna 0 caso contrario
*/
private int read() throws java.io.IOException {
int ret;
try {
index = 0;
word = null;
while (!lookup()) {
if ((ret = br.read(buff,0,bufferSize)) ==-1) {
if (index > 0) {
word = String.copyValueOf(buffWord,0,index);
}
return ret;
}
else {
size = ret;
pos = 0;
}
}
word = String.copyValueOf(buffWord,0,index);
return 0;
} catch (java.io.IOException e) {
throw e;
}
}
private boolean lookup() {
while (pos < size) {
if (buff[pos] != SEPARATOR) {
if (index < buffWord.length) {
buffWord[index] = buff[pos];
index++;
}
else {
return true;
}
}
else {
while(pos < size && buff[pos]==SEPARATOR) {
pos++;
}
if (pos < size || index > 0) {
return true;
}
}
pos++;
}
return false;
}
public void open(String confFileName, FileSplit fileSplit, Reporter reporter) throws scs.demos.mapreduce.IOMapReduceException {
try {
if (opened) {
return;
}
this.reporter = reporter;
this.fileSplit = fileSplit;
this.config = new Properties();
orb = ORB.init();
config.load(new FileInputStream(confFileName));
bufferSize = Integer.valueOf(this.config.getProperty("mapred.RecordReader.buffer-size"));
if (bufferSize == 0) {
bufferSize = BUFFER_SIZE;
}
keyAux = orb.create_any();
valueAux = orb.create_any();
opened = true;
} catch (Exception e) {
exception = LogError.getStackTrace(e);
reporter.report(0,"TokenRecordReader::open -" + exception);
throw new scs.demos.mapreduce.IOMapReduceException();
}
}
public boolean next(AnyHolder key, AnyHolder value) throws scs.demos.mapreduce.IOMapReduceException {
try {
if (!opened) {
return false;
}
if (buff == null) {
in = new FileInputStream(fileSplit.getPath());
br = new BufferedReader(new InputStreamReader(in));
buff = new char[bufferSize];
pos = 0;
size = br.read(buff,0,bufferSize);
}
int r = read();
if ((r == 0) || (word != null && !lastReturned)) {
//reporter.report(1,"word: " + word);
String split[] = word.split("\\|");
if (split.length==1) {
keyAux.insert_string(KEY);
valueAux.insert_string(word);
}
else {
keyAux.insert_string(split[0]);
valueAux.insert_string(split[1]);
}
if ((r < 0) && (word != null) && (!lastReturned)) {
lastReturned = true;
}
key.value = keyAux ;
value.value = valueAux;
//reporter.report(1, "retornando key:" + key.value.extract_string());
//reporter.report(1, "retornando value:" + value.value.extract_string());
return true;
}
else {
keyAux.insert_string(KEY);
valueAux.insert_string(KEY);
key.value = keyAux ;
value.value = valueAux;
return false;
}
} catch (Exception e) {
exception = LogError.getStackTrace(e);
reporter.report(0,"TokenRecordReader::open -" + exception);
throw new scs.demos.mapreduce.IOMapReduceException();
}
}
public void close() throws scs.demos.mapreduce.IOMapReduceException{
try{
if (!opened) {
return;
}
in.close();
br.close();
opened = false;
} catch (Exception e){
exception = LogError.getStackTrace(e);
reporter.report(0,"TokenRecordReader::open -" + exception);
throw new scs.demos.mapreduce.IOMapReduceException();
}
}
public FileSplit getFileSplit() {
return fileSplit;
}
}