/*
* Copyright (c) 2014 Villu Ruusmann
*
* This file is part of JPMML-Storm
*
* JPMML-Storm is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* JPMML-Storm is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with JPMML-Storm. If not, see <http://www.gnu.org/licenses/>.
*/
package org.jpmml.storm;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import backtype.storm.spout.SpoutOutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseRichSpout;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Values;
import org.dmg.pmml.FieldName;
public class CsvReaderSpout extends BaseRichSpout {
private File file = null;
private List<FieldName> columns = null;
private BufferedReader reader = null;
private List<String> header = null;
private SpoutOutputCollector collector = null;
public CsvReaderSpout(File file, List<FieldName> columns){
this.file = file;
this.columns = columns;
}
@Override
public void open(Map conf, TopologyContext context, SpoutOutputCollector collector){
try {
this.reader = new BufferedReader(new FileReader(this.file));
this.header = readLine();
} catch(IOException ioe){
throw new RuntimeException(ioe);
}
this.collector = collector;
}
@Override
public void nextTuple(){
List<String> line;
try {
line = readLine();
} catch(IOException ioe){
throw new RuntimeException(ioe);
}
if(line == null){
return;
}
Values values = new Values();
List<FieldName> columns = this.columns;
for(FieldName column : columns){
int index = this.header.indexOf(column.getValue());
if(index < 0){
values.add(null);
} else
{
values.add(line.get(index));
}
}
this.collector.emit(values);
}
@Override
public void declareOutputFields(OutputFieldsDeclarer declarer){
List<String> fields = new ArrayList<String>();
List<FieldName> columns = this.columns;
for(FieldName column : columns){
fields.add(column.getValue());
}
declarer.declare(new Fields(fields));
}
@Override
public void close(){
try {
this.reader.close();
} catch(IOException ioe){
throw new RuntimeException(ioe);
}
}
private List<String> readLine() throws IOException {
String line = this.reader.readLine();
if(line == null){
return null;
}
List<String> result = new ArrayList<String>();
String[] cells = line.split(",");
for(String cell : cells){
if(("N/A").equals(cell) || ("N/A").equals(cell)){
cell = null;
}
result.add(cell);
}
return result;
}
}