/*
* beymani: Outlier and anomaly detection
* Author: Pranab Ghosh
*
* Licensed under the Apache License, Version 2.0 (the "License"); you
* may not use this file except in compliance with the License. You may
* obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied. See the License for the specific language governing
* permissions and limitations under the License.
*/
package org.beymani.predictor;
import java.io.File;
import java.io.FileNotFoundException;
import java.util.Arrays;
import java.util.Comparator;
import java.util.Map;
import java.util.Scanner;
import backtype.storm.spout.SpoutOutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseRichSpout;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Values;
/**
 * Storm spout that reads text records line by line from the regular files
 * found in a configured directory and emits one tuple per well-formed line.
 *
 * <p>The directory is taken from the {@code file.spout.dir.path} configuration
 * entry. Files are processed in ascending order of last-modified time. Each
 * line is split on whitespace; the first token is emitted as {@code entityID}
 * and the second as {@code recordData}. Any further tokens are ignored.
 *
 * <p>Files are read with {@link Scanner#Scanner(File)}, i.e. using the
 * platform default charset.
 *
 * @author pranab
 */
public class FileSpout extends BaseRichSpout {
    /** Configuration key holding the path of the directory to read from. */
    private static final String DIR_PATH_KEY = "file.spout.dir.path";

    private SpoutOutputCollector collector;
    private Map conf;
    /** Directory entries, sorted oldest first by last-modified time. */
    private File[] files;
    /** Scanner over the file currently being read; null when no input is left. */
    private Scanner scanner;
    /** Index into {@link #files} of the file currently being read. */
    private int curFileIndex = 0;

    /**
     * Lists the configured directory, orders its entries by last-modified time
     * and opens the first readable file.
     *
     * @param conf topology configuration; must contain {@code file.spout.dir.path}
     * @param context topology context (not used)
     * @param collector collector used to emit tuples
     * @throws IllegalStateException if the directory path is not configured, the
     *         path cannot be listed as a directory, or a file cannot be opened
     */
    @Override
    public void open(Map conf, TopologyContext context,
            SpoutOutputCollector collector) {
        this.collector = collector;
        this.conf = conf;

        Object dirPath = conf.get(DIR_PATH_KEY);
        if (dirPath == null) {
            throw new IllegalStateException("missing configuration parameter " + DIR_PATH_KEY);
        }

        File dir = new File(dirPath.toString());
        // listFiles() returns null (not an empty array) when the path is not a
        // directory or an I/O error occurs
        File[] listed = dir.listFiles();
        if (listed == null) {
            throw new IllegalStateException("can not list directory " + dir.getPath());
        }

        Arrays.sort(listed, new Comparator<File>() {
            public int compare(File f1, File f2) {
                long t1 = f1.lastModified();
                long t2 = f2.lastModified();
                return t1 < t2 ? -1 : (t1 > t2 ? 1 : 0);
            }
        });

        files = listed;
        curFileIndex = 0;
        openNextFile();
    }

    /**
     * Emits the next record, if there is one. Nothing is emitted when all files
     * have been consumed, or when the line read is blank or has fewer than two
     * whitespace separated tokens (such a line is skipped).
     */
    @Override
    public void nextTuple() {
        String record = readFile();
        if (record == null) {
            // all input consumed; nothing to emit on this call
            return;
        }

        // trim first so that leading white space does not produce an empty entity ID
        String[] items = record.trim().split("\\s+");
        if (items.length < 2) {
            // blank or malformed line; skip it instead of failing the spout
            return;
        }

        String entityID = items[0];
        String recordData = items[1];
        collector.emit(new Values(entityID, recordData));
    }

    /**
     * Releases the scanner over the file currently being read, if any.
     */
    @Override
    public void close() {
        if (scanner != null) {
            scanner.close();
            scanner = null;
        }
    }

    /**
     * Reads the next line, moving on to the following files as each one is
     * exhausted. Empty files are passed over.
     *
     * @return the next line, or null when no file has any line left
     */
    private String readFile() {
        while (scanner != null) {
            if (scanner.hasNextLine()) {
                return scanner.nextLine();
            }
            // current file is exhausted: release it before moving to the next one
            scanner.close();
            scanner = null;
            ++curFileIndex;
            openNextFile();
        }
        return null;
    }

    /**
     * Opens a scanner on the file at {@link #curFileIndex}, first advancing the
     * index past entries that are not regular files (e.g. sub directories).
     * Leaves {@link #scanner} null when there is no file left to open.
     *
     * @throws IllegalStateException if the file can not be opened
     */
    private void openNextFile() {
        scanner = null;
        while (curFileIndex < files.length && !files[curFileIndex].isFile()) {
            ++curFileIndex;
        }
        if (curFileIndex >= files.length) {
            return;
        }

        File file = files[curFileIndex];
        try {
            scanner = new Scanner(file);
        } catch (FileNotFoundException e) {
            // keep the cause and name the file so the failure can be diagnosed
            throw new IllegalStateException("file not found: " + file.getPath(), e);
        }
    }

    /* (non-Javadoc)
     * @see backtype.storm.topology.IComponent#declareOutputFields(backtype.storm.topology.OutputFieldsDeclarer)
     */
    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("entityID", "recordData"));
    }
}