/**
* Global Sensor Networks (GSN) Source Code
* Copyright (c) 2006-2016, Ecole Polytechnique Federale de Lausanne (EPFL)
*
* This file is part of GSN.
*
* GSN is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* GSN is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GSN. If not, see <http://www.gnu.org/licenses/>.
*
* File: src/ch/epfl/gsn/wrappers/general/CSVWrapper.java
*
* @author Mehdi Riahi
* @author Ali Salehi
* @author Timotee Maret
* @author Sofiane Sarni
* @author Milos Stojanovic
*
*/
package ch.epfl.gsn.wrappers.general;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.TreeMap;
import org.apache.commons.io.FileUtils;
import org.slf4j.LoggerFactory;
import ch.epfl.gsn.beans.AddressBean;
import ch.epfl.gsn.beans.DataField;
import ch.epfl.gsn.beans.StreamElement;
import ch.epfl.gsn.wrappers.AbstractWrapper;
import org.slf4j.Logger;
/**
* Timezones: http://joda-time.sourceforge.net/timezones.html
* Formatting: http://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html
*/
public class CSVWrapper extends AbstractWrapper {
private final transient Logger logger = LoggerFactory.getLogger(CSVWrapper.class);
private static int threadCounter = 0;
private DataField[] dataField;
private CSVHandler handler = new CSVHandler();
private int samplingPeriodInMsc;
private String checkPointDir;
private String dataFile;
boolean useCounterForCheckPoint = false;
long processedLineCounter = 0; // counts lines processed when checkpoint use counter to track changes (instead of timestamp, by default)
public boolean initialize() {
AddressBean addressBean = getActiveAddressBean();
dataFile = addressBean.getPredicateValueWithException("file");
String csvFields = addressBean.getPredicateValueWithException("fields");
String csvFormats = addressBean.getPredicateValueWithException("formats");
//String csvSeparator = addressBean.getPredicateValueWithDefault("separator",",");
String value = addressBean.getPredicateValue("separator");
String csvSeparator = (value == null || value.length() == 0) ? "," : value;
checkPointDir = addressBean.getPredicateValueWithDefault("check-point-directory", "./csv-check-points");
String csvStringQuote = addressBean.getPredicateValueWithDefault("quote", "\"");
int skipFirstXLine = addressBean.getPredicateValueAsInt("skip-first-lines", 0);
String timezone = addressBean.getPredicateValueWithDefault("timezone", handler.LOCAL_TIMEZONE_ID);
String nullValues = addressBean.getPredicateValueWithDefault("bad-values", "");
String strUseCounterForCheckPoint = addressBean.getPredicateValueWithDefault("use-counter-for-check-point", "false");
samplingPeriodInMsc = addressBean.getPredicateValueAsInt("sampling", 10000);
/*
DEBUG_INFO(dataFile);
*/
if (csvSeparator != null && csvSeparator.length() != 1) {
logger.warn("The provided CSV separator:>" + csvSeparator + "< should only have 1 character, thus ignored and instead \",\" is used.");
csvSeparator = ",";
}
if (csvStringQuote.length() != 1) {
logger.warn("The provided CSV quote:>" + csvSeparator + "< should only have 1 character, thus ignored and instead '\"' is used.");
csvStringQuote = "\"";
}
try {
if (strUseCounterForCheckPoint.equalsIgnoreCase("true")) {
useCounterForCheckPoint = true;
logger.warn("Using counter-based check points");
}
//String checkPointFile = new File(checkPointDir).getAbsolutePath()+"/"+(new File(dataFile).getName())+"-"+addressBean.hashCode();
StringBuilder checkPointFile = new StringBuilder()
.append(new File(checkPointDir).getAbsolutePath())
.append("/")
.append(addressBean.getVirtualSensorName())
.append("_")
.append(addressBean.getInputStreamName())
.append("_")
.append(addressBean.getWrapper())
.append("_")
.append(new File(dataFile).getName());
if (!handler.initialize(dataFile.trim(), csvFields, csvFormats, csvSeparator.toCharArray()[0], csvStringQuote.toCharArray()[0], skipFirstXLine, nullValues, timezone, checkPointFile.toString()))
return false;
String val = FileUtils.readFileToString(new File(checkPointFile.toString()), "UTF-8");
long lastItem = 0;
if (val != null && val.trim().length() > 0)
lastItem = Long.parseLong(val.trim());
logger.warn("Latest item: "+lastItem);
if (useCounterForCheckPoint) {
processedLineCounter = lastItem;
}
} catch (Exception e) {
logger.error("Loading the csv-wrapper failed:" + e.getMessage(), e);
return false;
}
dataField = handler.getDataFields();
logger.warn("Reading from: " + dataFile);
return true;
}
public void run() {
Exception preivousError = null;
long previousModTime = -1;
long previousCheckModTime = -1;
while (isActive()) {
File dataFile = new File(handler.getDataFile());
File chkPointFile = new File(handler.getCheckPointFile());
long lastModified = -1;
long lastModifiedCheckPoint = -1;
if (dataFile.isFile())
lastModified = dataFile.lastModified();
if (chkPointFile.isFile())
lastModifiedCheckPoint = chkPointFile.lastModified();
FileReader reader = null;
/*
DEBUG_INFO("* Entry *");
DEBUG_INFO(list("lastModified", lastModified));
DEBUG_INFO(list("lastModifiedCheckPoint", lastModifiedCheckPoint));
*/
try {
ArrayList<TreeMap<String, Serializable>> output = null;
if (preivousError == null || (preivousError != null && ((lastModified != previousModTime || lastModifiedCheckPoint != previousCheckModTime) || useCounterForCheckPoint))) {
reader = new FileReader(handler.getDataFile());
output = handler.work(reader, checkPointDir);
for (TreeMap<String, Serializable> se : output) {
StreamElement streamElement = new StreamElement(se, getOutputFormat());
String [] ses = streamElement.getFieldNames();
processedLineCounter++;
for (int i=0;i<ses.length; i++){
if ("anetz_snow_height".equalsIgnoreCase(ses[i]) || "mst_surface_temp".equalsIgnoreCase(ses[i])){
logger.warn(dataFile+" : "+se);
break;
}
}
boolean insertionSuccess = postStreamElement(streamElement);
if (!useCounterForCheckPoint)
handler.updateCheckPointFile(streamElement.getTimeStamp()); // write latest processed timestamp
else
handler.updateCheckPointFile(processedLineCounter); // write latest processed line number
}
}
//if (output==null || output.size()==0) //More intelligent sleeping, being more proactive once the wrapper receives huge files.
Thread.sleep(samplingPeriodInMsc);
} catch (Exception e) {
if (preivousError != null && preivousError.getMessage().equals(e.getMessage()))
continue;
logger.error(e.getMessage() + " :: " + dataFile, e);
preivousError = e;
previousModTime = lastModified;
previousCheckModTime = lastModifiedCheckPoint;
} finally {
if (reader != null)
try {
reader.close();
} catch (IOException e) {
logger.debug(e.getMessage(), e);
}
}
/*
DEBUG_INFO("* Exit *");
*/
}
}
public DataField[] getOutputFormat() {
return dataField;
}
public String getWrapperName() {
return this.getClass().getName();
}
public void dispose() {
threadCounter--;
}
/*
* Convenient function used for debugging
* */
public void DEBUG_INFO(String s) {
String date = new java.text.SimpleDateFormat("MM/dd/yyyy HH:mm:ss,SSS").format(new java.util.Date(System.currentTimeMillis()));
s = "[" + date + "] " + s + "\n";
try {
FileUtils.writeStringToFile(new File("DEBUG_INFO_" + threadCounter + ".txt"), s, true);
} catch (IOException e) {
e.printStackTrace();
}
}
String list(String name, long value) {
return name + " = " + value + " (" + new java.text.SimpleDateFormat("MM/dd/yyyy HH:mm:ss,SSS").format(new java.util.Date(value)) + ")";
}
}