/** * Copyright (c) 2011-2014, OpenIoT * * This file is part of OpenIoT. * * OpenIoT is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation, version 3 of the License. * * OpenIoT is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with OpenIoT. If not, see <http://www.gnu.org/licenses/>. * * Contact: OpenIoT mailto: info@openiot.eu * @author Mehdi Riahi * @author Ali Salehi * @author Timotee Maret * @author Sofiane Sarni * @author Milos Stojanovic * @author Hylke van der Schaaf */ package org.openiot.gsn.wrappers.general; import org.openiot.gsn.beans.AddressBean; import org.openiot.gsn.beans.DataField; import org.openiot.gsn.beans.StreamElement; import org.openiot.gsn.wrappers.AbstractWrapper; import java.io.File; import java.io.FileReader; import java.io.IOException; import java.io.Serializable; import java.util.ArrayList; import java.util.TreeMap; import org.apache.commons.io.FileUtils; import org.apache.log4j.Logger; /** * Timezones: http://joda-time.sourceforge.net/timezones.html * Formatting: http://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html */ public class CSVWrapper extends AbstractWrapper { private final transient Logger logger = Logger.getLogger(CSVWrapper.class); private static int threadCounter = 0; private DataField[] dataField; private CSVHandler handler = new CSVHandler(); private int samplingPeriodInMsc; /** * The maximum number of samples to read from the file per sampling period. */ private int samplingCountPerPeriod; private String checkPointDir; private String dataFileName; boolean useCounterForCheckPoint = false; long processedLineCounter = 0; // counts lines processed when checkpoint use counter to track changes (instead of timestamp, by default) @Override public boolean initialize() { setName("CSVWrapper-Thread" + (++threadCounter)); AddressBean addressBean = getActiveAddressBean(); dataFileName = addressBean.getPredicateValueWithException("file"); String csvFields = addressBean.getPredicateValueWithException("fields"); String csvFormats = addressBean.getPredicateValueWithException("formats"); //String csvSeparator = addressBean.getPredicateValueWithDefault("separator",","); String value = addressBean.getPredicateValue("separator"); String csvSeparator = (value == null || value.length() == 0) ? "," : value; checkPointDir = addressBean.getPredicateValueWithDefault("check-point-directory", "./csv-check-points"); String csvStringQuote = addressBean.getPredicateValueWithDefault("quote", "\""); int skipFirstXLine = addressBean.getPredicateValueAsInt("skip-first-lines", 0); String timezone = addressBean.getPredicateValueWithDefault("timezone", CSVHandler.LOCAL_TIMEZONE_ID); String nullValues = addressBean.getPredicateValueWithDefault("bad-values", ""); String strUseCounterForCheckPoint = addressBean.getPredicateValueWithDefault("use-counter-for-check-point", "false"); samplingPeriodInMsc = addressBean.getPredicateValueAsInt("sampling", 10000); samplingCountPerPeriod = addressBean.getPredicateValueAsInt("sampling-count", 250); /* DEBUG_INFO(dataFile); */ if (csvSeparator != null && csvSeparator.length() != 1) { logger.warn("The provided CSV separator:>" + csvSeparator + "< should only have 1 character, thus ignored and instead \",\" is used."); csvSeparator = ","; } if (csvStringQuote.length() != 1) { logger.warn("The provided CSV quote:>" + csvSeparator + "< should only have 1 character, thus ignored and instead '\"' is used."); csvStringQuote = "\""; } try { if (strUseCounterForCheckPoint.equalsIgnoreCase("true")) { useCounterForCheckPoint = true; logger.warn("Using counter-based check points"); } //String checkPointFile = new File(checkPointDir).getAbsolutePath()+"/"+(new File(dataFile).getName())+"-"+addressBean.hashCode(); StringBuilder checkPointFile = new StringBuilder() .append(new File(checkPointDir).getAbsolutePath()) .append("/") .append(addressBean.getVirtualSensorName()) .append("_") .append(addressBean.getInputStreamName()) .append("_") .append(addressBean.getWrapper()) .append("_") .append(new File(dataFileName).getName()); if (!handler.initialize(dataFileName.trim(), csvFields, csvFormats, csvSeparator.toCharArray()[0], csvStringQuote.toCharArray()[0], skipFirstXLine, nullValues, timezone, checkPointFile.toString())) { return false; } String val = FileUtils.readFileToString(new File(checkPointFile.toString()), "UTF-8"); long lastItem = 0; if (val != null && val.trim().length() > 0) { lastItem = Long.parseLong(val.trim()); } logger.warn("Latest item: " + lastItem); if (useCounterForCheckPoint) { processedLineCounter = lastItem; } } catch (IOException | NumberFormatException e) { logger.error("Loading the csv-wrapper failed:" + e.getMessage(), e); return false; } dataField = handler.getDataFields(); logger.warn("Reading from: " + dataFileName); return true; } @Override public void run() { Exception previousError = null; long previousModTime = -1; long previousCheckModTime = -1; while (isActive()) { File dataFile = new File(handler.getDataFile()); File chkPointFile = new File(handler.getCheckPointFile()); long lastModified = -1; long lastModifiedCheckPoint = -1; if (dataFile.isFile()) { lastModified = dataFile.lastModified(); } if (chkPointFile.isFile()) { lastModifiedCheckPoint = chkPointFile.lastModified(); } FileReader reader = null; /* DEBUG_INFO("* Entry *"); DEBUG_INFO(list("lastModified", lastModified)); DEBUG_INFO(list("lastModifiedCheckPoint", lastModifiedCheckPoint)); */ try { ArrayList<TreeMap<String, Serializable>> output; if (previousError == null || ((lastModified != previousModTime || lastModifiedCheckPoint != previousCheckModTime) || useCounterForCheckPoint)) { reader = new FileReader(handler.getDataFile()); output = handler.work(reader, checkPointDir, samplingCountPerPeriod); for (TreeMap<String, Serializable> se : output) { StreamElement streamElement = new StreamElement(se, getOutputFormat()); processedLineCounter++; logger.warn(se); boolean insertionSuccess = postStreamElement(streamElement); if (!insertionSuccess) { logger.error("Insert failed."); } if (!useCounterForCheckPoint) { handler.updateCheckPointFile(streamElement.getTimeStamp()); // write latest processed timestamp } else { handler.updateCheckPointFile(processedLineCounter); // write latest processed line number } } } //if (output==null || output.size()==0) //More intelligent sleeping, being more proactive once the wrapper receives huge files. Thread.sleep(samplingPeriodInMsc); } catch (IOException | InterruptedException e) { if (previousError != null && previousError.getMessage().equals(e.getMessage())) { continue; } logger.error(e.getMessage() + " :: " + dataFile, e); previousError = e; previousModTime = lastModified; previousCheckModTime = lastModifiedCheckPoint; } finally { if (reader != null) { try { reader.close(); } catch (IOException e) { logger.debug(e.getMessage(), e); } } } /* DEBUG_INFO("* Exit *"); */ } } @Override public DataField[] getOutputFormat() { return dataField; } @Override public String getWrapperName() { return this.getClass().getName(); } @Override public void dispose() { threadCounter--; } /* * Convenient function used for debugging * */ public void DEBUG_INFO(String s) { String date = new java.text.SimpleDateFormat("MM/dd/yyyy HH:mm:ss,SSS").format(new java.util.Date(System.currentTimeMillis())); s = "[" + date + "] " + s + "\n"; try { FileUtils.writeStringToFile(new File("DEBUG_INFO_" + threadCounter + ".txt"), s, true); } catch (IOException e) { logger.error("", e); } } String list(String name, long value) { return name + " = " + value + " (" + new java.text.SimpleDateFormat("MM/dd/yyyy HH:mm:ss,SSS").format(new java.util.Date(value)) + ")"; } }