/**
 *    Copyright (c) 2011-2014, OpenIoT
 *
 *    This file is part of OpenIoT.
 *
 *    OpenIoT is free software: you can redistribute it and/or modify
 *    it under the terms of the GNU Lesser General Public License as published by
 *    the Free Software Foundation, version 3 of the License.
 *
 *    OpenIoT is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU Lesser General Public License for more details.
 *
 *    You should have received a copy of the GNU Lesser General Public License
 *    along with OpenIoT.  If not, see <http://www.gnu.org/licenses/>.
 *
 *     Contact: OpenIoT mailto: info@openiot.eu
 * @author Ali Salehi
 * @author Mehdi Riahi
 * @author Sofiane Sarni
 * @author Hylke van der Schaaf
 */

package org.openiot.gsn.wrappers.general;

import org.openiot.gsn.beans.DataField;
import org.openiot.gsn.utils.CaseInsensitiveComparator;

import java.io.File;
import java.io.IOException;
import java.io.Reader;
import java.io.Serializable;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Map;
import java.util.TreeMap;

import org.apache.commons.io.FileUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.log4j.Logger;
import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;
import org.joda.time.format.DateTimeFormat;
import org.joda.time.format.DateTimeFormatter;

import au.com.bytecode.opencsv.CSVReader;

/**
 * Parses CSV input into maps of field name to value and keeps track of the
 * last consumed item through a check-point file.
 * <p>
 * Each column is described by a format: {@code numeric}, {@code string},
 * {@code timestamp(<pattern>)} or {@code timestampl(<pattern>)} (the latter
 * left-pads the raw value with zeros up to the pattern length before parsing).
 * <p>
 * The possible patterns for the timestamp fields are documented at
 * http://joda-time.sourceforge.net/api-release/org/joda/time/format/DateTimeFormat.html
 * and the possible time zones at http://joda-time.sourceforge.net/timezones.html
 */
public class CSVHandler {

    /** Identifier of the JVM default time zone, used when no time zone is given. */
    public static final String LOCAL_TIMEZONE_ID = DateTimeZone.getDefault().getID();

    private static final Logger logger = Logger.getLogger(CSVHandler.class);

    /** Name of the field that, when present, is used as the check-point value. */
    private static final String TIMESTAMP = "timed";

    private static final String TIMESTAMP_PREFIX = "timestamp(";
    private static final String TIMESTAMP_LEFT_PADDED_PREFIX = "timestampl(";

    /**
     * Parses {@code value} with the given Joda-Time pattern.
     *
     * @param format a Joda-Time {@link DateTimeFormat} pattern
     * @param value  the text to parse
     * @return the parsed date-time
     * @throws IllegalArgumentException if the pattern is invalid or the value does not match it
     */
    public static DateTime parseTimeStamp(String format, String value) throws IllegalArgumentException {
        DateTimeFormatter fmt = DateTimeFormat.forPattern(format);
        return fmt.parseDateTime(value);
    }

    private char stringSeparator, separator;
    private String dataFile;
    private DateTimeZone timeZone;
    private int skipFirstXLines;
    private String[] fields, formats, nulls;
    private String checkPointFile;

    /**
     * Initializes the handler with the local time zone and a check-point file
     * derived from the name of the data file.
     *
     * @return {@code true} if the configuration is valid, {@code false} otherwise
     * @see #initialize(String, String, String, char, char, int, String, String, String)
     */
    public boolean initialize(String dataFile, String inFields, String inFormats, char separator, char stringSeparator, int skipFirstXLines, String nullValues) {
        // NOTE(review): "check-poin/" looks like a typo, but it is kept as-is because
        // renaming it would orphan check-point files written by earlier runs.
        return initialize(dataFile, inFields, inFormats, separator, stringSeparator, skipFirstXLines, nullValues, LOCAL_TIMEZONE_ID, "check-poin/" + (new File(dataFile).getName() + ".chk-point"));
    }

    /**
     * Initializes the handler and validates its configuration.
     *
     * @param dataFile        path of the CSV file; must denote an existing regular file
     * @param inFields        comma separated field names (stored lower-cased)
     * @param inFormats       comma separated formats, one per field
     * @param separator       column separator of the CSV data
     * @param stringSeparator quote character of the CSV data
     * @param skipFirstXLines number of leading lines to skip when reading the data
     * @param nullValues      comma separated literals that denote a missing value
     * @param timeZone        Joda-Time time zone identifier used to parse timestamps
     * @param checkpointFile  path of the check-point file; created if missing
     * @return {@code true} if everything is valid; {@code false} (after logging the reason)
     *         if the time zone is unknown, the data file does not exist, the check-point
     *         file cannot be set up, a format is invalid, or the number of fields and
     *         formats differ
     */
    public boolean initialize(String dataFile, String inFields, String inFormats, char separator, char stringSeparator, int skipFirstXLines, String nullValues, String timeZone, String checkpointFile) {
        this.stringSeparator = stringSeparator; // default to ,
        this.skipFirstXLines = skipFirstXLines; // default to 0
        this.dataFile = dataFile; // check if it exist.
        this.separator = separator;
        this.checkPointFile = checkpointFile;
        try {
            this.timeZone = DateTimeZone.forID(timeZone);
        } catch (IllegalArgumentException e) {
            // An unknown zone id is a configuration error; report it like the other ones.
            logger.error("The specified time zone: " + timeZone + " is not recognised.", e);
            return false;
        }

        File file = new File(dataFile);
        if (!file.isFile()) {
            logger.error("The specified CSV data file: " + dataFile + " doesn't exists.");
            return false;
        }

        try {
            setupCheckPointFileIfNeeded();
            this.fields = generateFieldIdx(inFields, true);
            this.formats = generateFieldIdx(inFormats, false);
            this.nulls = generateFieldIdx(nullValues, true);
        } catch (IOException e) {
            logger.error(e.getMessage(), e);
            return false;
        }

        if (!validateFormats(this.formats)) {
            return false;
        }
        if (fields.length != formats.length) {
            logger.error("loading the csv-wrapper failed as the length of fields(" + fields.length + ") doesn't match the length of formats(" + formats.length + ")");
            return false;
        }
        return true;
    }

    /**
     * Creates the check-point file, and its parent directories, if they do not exist yet.
     *
     * @throws IOException if the check-point file cannot be created
     */
    public void setupCheckPointFileIfNeeded() throws IOException {
        // Resolve to an absolute file first: File.getParent() returns null for a bare
        // file name, which previously caused a NullPointerException.
        File chkPoint = new File(getCheckPointFile()).getAbsoluteFile();
        File chkPointDir = chkPoint.getParentFile();
        if (chkPointDir != null) {
            chkPointDir.mkdirs();
        }
        chkPoint.createNewFile();
    }

    /**
     * Checks that every format is {@code numeric}, {@code string} or a timestamp
     * format whose pattern is accepted by Joda-Time.
     *
     * @param formats the formats to validate
     * @return {@code true} if all formats are valid; {@code false} (after logging) otherwise
     */
    public static boolean validateFormats(String[] formats) {
        for (String format : formats) {
            if (format.equalsIgnoreCase("numeric") || format.equalsIgnoreCase("string")) {
                continue;
            }
            if (!isTimeStampFormat(format)) {
                logger.error("The format (" + format + ") used by the CSV-Wrapper doesn't exist.");
                return false;
            }
            try {
                // Printing the current time forces Joda-Time to compile and apply the pattern.
                DateTimeFormat.forPattern(getTimeStampFormat(format)).print(System.currentTimeMillis());
            } catch (IllegalArgumentException e) {
                logger.error("Validating the time-format(" + format + ") used by the CSV-wrapper is failed. ");
                return false;
            }
        }
        return true;
    }

    /**
     * Splits {@code rawFields} as a single CSV record (comma separated) and trims
     * the surrounding whitespace of every element.
     *
     * @param rawFields   the comma separated list; {@code null} yields an empty array
     * @param toLowerCase if false, the case is preserved; if true, the returned
     *                    elements are lower-cased
     * @return the trimmed elements, or an empty array if there is nothing to read
     * @throws IOException if the underlying CSV reader fails
     */
    public static String[] generateFieldIdx(String rawFields, boolean toLowerCase) throws IOException {
        if (rawFields == null) {
            return new String[0];
        }
        CSVReader reader = new CSVReader(new StringReader(rawFields));
        String[] toReturn;
        try {
            toReturn = reader.readNext();
        } finally {
            reader.close();
        }
        if (toReturn == null) {
            return new String[0];
        }
        for (int i = 0; i < toReturn.length; i++) {
            String item = toReturn[i].trim();
            toReturn[i] = toLowerCase ? item.toLowerCase() : item;
        }
        return toReturn;
    }

    /**
     * Reads the check-point file and returns the items of {@code dataFile} that
     * come after the stored check-point.
     *
     * @param dataFile               reader over the CSV data; closed by this call
     * @param checkpointDir          not used by this implementation
     * @param samplingCountPerPeriod maximum number of items to return (see {@link #parseValues})
     * @return the new items, possibly empty
     * @throws IOException if the check-point file or the data cannot be read
     */
    public ArrayList<TreeMap<String, Serializable>> work(Reader dataFile, String checkpointDir, int samplingCountPerPeriod) throws IOException {
        setupCheckPointFileIfNeeded();
        String val = FileUtils.readFileToString(new File(checkPointFile), "UTF-8");
        long lastItem = 0;
        if (val != null && val.trim().length() > 0) {
            lastItem = Long.parseLong(val.trim());
        }
        return parseValues(dataFile, lastItem, samplingCountPerPeriod);
    }

    /**
     * Overwrites the check-point file with the given value.
     *
     * @param timestamp the new check-point value
     * @throws IOException if the file cannot be written
     */
    public void updateCheckPointFile(long timestamp) throws IOException {
        FileUtils.writeStringToFile(new File(checkPointFile), Long.toString(timestamp), "UTF-8");
    }

    private boolean loggedNoChange = false; // to avoid duplicate logging messages when there is no change

    /**
     * Parses the CSV data and returns the items located after the check-point.
     * <p>
     * If a row contains the {@code timed} field, rows whose timestamp is not
     * greater than {@code previousCheckPoint} are skipped; otherwise the
     * check-point is interpreted as a count of already consumed rows. Rows whose
     * values are all null are ignored. Reading stops once
     * {@code samplingCountPerPeriod} items were collected, except that following
     * rows sharing the timestamp of the last collected item are still returned.
     * <p>
     * NOTE(review): a row whose {@code timed} value is null or not a {@code Long}
     * makes the check-point comparison throw; confirm whether that can happen.
     *
     * @param datainput              reader over the CSV data; always closed by this call
     * @param previousCheckPoint     the last stored check-point
     * @param samplingCountPerPeriod the number of items after which reading stops
     * @return the parsed items, possibly empty
     * @throws IOException if reading or closing the input fails
     */
    public ArrayList<TreeMap<String, Serializable>> parseValues(Reader datainput, long previousCheckPoint, int samplingCountPerPeriod) throws IOException {
        ArrayList<TreeMap<String, Serializable>> toReturn = new ArrayList<TreeMap<String, Serializable>>();
        CSVReader reader = new CSVReader(datainput, getSeparator(), getStringSeparator(), getSkipFirstXLines());
        try {
            String[] values;
            long currentLine = 0;
            Serializable currentTimeStamp = null;
            boolean quit = false;
            while ((values = reader.readNext()) != null) {
                TreeMap<String, Serializable> se = convertTo(formats, fields, getNulls(), values, getSeparator());
                if (isEmpty(se)) {
                    continue;
                }
                if (se.containsKey(TIMESTAMP)) {
                    if (((Long) se.get(TIMESTAMP)) <= previousCheckPoint) {
                        continue;
                    }
                } else { // assuming useCounterForCheckPoint = true
                    if (logger.isDebugEnabled()) {
                        String symbol = (currentLine < previousCheckPoint) ? " < " : " >= ";
                        logger.debug("currentLine=" + currentLine + symbol + "checkpoint=" + previousCheckPoint);
                    }
                    if (currentLine < previousCheckPoint) { // skipping already read lines, based on line count
                        logger.debug("skipping");
                        currentLine++;
                        continue;
                    }
                }
                if (quit) {
                    // The quota is reached: only rows sharing the last timestamp are still accepted.
                    if (!se.containsKey(TIMESTAMP)) {
                        break;
                    }
                    if (currentTimeStamp == null || !currentTimeStamp.equals(se.get(TIMESTAMP))) {
                        break;
                    }
                }
                toReturn.add(se);
                currentLine++;
                loggedNoChange = false;
                if (toReturn.size() >= samplingCountPerPeriod) {
                    // Move outside the loop as in each call we only read x values;
                    // But if we use timeStampMode, still check the next value, since
                    // if the timestamp is the same we have to return it, or data
                    // would be lost.
                    logger.trace("Time to quit.");
                    quit = true;
                    if (se.containsKey(TIMESTAMP)) {
                        currentTimeStamp = se.get(TIMESTAMP);
                    } else {
                        break;
                    }
                }
            }
            if (logger.isDebugEnabled() && toReturn.isEmpty() && !loggedNoChange) {
                logger.debug("There is no new item after most recent checkpoint(previousCheckPoint:" + new DateTime(previousCheckPoint) + ").");
                loggedNoChange = true;
            }
        } finally {
            // Close the reader even when a row fails to parse.
            reader.close();
        }
        return toReturn;
    }

    /** Returns {@code true} if every value of the map is null. */
    private boolean isEmpty(Map<String, Serializable> se) {
        for (Object o : se.values()) {
            if (o != null) {
                return false;
            }
        }
        return true;
    }

    /**
     * Converts one CSV row into a map of field name to value.
     * <p>
     * Every field is first mapped to null. {@code numeric} columns become
     * {@code Double}, {@code string} columns are stored as-is, and timestamp
     * columns become epoch milliseconds ({@code Long}). Several timestamp columns
     * that share the same field name are joined (values and patterns alike) with
     * {@code separator} and parsed as one timestamp.
     *
     * @param formats    the format of each column
     * @param fields     the field name of each column
     * @param nullValues literals that denote a missing value
     * @param values     the raw column values; not modified
     * @param separator  the character used to join multi-column timestamps
     * @return the converted row, keyed case-insensitively
     * @throws NumberFormatException    if a numeric column cannot be parsed
     * @throws IllegalArgumentException if a timestamp cannot be parsed
     */
    public TreeMap<String, Serializable> convertTo(String[] formats, String[] fields, String nullValues[], String[] values, char separator) {
        TreeMap<String, Serializable> streamElement = new TreeMap<String, Serializable>(new CaseInsensitiveComparator());
        for (String field : fields) {
            streamElement.put(field, null);
        }
        HashMap<String, String> timeStampFormats = new HashMap<String, String>();
        int columns = Math.min(fields.length, values.length);
        for (int i = 0; i < columns; i++) {
            String raw = values[i];
            if (isNull(nullValues, raw)) {
                continue;
            }
            String columnFormat = formats[i];
            if (columnFormat.equalsIgnoreCase("numeric")) {
                try {
                    streamElement.put(fields[i], Double.parseDouble(raw));
                } catch (java.lang.NumberFormatException e) {
                    logger.error("Parsing to Numeric fails: Value to parse=" + raw);
                    throw e;
                }
            } else if (columnFormat.equalsIgnoreCase("string")) {
                streamElement.put(fields[i], raw);
            } else if (isTimeStampFormat(columnFormat)) {
                String value = "";
                String format = "";
                if (streamElement.get(fields[i]) != null) {
                    // A previous column already contributed to this timestamp: append to it.
                    value = (String) streamElement.get(fields[i]);
                    format = timeStampFormats.get(fields[i]);
                    value += separator;
                    format += separator;
                }
                String pattern = getTimeStampFormat(columnFormat);
                if (isTimeStampLeftPaddedFormat(columnFormat)) {
                    raw = StringUtils.leftPad(raw, pattern.length(), '0');
                }
                value += raw;
                format += pattern;
                streamElement.put(fields[i], value);
                timeStampFormats.put(fields[i], format);
            }
        }
        // Replace the accumulated timestamp texts by their epoch milliseconds.
        for (Map.Entry<String, String> entry : timeStampFormats.entrySet()) {
            String timeField = entry.getKey();
            String timeFormat = entry.getValue();
            String timeValue = (String) streamElement.get(timeField);
            try {
                DateTime x = DateTimeFormat.forPattern(timeFormat).withZone(getTimeZone()).parseDateTime(timeValue);
                streamElement.put(timeField, x.getMillis());
            } catch (IllegalArgumentException e) {
                logger.error("Parsing error: TimeFormat=" + timeFormat + " , TimeValue=" + timeValue);
                logger.error(e.getMessage(), e);
                throw e;
            }
        }
        return streamElement;
    }

    /**
     * Extracts the pattern from a {@code timestamp(<pattern>)} or
     * {@code timestampl(<pattern>)} format, i.e. the trimmed text between the
     * prefix and the first closing parenthesis. The prefix is matched
     * case-insensitively, consistently with {@link #isTimeStampFormat(String)}.
     *
     * @param input a timestamp format
     * @return the pattern embedded in the format
     */
    public static String getTimeStampFormat(String input) {
        boolean leftPadded = input.toLowerCase().startsWith(TIMESTAMP_LEFT_PADDED_PREFIX);
        int start = leftPadded ? TIMESTAMP_LEFT_PADDED_PREFIX.length() : TIMESTAMP_PREFIX.length();
        return input.substring(start, input.indexOf(")")).trim();
    }

    /**
     * @return {@code true} if {@code input} starts (case-insensitively) with
     *         {@code timestamp(} or {@code timestampl(} and ends with {@code )}
     */
    public static boolean isTimeStampFormat(String input) {
        String lowered = input.toLowerCase();
        boolean hasPrefix = lowered.startsWith(TIMESTAMP_PREFIX) || lowered.startsWith(TIMESTAMP_LEFT_PADDED_PREFIX);
        return hasPrefix && input.endsWith(")");
    }

    /**
     * @return {@code true} if {@code input} starts (case-insensitively) with
     *         {@code timestampl(} and ends with {@code )}
     */
    public static boolean isTimeStampLeftPaddedFormat(String input) {
        return input.toLowerCase().startsWith(TIMESTAMP_LEFT_PADDED_PREFIX) && input.endsWith(")");
    }

    public char getSeparator() {
        return separator;
    }

    public char getStringSeparator() {
        return stringSeparator;
    }

    public int getSkipFirstXLines() {
        return skipFirstXLines;
    }

    /**
     * Tells whether a raw value denotes a missing value.
     *
     * @param possibleNullValues the literals that denote a missing value
     * @param value              the raw value
     * @return {@code true} if {@code value} is null, empty, or (once trimmed)
     *         equal, ignoring case, to one of {@code possibleNullValues}
     */
    public static boolean isNull(String[] possibleNullValues, String value) {
        if (value == null || value.length() == 0) {
            return true;
        }
        String trimmed = value.trim();
        for (String candidate : possibleNullValues) {
            if (candidate.equalsIgnoreCase(trimmed)) {
                return true;
            }
        }
        return false;
    }

    public String[] getFields() {
        return fields;
    }

    /**
     * Builds the output structure: one {@link DataField} per distinct field name.
     * Timestamp fields are declared as {@code bigint}, numeric fields as
     * {@code numeric} and everything else as {@code string}.
     *
     * @return the data fields, in the iteration order of a {@link HashMap}
     */
    public DataField[] getDataFields() {
        HashMap<String, String> dataFields = new HashMap<String, String>();
        String[] names = getFields();
        String[] types = getFormats();
        for (int i = 0; i < names.length; i++) {
            String type = types[i];
            if (isTimeStampFormat(type)) {
                // GSN doesn't support timestamp data type, all timestamp values are supposed to be bigint.
                dataFields.put(names[i], "bigint");
            } else if (type.equalsIgnoreCase("numeric")) {
                dataFields.put(names[i], "numeric");
            } else {
                dataFields.put(names[i], "string");
            }
        }
        DataField[] toReturn = new DataField[dataFields.size()];
        int i = 0;
        for (Map.Entry<String, String> entry : dataFields.entrySet()) {
            toReturn[i++] = new DataField(entry.getKey(), entry.getValue());
        }
        return toReturn;
    }

    public String[] getFormats() {
        return formats;
    }

    public String getDataFile() {
        return dataFile;
    }

    public String[] getNulls() {
        return nulls;
    }

    public void setSkipFirstXLines(int skipFirstXLines) {
        this.skipFirstXLines = skipFirstXLines;
    }

    public DateTimeZone getTimeZone() {
        return timeZone;
    }

    public String getCheckPointFile() {
        return checkPointFile;
    }
}