/*****************************************************************************
* Limpet - the Lightweight InforMation ProcEssing Toolkit
* http://limpet.info
*
* (C) 2015-2016, Deep Blue C Technologies Ltd
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the Eclipse Public License v1.0
* (http://www.eclipse.org/legal/epl-v10.html)
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
*****************************************************************************/
package info.limpet.data.csv;
import info.limpet.ICollection;
import info.limpet.IQuantityCollection;
import info.limpet.IStoreItem;
import info.limpet.ITemporalQuantityCollection;
import info.limpet.data.impl.ObjectCollection;
import info.limpet.data.impl.samples.StockTypes.NonTemporal;
import info.limpet.data.impl.samples.StockTypes.Temporal;
import info.limpet.data.impl.samples.StockTypes.Temporal.Strings;
import info.limpet.data.impl.samples.TemporalLocation;
import info.limpet.data.operations.spatial.GeoSupport;
import info.limpet.data.store.StoreGroup;
import java.awt.geom.Point2D;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.nio.charset.Charset;
import java.text.DateFormat;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.Iterator;
import java.util.List;
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVRecord;
public class CsvParser
{
/**
 * default format for combined date/time values, e.g. 21/09/2015 07:00:31
 *
 * The hour uses the 24-hour pattern letter (HH). The 12-hour letter (hh)
 * maps an hour value of 12 to 0, so a value such as 12:30:00 would be read
 * as half past midnight.
 *
 * NOTE(review): SimpleDateFormat is documented as not synchronized, and this
 * instance is shared via getDateFormat() - confirm callers are
 * single-threaded.
 */
private static final DateFormat DATE_FORMAT = new SimpleDateFormat(
    "dd/MM/yyyy HH:mm:ss");

/**
 * default format for time-only values, e.g. 07:00:31 (24-hour clock, see
 * DATE_FORMAT for the rationale)
 */
private static final DateFormat TIME_FORMAT =
    new SimpleDateFormat("HH:mm:ss");

/**
 * importers that can be matched against a column header, either by column
 * name or by units. Lazily populated by createImporters()
 */
private ArrayList<DataImporter> _candidates;
/**
 * read the CSV file at the supplied path, producing one collection per
 * column-set.
 *
 * The first record is treated as the header row. If its first cell starts
 * with "time" (ignoring case) the file is treated as temporal; a format
 * string may be supplied in brackets, e.g. <code>Time(yyyy-MM-dd)</code>.
 * Each remaining header cell is matched against the candidate importers,
 * first by column name and then by the units given in brackets. Columns
 * that nobody recognises are deferred: their type (numeric or string) is
 * decided from the first row of data.
 *
 * @param filePath
 *          path to the CSV file, read as UTF-8
 * @return a single StoreGroup (named after the file) when more than one
 *         series was produced, otherwise the series themselves
 * @throws IOException
 *           if the file cannot be opened or read
 */
public List<IStoreItem> parse(String filePath) throws IOException
{
  final List<IStoreItem> res = new ArrayList<IStoreItem>();
  final File inFile = new File(filePath);
  final String fullFileName = inFile.getName();
  final String fileName = filePrefix(fullFileName);

  // store one importer per column-set
  final List<DataImporter> importers = new ArrayList<DataImporter>();
  // and store one series per column-set
  final List<ICollection> series = new ArrayList<ICollection>();

  // try-with-resources: the file handle is released even if parsing fails
  try (Reader in =
      new InputStreamReader(new FileInputStream(inFile), Charset
          .forName("UTF-8")))
  {
    final Iterable<CSVRecord> records = CSVFormat.DEFAULT.parse(in);

    // generate our list of importers
    createImporters();

    // fall-back importers, used to replace deferred columns once we have
    // seen some data
    final DataImporter temporalDimensionless =
        new TemporalSeriesSupporter<Temporal.DimensionlessDouble>(
            Temporal.DimensionlessDouble.class, null, null);
    final DataImporter temporalStrings = new TemporalStringImporter();
    final DataImporter strings = new StringImporter();
    final DataImporter dimensionless =
        new SeriesSupporter<NonTemporal.DimensionlessDouble>(
            NonTemporal.DimensionlessDouble.class, null, null);

    boolean first = true;
    boolean isTime = false;
    DateFormat customDateFormat = null;

    for (CSVRecord record : records)
    {
      if (first)
      {
        first = false;

        final String time = record.get(0);
        int ctr = 0;

        // case-insensitive test that does not depend on the default locale
        if (time != null && time.regionMatches(true, 0, "time", 0, 4))
        {
          // is it plain time?
          if (!time.equalsIgnoreCase("time"))
          {
            // ok, see if we have a time format string. Insist that the
            // brackets are in the right order, so we can't ask for a
            // negative-length substring
            final int open = time.indexOf("(");
            final int close = time.indexOf(")");
            if (open >= 0 && close > open)
            {
              customDateFormat =
                  new SimpleDateFormat(time.substring(open + 1, close));
            }
          }
          isTime = true;
          ctr = 1;
        }

        while (ctr < record.size())
        {
          final String nextVal = record.get(ctr);

          // the column name is whatever precedes the (optional) units
          final int i1 = nextVal.indexOf("(");
          final String colName;
          if (i1 > 0)
          {
            colName = nextVal.substring(0, i1).trim();
          }
          else
          {
            colName = nextVal.trim();
          }

          // see if anybody can handle this name, or these units
          final DataImporter match = findImporter(colName, nextVal, i1);
          if (match != null)
          {
            importers.add(match);
            series.add(match.create(fileName + "-" + match.nameFor(colName)));
            ctr += match.numCols();
          }
          else
          {
            // we don't know. Introduce a deferred decision maker, so we
            // can make a decision once we've read in some data
            importers.add(new DeferredLoadSupporter(colName));
            series.add(new ObjectCollection<String>("null"));
            ctr += 1;
          }
        }
      }
      else
      {
        long theTime = -1;
        int thisCol = 0;

        if (isTime)
        {
          // the first column holds the time, so the data always starts in
          // the second column - even if the time value can't be parsed.
          thisCol = 1;

          final String timeStr = record.get(0);
          try
          {
            final DateFormat thisFormat =
                chooseTimeFormat(customDateFormat, timeStr);
            theTime = thisFormat.parse(timeStr).getTime();
          }
          catch (ParseException e)
          {
            // best effort: report it, and carry on with the -1 marker time
            e.printStackTrace();
          }
        }

        // now move through the other cols
        final int numImporters = importers.size();
        for (int i = 0; i < numImporters; i++)
        {
          DataImporter thisI = importers.get(i);

          // ok, just check if this is a deferred importer
          if (thisI instanceof DeferredLoadSupporter)
          {
            final String seriesName =
                ((DeferredLoadSupporter) thisI).getName();

            // decide on the type of data from the content of this cell
            final boolean numeric = isNumeric(record.get(thisCol));
            final DataImporter importer;
            if (isTime)
            {
              importer = numeric ? temporalDimensionless : temporalStrings;
            }
            else
            {
              importer = numeric ? dimensionless : strings;
            }

            // swap the placeholder importer (and series) for the real ones
            importers.set(i, importer);
            series.set(i, importer.create(fileName + "-" + seriesName));
            thisI = importer;
          }

          thisI.consume(series.get(i), theTime, thisCol, record);
          thisCol += thisI.numCols();
        }
      }
    }
  }

  // ok, store the series
  if (series.size() > 1)
  {
    final StoreGroup target = new StoreGroup(fullFileName);
    for (ICollection coll : series)
    {
      target.add(coll);
    }
    res.add(target);
  }
  else
  {
    res.addAll(series);
  }
  return res;
}

/**
 * find the candidate importer for a header cell: first by column name,
 * then (if the header carries units in brackets) by units.
 *
 * @param colName
 *          the column name, with any units removed
 * @param header
 *          the complete header cell
 * @param unitsStart
 *          index of the opening bracket in the header, or -1
 * @return the first matching importer, or null if nobody handles it
 */
private DataImporter findImporter(String colName, String header,
    int unitsStart)
{
  for (DataImporter candidate : _candidates)
  {
    if (candidate.handleName(colName))
    {
      return candidate;
    }
  }

  final int unitsEnd = header.indexOf(")");
  if (unitsEnd > 0 && unitsEnd > unitsStart + 1)
  {
    final String units = header.substring(unitsStart + 1, unitsEnd).trim();
    for (DataImporter candidate : _candidates)
    {
      if (candidate.handleUnits(units))
      {
        return candidate;
      }
    }
  }
  return null;
}

/**
 * choose the format used to read a time cell: the custom format from the
 * header if there is one, else the time-only format for values shorter
 * than ten characters, else the date/time format.
 */
private static DateFormat chooseTimeFormat(DateFormat custom, String value)
{
  if (custom != null)
  {
    return custom;
  }
  return value.length() < 10 ? TIME_FORMAT : DATE_FORMAT;
}
/**
 * accessor for the date/time format used when the header does not supply a
 * custom one.
 *
 * @return the shared format instance (not a copy)
 */
public static DateFormat getDateFormat()
{
  return CsvParser.DATE_FORMAT;
}
/**
 * accessor for the time-only format, used for time values shorter than ten
 * characters when the header does not supply a custom format.
 *
 * @return the shared format instance (not a copy)
 */
public static DateFormat getTimeFormat()
{
  return CsvParser.TIME_FORMAT;
}
/**
 * strip the extension from a file name.
 *
 * The extension is whatever follows the last dot, provided at least one
 * character follows it; a name with no dot (or with only a trailing dot) is
 * returned unchanged.
 *
 * @param fullPath
 *          the file name
 * @return the name without its extension
 */
private String filePrefix(String fullPath)
{
  final int lastDot = fullPath.lastIndexOf('.');
  final boolean hasExtension =
      lastDot >= 0 && lastDot < fullPath.length() - 1;
  return hasExtension ? fullPath.substring(0, lastDot) : fullPath;
}
/**
 * populate the list of candidate importers, if that hasn't been done
 * already.
 *
 * The location importer is matched on its column name; the remaining
 * importers are matched on the units string found in the column header.
 */
private void createImporters()
{
  if (_candidates != null)
  {
    // already initialised
    return;
  }

  _candidates = new ArrayList<DataImporter>();
  _candidates.add(new LocationImporter());
  _candidates.add(new TemporalSeriesSupporter<Temporal.ElapsedTimeSec>(
      Temporal.ElapsedTimeSec.class, null, "secs"));
  _candidates.add(new TemporalSeriesSupporter<Temporal.FrequencyHz>(
      Temporal.FrequencyHz.class, null, "Hz"));
  _candidates.add(new TemporalSeriesSupporter<Temporal.TurnRate>(
      Temporal.TurnRate.class, null, "Degs/sec"));
  _candidates.add(new TemporalSeriesSupporter<Temporal.LengthM>(
      Temporal.LengthM.class, null, "m"));
  // yards must produce a yards collection (this used to create LengthM,
  // so yard values were stored in a metres series)
  _candidates.add(new TemporalSeriesSupporter<Temporal.LengthYd>(
      Temporal.LengthYd.class, null, "yds"));
  _candidates.add(new TemporalSeriesSupporter<Temporal.AngleDegrees>(
      Temporal.AngleDegrees.class, null, "Degs"));
  _candidates.add(new TemporalSeriesSupporter<Temporal.SpeedKts>(
      Temporal.SpeedKts.class, null, "kts"));
  _candidates.add(new TemporalSeriesSupporter<Temporal.SpeedMSec>(
      Temporal.SpeedMSec.class, null, "M/Sec"));
  _candidates.add(new TemporalSeriesSupporter<Temporal.TemperatureC>(
      Temporal.TemperatureC.class, null, "C"));
}
/**
 * test whether the supplied text can be read as a double.
 *
 * @param str
 *          the text to test (may be null)
 * @return true if Double.parseDouble accepts the text; false if it is
 *         rejected, or if the text is null
 */
public static boolean isNumeric(String str)
{
  // parseDouble throws NullPointerException (not NumberFormatException)
  // for null, so handle that case ourselves
  if (str == null)
  {
    return false;
  }

  try
  {
    Double.parseDouble(str);
    return true;
  }
  catch (NumberFormatException nfe)
  {
    return false;
  }
}
/**
 * common parent for the column importers. An importer can be matched to a
 * column by name or by units, can create the series that will hold the
 * data, and can read one row's worth of data into that series.
 *
 * @author ian
 *
 */
public abstract static class DataImporter
{
  private final String _units;
  private final String _colName;
  private Class<?> _classType;

  /**
   * constructor
   *
   * @param classType
   *          the type of series we produce; instantiated reflectively via
   *          its no-argument constructor in {@link #create(String)}
   * @param colName
   *          the column name we respond to, or null if we don't match on
   *          name
   * @param units
   *          the units we respond to, or null if we don't match on units
   */
  protected DataImporter(Class<?> classType, String colName, String units)
  {
    _classType = classType;
    _colName = colName;
    _units = units;
  }

  /**
   * produce a new, named, instance of our series type
   *
   * @param name
   *          the name to give the series
   * @return the new series, or null if it could not be instantiated (the
   *         failure is written to the error stream)
   */
  public ICollection create(String name)
  {
    ICollection created = null;
    try
    {
      created = (ICollection) _classType.newInstance();
      created.setName(name);
    }
    catch (InstantiationException | IllegalAccessException e)
    {
      e.printStackTrace();
    }
    return created;
  }

  /**
   * the name to use for the series produced from the supplied column. By
   * default, this is the column name itself.
   *
   * @param colName
   *          the column name from the header
   * @return the series name
   */
  public String nameFor(String colName)
  {
    return colName;
  }

  /**
   * read our column(s) from the supplied row, and store the value in the
   * series
   *
   * @param series
   *          the series to add to
   * @param thisTime
   *          time stamp for this row
   * @param colStart
   *          index of the first column to read
   * @param row
   *          the row of data
   */
  public abstract void consume(ICollection series, long thisTime,
      int colStart, CSVRecord row);

  /**
   * do we respond to this column name?
   *
   * @param colName
   *          the column name from the header
   * @return true if we were given a column name, and it equals this one
   */
  public final boolean handleName(String colName)
  {
    return _colName != null && _colName.equals(colName);
  }

  /**
   * do we respond to these units?
   *
   * @param units
   *          the units from the header
   * @return true if we were given units, and they equal these ones
   */
  public final boolean handleUnits(String units)
  {
    return _units != null && _units.equals(units);
  }

  /**
   * the number of columns that one call to consume reads
   *
   * @return one, unless overridden
   */
  public int numCols()
  {
    return 1;
  }
}
/**
 * importer for a column of time-stamped text values
 *
 * @author ian
 *
 */
protected static class TemporalStringImporter extends DataImporter
{
  protected TemporalStringImporter()
  {
    // no column name or units: this importer is never matched on the header
    super(Temporal.Strings.class, null, null);
  }

  /**
   * store the text found at colStart against the supplied time
   */
  @Override
  public void consume(ICollection series, long thisTime, int colStart,
      CSVRecord row)
  {
    final String cell = row.get(colStart);
    final Temporal.Strings target = (Temporal.Strings) series;
    target.add(thisTime, cell);
  }
}
/**
 * importer for a column of text values that have no time stamp
 *
 * @author ian
 *
 */
protected static class StringImporter extends DataImporter
{
  protected StringImporter()
  {
    // no column name or units: this importer is never matched on the header
    super(NonTemporal.Strings.class, null, null);
  }

  /**
   * store the text found at colStart. The time stamp is not used.
   */
  @Override
  public void consume(ICollection series, long thisTime, int colStart,
      CSVRecord row)
  {
    final String cell = row.get(colStart);
    final NonTemporal.Strings target = (NonTemporal.Strings) series;
    target.add(cell);
  }
}
/**
* class to handle importing two columns of location data
*
* @author ian
*
*/
protected static class LocationImporter extends DataImporter
{
protected LocationImporter()
{
super(TemporalLocation.class, "Lat", null);
}
public String nameFor(String colName)
{
return "Location";
}
public TemporalLocation create(String name)
{
return new TemporalLocation(name);
}
public void consume(ICollection series, long thisTime, int colStart,
CSVRecord row)
{
final TemporalLocation locS = (TemporalLocation) series;
String latVal = row.get(colStart);
Double valLat = Double.parseDouble(latVal);
String longVal = row.get(colStart + 1);
Double valLong = Double.parseDouble(longVal);
Point2D point = GeoSupport.getCalculator().createPoint(valLong, valLat);
locS.add(thisTime, point);
}
public int numCols()
{
return 2;
}
}
/**
 * generic importer for a single column of numeric values that have no time
 * stamp
 *
 * @author ian
 *
 * @param <T>
 *          the type of quantity collection being populated
 */
protected static class SeriesSupporter<T extends IQuantityCollection<?>>
    extends DataImporter
{
  protected SeriesSupporter(Class<?> classType, String colName, String units)
  {
    super(classType, colName, units);
  }

  /**
   * append the quantity to the series. The time is not used.
   */
  protected void add(ICollection series, long time, Number quantity)
  {
    final IQuantityCollection<?> quantities = (IQuantityCollection<?>) series;
    quantities.add(quantity);
  }

  /**
   * read the cell at colStart as a double, and add it to the series
   */
  @Override
  public void consume(ICollection series, long thisTime, int colStart,
      CSVRecord row)
  {
    final String cell = row.get(colStart);
    final Double value = Double.valueOf(cell);
    add(series, thisTime, value);
  }

  @Override
  public int numCols()
  {
    return 1;
  }
}
/**
 * generic importer for a single column of time-stamped numeric values
 *
 * @author ian
 *
 * @param <T>
 *          the type of temporal quantity collection being populated
 */
protected static class TemporalSeriesSupporter<T extends ITemporalQuantityCollection<?>>
    extends DataImporter
{
  protected TemporalSeriesSupporter(Class<?> classType, String colName,
      String units)
  {
    super(classType, colName, units);
  }

  /**
   * store the quantity in the series, against the supplied time
   */
  protected void add(ICollection series, long time, Number quantity)
  {
    final ITemporalQuantityCollection<?> quantities =
        (ITemporalQuantityCollection<?>) series;
    quantities.add(time, quantity);
  }

  /**
   * read the cell at colStart as a double, and add it to the series against
   * the supplied time
   */
  @Override
  public void consume(ICollection series, long thisTime, int colStart,
      CSVRecord row)
  {
    final String cell = row.get(colStart);
    final Double value = Double.valueOf(cell);
    add(series, thisTime, value);
  }

  @Override
  public int numCols()
  {
    return 1;
  }
}
/**
 * placeholder importer for a column whose type isn't known from the header.
 * It only remembers the column name; it must be replaced by a real importer
 * before any data is consumed.
 */
protected static class DeferredLoadSupporter extends DataImporter
{
  /** the column name, as read from the header */
  private final String name;

  public DeferredLoadSupporter(String name)
  {
    // no series type, column name or units: we never create or match
    super(null, null, null);
    this.name = name;
  }

  /**
   * @return the column name we were created for
   */
  public String getName()
  {
    return this.name;
  }

  /**
   * always fails: a placeholder cannot read data
   */
  @Override
  public void consume(ICollection series, long thisTime, int colStart,
      CSVRecord row)
  {
    final String message =
        "We're just temporary - we should never actually be called!";
    throw new RuntimeException(message);
  }
}
}