package org.esa.snap.timeseries.core.insitu.csv;
import org.esa.snap.core.datamodel.GeoPos;
import org.esa.snap.timeseries.core.insitu.Header;
import org.esa.snap.timeseries.core.insitu.Record;
import org.esa.snap.timeseries.core.insitu.RecordSource;
import java.io.IOException;
import java.io.LineNumberReader;
import java.io.Reader;
import java.io.UncheckedIOException;
import java.text.DateFormat;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Date;
import java.util.List;
/**
 * A record source that reads from a CSV stream. Values must be separated by a TAB character, records by a NL (newline).
 * The first record must contain the column names. All non-header records must use the same data type in a column.
 * <p>
 * Lines that are empty or start with {@code #} (after trimming) are skipped. Column data types are inferred
 * from the first non-empty value seen in each column; the station name column, if present, is always text.
 * Data rows with fewer values than the header are padded with {@code null}, surplus values are dropped.
 *
 * @author Norman
 */
public class CsvRecordSource implements RecordSource {

    private static final String[] LAT_NAMES = new String[]{"lat", "latitude", "northing"};
    private static final String[] LON_NAMES = new String[]{"lon", "long", "longitude", "easting"};
    private static final String[] TIME_NAMES = new String[]{"time", "date"};
    private static final String[] STATION_NAMES = new String[]{"name", "station", "label"};

    private final LineNumberReader reader;
    private final Header header;
    private final int recordLength;
    private final DateFormat dateFormat;
    private final int latIndex;
    private final int lonIndex;
    private final int timeIndex;
    private final int stationNameIndex;
    // Per-column value types; entries are filled in lazily by toObjects() as values are parsed.
    private final Class<?>[] attributeTypes;
    private Iterable<Record> recordIterable;
    private CsvRecordIterator csvRecordIterator;

    /**
     * Creates a record source and immediately reads the header line from the given reader.
     *
     * @param reader     The character stream to read from. It is wrapped in a {@link LineNumberReader}
     *                   unless it already is one.
     * @param dateFormat The date format used to parse date/time values.
     *
     * @throws IOException If reading fails or the input contains no header line.
     */
    public CsvRecordSource(Reader reader, DateFormat dateFormat) throws IOException {
        if (reader instanceof LineNumberReader) {
            this.reader = (LineNumberReader) reader;
        } else {
            this.reader = new LineNumberReader(reader);
        }
        this.dateFormat = dateFormat;

        final List<String[]> headerRecords = readTextRecords(-1);
        if (headerRecords.isEmpty()) {
            // Empty or comment-only input: report it as an I/O problem instead of an IndexOutOfBoundsException.
            throw new IOException("No header line found in CSV input.");
        }
        final String[] columnNames = headerRecords.get(0);

        attributeTypes = new Class<?>[columnNames.length];
        latIndex = indexOf(columnNames, LAT_NAMES);
        lonIndex = indexOf(columnNames, LON_NAMES);
        timeIndex = indexOf(columnNames, TIME_NAMES);
        stationNameIndex = indexOf(columnNames, STATION_NAMES);
        if (stationNameIndex >= 0) {
            // Station names are always text. Without this, a name such as "42" would be inferred as a
            // Double and the String cast in the record iterator would fail.
            attributeTypes[stationNameIndex] = String.class;
        }

        final String[] parameterNames = getParameterNames(columnNames);
        final boolean hasLocation = latIndex >= 0 && lonIndex >= 0;
        final boolean hasTime = timeIndex >= 0;
        final boolean hasStationName = stationNameIndex >= 0;
        header = new DefaultHeader(hasLocation, hasTime, hasStationName, columnNames, parameterNames);
        recordLength = columnNames.length;
    }

    @Override
    public Header getHeader() {
        return header;
    }

    /**
     * Returns the records of this source. The same iterable (and the same underlying iterator) is reused
     * across calls; each call rewinds the iterator to the first record. The data lines are read from the
     * reader the first time the iterable is iterated.
     *
     * @return The iterable over all data records.
     */
    @Override
    public Iterable<Record> getRecords() {
        if (recordIterable == null) {
            recordIterable = createIterable();
        }
        if (csvRecordIterator != null) {
            csvRecordIterator.currentRecord = 0;
        }
        return recordIterable;
    }

    @Override
    public void close() {
        try {
            reader.close();
        } catch (IOException ignored) {
            // Best-effort close: there is nothing sensible to do if closing the reader fails.
        }
    }

    /**
     * Creates the iterable whose iterator is created on first use by reading all remaining lines.
     *
     * @return The lazily initialised iterable.
     */
    private Iterable<Record> createIterable() {
        return () -> {
            if (csvRecordIterator == null) {
                try {
                    csvRecordIterator = new CsvRecordIterator(readTextRecords(recordLength));
                } catch (IOException e) {
                    // Surface the failure with its cause rather than returning a null iterator.
                    throw new UncheckedIOException("Failed to read CSV records.", e);
                }
            }
            return csvRecordIterator;
        };
    }

    /**
     * Returns the column names that are not one of the recognised latitude, longitude, time or
     * station name columns.
     *
     * @param columnNames All column names of the header.
     *
     * @return The remaining column names in their original order.
     */
    private String[] getParameterNames(String[] columnNames) {
        final int[] sortedIndices = {latIndex, lonIndex, timeIndex, stationNameIndex};
        Arrays.sort(sortedIndices);
        final List<String> parameterNames = new ArrayList<>();
        Collections.addAll(parameterNames, columnNames);
        // Remove from the highest index downwards so that the remaining indices stay valid.
        for (int i = sortedIndices.length - 1; i >= 0; i--) {
            final int index = sortedIndices[i];
            if (index > -1) {
                parameterNames.remove(index);
            }
        }
        return parameterNames.toArray(new String[0]);
    }

    /**
     * Converts a string array into an array of object which are either a number ({@link Double}), a text ({@link String}),
     * a date/time value ({@link Date}). Empty text is converted to {@code null}.
     * <p>
     * If no type is known yet for a column, it is derived from the parsed value and stored in {@code types}.
     *
     * @param textValues The text values to convert.
     * @param types      The types; entries that are {@code null} are updated in place.
     * @param dateFormat The date format to be used.
     *
     * @return The array of converted objects.
     */
    private static Object[] toObjects(String[] textValues, Class<?>[] types, DateFormat dateFormat) {
        final Object[] values = new Object[textValues.length];
        for (int i = 0; i < textValues.length; i++) {
            final String text = textValues[i];
            if (text != null && !text.isEmpty()) {
                final Object value;
                final Class<?> type = types[i];
                if (type != null) {
                    value = parse(text, type, dateFormat);
                } else {
                    value = parse(text, dateFormat);
                    if (value != null) {
                        types[i] = value.getClass();
                    }
                }
                values[i] = value;
            }
        }
        return values;
    }

    /**
     * Finds the index of the first text value that matches one of the possible values, ignoring case.
     * Earlier entries of {@code possibleValues} take precedence over later ones.
     *
     * @param textValues     The values to search in.
     * @param possibleValues The accepted names, in order of priority.
     *
     * @return The index into {@code textValues}, or {@code -1} if nothing matches.
     */
    private static int indexOf(String[] textValues, String[] possibleValues) {
        for (String possibleValue : possibleValues) {
            for (int index = 0; index < textValues.length; index++) {
                if (possibleValue.equalsIgnoreCase(textValues[index])) {
                    return index;
                }
            }
        }
        return -1;
    }

    /**
     * Splits a line at TAB characters and trims each value.
     *
     * @param line         The line to split.
     * @param recordLength If positive, the exact length of the returned array: missing trailing values are
     *                     {@code null} and surplus values are dropped. Otherwise all values are returned.
     *
     * @return The trimmed values of the line.
     */
    private static String[] splitRecordLine(String line, int recordLength) {
        final boolean limited = recordLength > 0;
        final List<String> fields = new ArrayList<>(limited ? recordLength : 16);
        int start = 0;
        // Stop as soon as the requested number of fields has been collected so that surplus
        // values can never enlarge the resulting array.
        while (!limited || fields.size() < recordLength) {
            final int tab = line.indexOf('\t', start);
            if (tab < 0) {
                fields.add(line.substring(start).trim());
                break;
            }
            fields.add(line.substring(start, tab).trim());
            start = tab + 1;
        }
        // With a pre-sized array, toArray() leaves the slots after the last field as null.
        return fields.toArray(new String[limited ? recordLength : fields.size()]);
    }

    /**
     * Reads text records from the reader, skipping empty lines and lines starting with {@code #}.
     *
     * @param recordLength If negative, only the next record is read and returned with all its values;
     *                     otherwise all remaining records are read and passed through
     *                     {@link #splitRecordLine(String, int)} with this length.
     *
     * @return The records read; may be empty.
     *
     * @throws IOException If reading from the underlying reader fails.
     */
    private List<String[]> readTextRecords(int recordLength) throws IOException {
        final List<String[]> result = new ArrayList<>();
        String line;
        while ((line = reader.readLine()) != null) {
            final String trimLine = line.trim();
            if (!trimLine.startsWith("#") && !trimLine.isEmpty()) {
                result.add(splitRecordLine(line, recordLength));
                if (recordLength < 0) {
                    return result;
                }
            }
        }
        return result;
    }

    /**
     * Parses text as a value of a known type. Unparsable numbers yield {@code NaN}, unparsable dates
     * yield {@code new Date(0L)}.
     *
     * @param text       The text to parse.
     * @param type       One of {@code Double.class}, {@code String.class} or {@code Date.class}.
     * @param dateFormat The date format to be used.
     *
     * @return The parsed value.
     *
     * @throws IllegalStateException If the type is not supported.
     */
    private static Object parse(String text, Class<?> type, DateFormat dateFormat) {
        if (type.equals(Double.class)) {
            try {
                return parseDouble(text);
            } catch (NumberFormatException e) {
                return Double.NaN;
            }
        } else if (type.equals(String.class)) {
            return text;
        } else if (type.equals(Date.class)) {
            try {
                return dateFormat.parse(text);
            } catch (ParseException e) {
                return new Date(0L);
            }
        } else {
            throw new IllegalStateException("Unhandled data type: " + type);
        }
    }

    /**
     * Parses text of unknown type: first as a number, then as a date, and falls back to the text itself.
     *
     * @param text       The text to parse.
     * @param dateFormat The date format to be used.
     *
     * @return A {@link Double}, a {@link Date} or the original text.
     */
    private static Object parse(String text, DateFormat dateFormat) {
        try {
            return parseDouble(text);
        } catch (NumberFormatException e) {
            try {
                return dateFormat.parse(text);
            } catch (ParseException e1) {
                return text;
            }
        }
    }

    /**
     * Parses a double value, additionally accepting the case-insensitive spellings
     * {@code nan}, {@code inf}, {@code infinity}, {@code -inf} and {@code -infinity}.
     *
     * @param text The text to parse.
     *
     * @return The parsed value.
     *
     * @throws NumberFormatException If the text is not a number.
     */
    private static Double parseDouble(String text) {
        try {
            return Double.valueOf(text);
        } catch (NumberFormatException e) {
            if (text.equalsIgnoreCase("nan")) {
                return Double.NaN;
            } else if (text.equalsIgnoreCase("inf") || text.equalsIgnoreCase("infinity")) {
                return Double.POSITIVE_INFINITY;
            } else if (text.equalsIgnoreCase("-inf") || text.equalsIgnoreCase("-infinity")) {
                return Double.NEGATIVE_INFINITY;
            } else {
                throw e;
            }
        }
    }

    /**
     * Iterator over the text records read from the stream; converts each text record into a {@link Record}.
     */
    private class CsvRecordIterator extends RecordIterator {

        private final List<String[]> records;
        // Reset to 0 by getRecords() in order to rewind the iteration.
        private int currentRecord;

        private CsvRecordIterator(List<String[]> records) {
            this.currentRecord = 0;
            this.records = records;
        }

        /**
         * Converts the next text record into a record.
         *
         * @return The next record, or {@code null} if all records have been delivered.
         */
        @Override
        protected Record getNextRecord() {
            if (currentRecord >= records.size()) {
                return null;
            }
            final String[] record = records.get(currentRecord);
            currentRecord++;

            final Object[] values = toObjects(record, attributeTypes, dateFormat);

            final GeoPos location;
            if (header.hasLocation() && values[latIndex] instanceof Number && values[lonIndex] instanceof Number) {
                location = new GeoPos(((Number) values[latIndex]).floatValue(),
                                      ((Number) values[lonIndex]).floatValue());
            } else {
                location = null;
            }

            final Date time;
            if (header.hasTime() && values[timeIndex] instanceof Date) {
                time = (Date) values[timeIndex];
            } else {
                time = null;
            }

            final String stationName;
            if (header.hasStationName()) {
                // Safe cast: the constructor pins the station name column type to String.
                stationName = (String) values[stationNameIndex];
            } else {
                stationName = time != null ? time.toString() : "Unknown";
            }
            return new DefaultRecord(location, time, stationName, values);
        }
    }
}