/*
* Copyright 2011 Christian Thiemann <christian@spato.net>
* Developed at Northwestern University <http://rocs.northwestern.edu>
*
* This file is part of the SPaTo Visual Explorer (SPaTo).
*
* SPaTo is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* SPaTo is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with SPaTo. If not, see <http://www.gnu.org/licenses/>.
*/
package net.spato.sve.app.data;
import java.util.Vector;
import processing.core.PApplet;
public class TabulatedData {
// FIXME: handle header/footer lines
// FIXME: handle line breaks in enclosed fields
// FIXME: handle proper escape guessing
protected String lines[] = null;
public static final char WHITESPACE = 0;
public static final char GUESS = (char)-1;
protected char delim = ','; // field delimiter
protected char enclose = '"'; // (optional) field enclosing char
protected char escape = '\\'; // escape char for enclose char
protected int numCols = 0;
protected int numHeaderRows = 0;
protected int numFooterRows = 0;
protected String fields[][] = null;
public static final int UNKNOWN = 0;
public static final int STRING = 1;
public static final int FLOAT = 2;
public static final int INT = 3;
protected int typeMatrix = 0;
protected int typeColumn[] = null;
protected int typeRow[] = null;
public TabulatedData(String lines[]) { this(lines, GUESS); }
public TabulatedData(String lines[], char delim) { this(lines, delim, GUESS); }
public TabulatedData(String lines[], char delim, char enclose) { this(lines, delim, enclose, GUESS); }
public TabulatedData(String lines[], char delim, char enclose, char escape) {
this.lines = lines;
setParameters(delim, enclose, escape);
}
public void setDelim(char delim) { this.delim = delim; parse(); }
public char getDelim() { return delim; }
public void setEnclose(char enclose) { this.enclose = enclose; parse(); }
public char getEnclose() { return enclose; }
public void setEscape(char escape) { this.escape = escape; parse(); }
public char getEscape() { return escape; }
public void setParameters(char delim, char enclose, char escape) {
this.delim = delim; this.enclose = enclose; this.escape = escape; parse(); }
public String[][] getFields() { return fields; }
public int getNumCols() { return numCols; }
public int getNumRows() { return lines.length - numHeaderRows - numFooterRows; }
public boolean isSquareMatrix() { return getNumCols() == getNumRows(); }
public boolean isIntMatrix() { return checkMatrixType() == INT; }
public boolean isNumericMatrix() { return checkMatrixType() >= FLOAT; }
public String getMatrixType() { return typeString(checkMatrixType()); }
public boolean isIntColumn(int j) { return checkColumnType(j) == INT; }
public boolean isNumericColumn(int j) { return checkColumnType(j) >= FLOAT; }
public String getColumnType(int j) { return typeString(checkColumnType(j)); }
public boolean isIntRow(int j) { return checkRowType(j) == INT; }
public boolean isNumericRow(int j) { return checkRowType(j) >= FLOAT; }
public String getRowType(int j) { return typeString(checkRowType(j)); }
public float[][] getFloatMatrix() {
if (fields == null) parse();
float result[][] = new float[lines.length][numCols];
for (int i = 0; i < lines.length; i++)
for (int j = 0; j < numCols; j++)
result[i][j] = PApplet.parseFloat(fields[i][j]);
return result;
}
public int[][] getIndexMatrix() { return getIntMatrix(-1); }
public int[][] getIntMatrix() { return getIntMatrix(0); }
public int[][] getIntMatrix(int delta) {
if (fields == null) parse();
int result[][] = new int[lines.length][numCols];
for (int i = 0; i < lines.length; i++)
for (int j = 0; j < numCols; j++)
result[i][j] = PApplet.parseInt(fields[i][j]) + delta;
return result;
}
public String[] getColumn(int j) {
if (fields == null) parse();
String result[] = new String[lines.length];
for (int i = 0; i < lines.length; i++)
result[i] = fields[i][j];
return result;
}
public float[] getFloatColumn(int j) { return PApplet.parseFloat(getColumn(j)); }
public int[] getIndexColumn(int j) { return getIntColumn(j, -1); }
public int[] getIntColumn(int j) { return PApplet.parseInt(getColumn(j)); }
public int[] getIntColumn(int j, int delta) {
if (fields == null) parse();
int result[] = new int[lines.length];
for (int i = 0; i < lines.length; i++)
result[i] = PApplet.parseInt(fields[i][j]) + delta;
return result;
}
public String[] getRow(int i) { if (fields == null) parse(); return fields[i]; }
public float[] getFloatRow(int i) { return PApplet.parseFloat(getRow(i)); }
public int[] getIndexRow(int i) { return getIntRow(i, -1); }
public int[] getIntRow(int i) { return PApplet.parseInt(getRow(i)); }
public int[] getIntRow(int i, int delta) {
if (fields == null) parse();
int result[] = new int[numCols];
for (int j = 0; j < numCols; j++)
result[j] = PApplet.parseInt(fields[i][j]) + delta;
return result;
}
protected String[] parseRecord(String record, char delim, char enclose, char escape) {
Vector<String> result = new Vector<String>(); // return value
String currentField = ""; // current field
boolean enclosed = false;
boolean escaped = false;
for (int i = 0; i < record.length(); i++) {
char c = record.charAt(i);
if ((c == delim) && !enclosed) {
if ((delim != ' ') || (!currentField.equals(""))) { // treat multiple spaces as one separator
result.add(currentField);
currentField = "";
}
} else if ((c == escape) && (i < record.length() - 1) && (record.charAt(i+1) == enclose)) {
escaped = true;
} else if ((c == enclose) && !escaped) {
enclosed = !enclosed;
} else {
currentField += c;
escaped = false;
}
}
result.add(currentField);
return result.toArray(new String[numCols]);
}
protected String[][] parse(String lines[], char delim, char enclose, char escape, int minNumCols) {
// parse one record that most probably is neither header nor footer and check number of columns
int numCols = parseRecord(lines[lines.length/2], delim, enclose, escape).length;
if (numCols < minNumCols) return null;
// parse all records and make sure they all have the same number of columns
String fields[][] = new String[lines.length][];
for (int i = 0; i < lines.length; i++) {
fields[i] = parseRecord(lines[i], delim, enclose, escape);
if (fields[i].length != numCols) return null;
}
return fields;
}
protected void parse() {
// reset all processed data
this.fields = null;
typeMatrix = 0;
typeColumn = null;
typeRow = null;
numCols = 0;
// check if we actually have to do anything
if ((lines == null) || (lines.length == 0)) return;
// cycle through all parameters that are to be GUESSed, saving the "best" parsing result
char _delim = this.delim, _enclose = this.enclose, _escape = this.escape;
for (char enclose : (_enclose == GUESS) ? new char[] { '"', '\'' } : new char[] { _enclose }) {
for (char escape : (_escape == GUESS) ? new char[] { '\\', enclose } : new char[] { _escape }) {
for (char delim : (_delim == GUESS) ? new char[] { '\t', ' ', ',', ';', '$' } : new char[] { _delim }) {
// try to parse into more columns than we already found (at least 2)
String fields[][] = parse(lines, delim, enclose, escape, PApplet.max(2, numCols + 1));
if (fields != null) {
// these seem to be good parameters, save them into the class member variables
// (yes, using the same names for local and class variables is pretty awesome fun)
this.fields = fields;
this.delim = delim;
this.enclose = enclose;
this.escape = escape;
numCols = fields[0].length;
}
}
}
}
// if the guessing didn't yield anything appropriate, then treat this as 1-column data
if (this.fields == null) {
this.fields = new String[lines.length][1];
for (int i = 0; i < lines.length; i++)
this.fields[i][0] = lines[i];
numCols = 1;
}
// create proper type caches
typeColumn = new int[numCols];
typeRow = new int[lines.length];
}
protected int checkMatrixType() {
if (typeMatrix == 0) {
typeMatrix = INT;
for (int j = 0; j < numCols; j++)
typeMatrix = PApplet.min(typeMatrix, checkColumnType(j));
}
return typeMatrix;
}
protected int checkColumnType(int j) {
if (typeColumn[j] == 0) {
typeColumn[j] = INT;
for (int i = 0; i < lines.length; i++) {
if (typeColumn[j] == INT) try { Integer.valueOf(fields[i][j]); } catch (NumberFormatException e) { typeColumn[j] = FLOAT; }
if (typeColumn[j] == FLOAT) try { Float.valueOf(fields[i][j]); } catch (NumberFormatException e) { typeColumn[j] = STRING; break; }
}
}
return typeColumn[j];
}
protected int checkRowType(int i) {
if (typeRow[i] == 0) {
typeRow[i] = INT;
for (int j = 0; j < numCols; j++) {
if (typeRow[i] == INT) try { Integer.valueOf(fields[i][j]); } catch (NumberFormatException e) { typeRow[i] = FLOAT; }
if (typeRow[i] == FLOAT) try { Float.valueOf(fields[i][j]); } catch (NumberFormatException e) { typeRow[i] = STRING; break; }
}
}
return typeRow[i];
}
protected String typeString(int type) {
switch (type) {
case STRING: return "string";
case FLOAT: return "numeric";
case INT: return "integer";
default: return "unknown";
}
}
}