/*==========================================================================*\
| $Id: CSVDataTable.java,v 1.4 2012/02/29 03:58:16 stedwar2 Exp $
|*-------------------------------------------------------------------------*|
| Copyright (C) 2007-2010 Virginia Tech
|
| This file is part of the Student-Library.
|
| The Student-Library is free software; you can redistribute it and/or
| modify it under the terms of the GNU Lesser General Public License as
| published by the Free Software Foundation; either version 3 of the
| License, or (at your option) any later version.
|
| The Student-Library is distributed in the hope that it will be useful,
| but WITHOUT ANY WARRANTY; without even the implied warranty of
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
| GNU Lesser General Public License for more details.
|
| You should have received a copy of the GNU Lesser General Public License
| along with the Student-Library; if not, see <http://www.gnu.org/licenses/>.
\*==========================================================================*/
package student;
import com.Ostermiller.util.ExcelCSVParser;
import com.Ostermiller.util.ExcelCSVPrinter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
//-------------------------------------------------------------------------
/**
* CSVDataTable is an implementation of DataTable
* that allows the user to interact with CSV Files.
* The CSVDataTable class loads a CSV file into
* a DataTable format that can be maniupulated using
* the methods of the DataTable class.
*
*
* @author Matthew Thornton
* @author Last changed by $Author: stedwar2 $
* @version $Revision: 1.4 $, $Date: 2012/02/29 03:58:16 $
*/
public class CSVDataTable
implements DataTable
{
//~ Instance/static variables .............................................
private List<Row> tableRows;
private Map<String, Row> keyMappings;
private List<String> columnNames;
private String primaryKey;
private char delimiter;
private String url;
private static final String ROW_INDEX_MARKER = "";
private static final String COL_INDEX_PREFIX = "_C";
//~ Constructors ..........................................................
// ----------------------------------------------------------
/**
* This constructor creates an empty DataTable with no columns.
* The key is set to the empty string.
*/
public CSVDataTable()
{
tableRows = new ArrayList<Row>();
columnNames = new ArrayList<String>();
keyMappings = new HashMap<String, Row>();
delimiter = ',';
}
// ----------------------------------------------------------
/**
* Create a new data table using CSV data on a website. The URL
* parameter is a link to a data file stored on the web. All data rows
* will be loaded (i.e., the first row will not be treated as column
* names), and lines will be parsed assuming the delimiter is a
* comma.
*
* @param url the URL path to the CSV file.
*/
public CSVDataTable(String url)
{
this(url, false, ',');
}
// ----------------------------------------------------------
/**
* Create a new data table using CSV data on a website. The URL
* parameter is a link to a data file stored on the web. All data rows
* will be loaded, and lines will be parsed assuming the delimiter is a
* comma.
*
* @param url the URL path to the CSV file.
* @param firstRowNames If true, then the first row in the data source
* should be treated as containing the names for the columns
* in the data set.
*/
public CSVDataTable(String url, boolean firstRowNames)
{
this(url, firstRowNames, ',');
}
// ----------------------------------------------------------
/**
* Create a new data table using CSV data on a website. The URL
* parameter is a link to a data file stored on the web.
*
* @param url the URL path to the CSV file.
* @param firstRowNames If true, then the first row in the data source
* should be treated as containing the names for the columns
* in the data set.
* @param delimiter The separator between fields on the same line
* (e.g., a comma, tab, colon, etc.).
*/
public CSVDataTable(String url, boolean firstRowNames, char delimiter)
{
this.url = url;
try
{
InputStream in = (new URL(url)).openStream();
loadFrom(in, firstRowNames, delimiter);
in.close();
}
catch (IOException e)
{
throw new RuntimeException(e);
}
}
// ----------------------------------------------------------
/**
* Create a new data table using CSV data from a file. All data rows
* will be loaded (i.e., the first row will not be treated as column
* names), and lines will be parsed assuming the delimiter is a
* comma.
*
* @param file the CSV file to read.
*/
public CSVDataTable(File file)
{
this(file, false, ',');
}
// ----------------------------------------------------------
/**
* Create a new data table using CSV data from a file. All data rows
* will be loaded, and lines will be parsed assuming the delimiter is a
* comma.
*
* @param file the CSV file to read.
* @param firstRowNames If true, then the first row in the data source
* should be treated as containing the names for the columns
* in the data set.
*/
public CSVDataTable(File file, boolean firstRowNames)
{
this(file, firstRowNames, ',');
}
// ----------------------------------------------------------
/**
* Create a new data table using CSV data on a website. The URL
* parameter is a link to a data file stored on the web.
*
* @param file the CSV file to read.
* @param firstRowNames If true, then the first row in the data source
* should be treated as containing the names for the columns
* in the data set.
* @param delimiter The separator between fields on the same line
* (e.g., a comma, tab, colon, etc.).
*/
public CSVDataTable(File file, boolean firstRowNames, char delimiter)
{
try
{
InputStream in = new FileInputStream(file);
loadFrom(in, firstRowNames, delimiter);
in.close();
}
catch (IOException e)
{
throw new RuntimeException(e);
}
}
// ----------------------------------------------------------
/**
* Create a new data table using CSV data from an input stream. The URL
* parameter is a link to a data file stored on the web.
*
* @param in the input stream that contains the CSV data.
* @param firstRowNames If true, then the first row in the data source
* should be treated as containing the names for the columns
* in the data set.
* @param delimiter The separator between fields on the same line
* (e.g., a comma, tab, colon, etc.).
*/
public CSVDataTable(InputStream in, boolean firstRowNames, char delimiter)
{
url = null;
loadFrom(in, firstRowNames, delimiter);
}
//~ Public Methods ........................................................
// ----------------------------------------------------------
public int rowCount()
{
return tableRows.size();
}
// ----------------------------------------------------------
public int colCount()
{
if (tableRows.size() == 0)
{
return 0;
}
else
{
return tableRows.get(0).size();
}
}
// ----------------------------------------------------------
public String getCell(int row, String column)
{
return tableRows.get(row).get(column);
}
// ----------------------------------------------------------
public String getCell(int row, int column)
{
return tableRows.get(row).get(COL_INDEX_PREFIX + column);
}
// ----------------------------------------------------------
public String getCell(String row, String column)
{
return keyMappings.get(row).get(column);
}
// ----------------------------------------------------------
public String getCell(String row, int column)
{
return keyMappings.get(row).get(COL_INDEX_PREFIX + column);
}
// ----------------------------------------------------------
public int getIntCell(int row, String column)
throws NumberFormatException
{
return Integer.parseInt(getCell(row, column));
}
// ----------------------------------------------------------
public int getIntCell(int row, int column)
throws NumberFormatException
{
return Integer.parseInt(getCell(row, column));
}
// ----------------------------------------------------------
public int getIntCell(String row, String column)
throws NumberFormatException
{
return Integer.parseInt(getCell(row, column));
}
// ----------------------------------------------------------
public int getIntCell(String row, int column)
throws NumberFormatException
{
return Integer.parseInt(getCell(row, column));
}
// ----------------------------------------------------------
public double getDoubleCell(int row, String column)
throws NumberFormatException
{
return Double.parseDouble(getCell(row, column));
}
// ----------------------------------------------------------
public double getDoubleCell(int row, int column)
throws NumberFormatException
{
return Double.parseDouble(getCell(row, column));
}
// ----------------------------------------------------------
public double getDoubleCell(String row, String column)
throws NumberFormatException
{
return Double.parseDouble(getCell(row, column));
}
// ----------------------------------------------------------
public double getDoubleCell(String row, int column)
throws NumberFormatException
{
return Double.parseDouble(getCell(row, column));
}
// ----------------------------------------------------------
public Row getRow(int row)
{
return tableRows.get(row);
}
// ----------------------------------------------------------
public Row getRow(String row)
{
return keyMappings.get(row);
}
// ----------------------------------------------------------
public void setCell(int row, String column, String value)
{
setCell(tableRows.get(row), column, value);
}
// ----------------------------------------------------------
public void setCell(int row, String column, int value)
{
setCell(tableRows.get(row), column, Integer.toString(value));
}
// ----------------------------------------------------------
public void setCell(int row, String column, double value)
{
setCell(tableRows.get(row), column, Double.toString(value));
}
// ----------------------------------------------------------
public void setCell(int row, int column, String value)
{
setCell(tableRows.get(row), column, value);
}
// ----------------------------------------------------------
public void setCell(int row, int column, int value)
{
setCell(tableRows.get(row), column, Integer.toString(value));
}
// ----------------------------------------------------------
public void setCell(int row, int column, double value)
{
setCell(tableRows.get(row), column, Double.toString(value));
}
// ----------------------------------------------------------
public void setCell(String row, String column, String value)
{
setCell(keyMappings.get(row), column, value);
}
// ----------------------------------------------------------
public void setCell(String row, String column, int value)
{
setCell(keyMappings.get(row), column, Integer.toString(value));
}
// ----------------------------------------------------------
public void setCell(String row, String column, double value)
{
setCell(keyMappings.get(row), column, Double.toString(value));
}
// ----------------------------------------------------------
public void setCell(String row, int column, String value)
{
setCell(keyMappings.get(row), column, value);
}
// ----------------------------------------------------------
public void setCell(String row, int column, int value)
{
setCell(keyMappings.get(row), column, Integer.toString(value));
}
// ----------------------------------------------------------
public void setCell(String row, int column, double value)
{
setCell(keyMappings.get(row), column, Double.toString(value));
}
// ----------------------------------------------------------
public Row removeRow(int row)
{
Row removedRow = tableRows.remove(row);
if (primaryKey != null)
{
keyMappings.remove(removedRow.get(primaryKey));
}
remapTableRows(row);
return removedRow;
}
// ----------------------------------------------------------
public Row removeRow(String row)
{
Row removedRow = keyMappings.remove(row);
int rowNumber = tableRows.indexOf(removedRow);
tableRows.remove(rowNumber);
remapTableRows(rowNumber);
return removedRow;
}
// ----------------------------------------------------------
public void addColumn(String columnName)
{
addColumn(columnName, null);
}
// ----------------------------------------------------------
public void addColumn(String columnName, String defaultValue)
{
int colNumber = columnNames.size();
columnNames.add(columnName);
for (int i = 0; i < tableRows.size(); i++)
{
Row row = tableRows.get(i);
if (defaultValue != null)
{
row.put(columnName, defaultValue);
row.put(COL_INDEX_PREFIX + colNumber, defaultValue);
}
}
}
// ----------------------------------------------------------
public void addRow(Row row)
{
row.put(ROW_INDEX_MARKER, Integer.toString(tableRows.size()));
tableRows.add(row);
for (int i = 0; i < columnNames.size(); i++)
{
String data = row.get(columnNames.get(i));
row.put(COL_INDEX_PREFIX + i, data);
}
keyMappings.put(row.get(primaryKey), row);
}
// ----------------------------------------------------------
public void setPrimaryKey(String key)
{
assert columnNames.contains(key)
: "The primary key must be one of the column names.";
primaryKey = key;
constructBindings();
}
// ----------------------------------------------------------
public String getPrimaryKey()
{
return primaryKey;
}
// ----------------------------------------------------------
public List<String> getColumnNames()
{
return columnNames;
}
// ----------------------------------------------------------
public void setColumnNames(List<String> columns)
{
// assert columns.size() == columnNames.size()
// : "Array passed should map to all of the existing columns.";
if (primaryKey != null)
{
primaryKey = columns.get(columnNames.indexOf(primaryKey));
}
for (int i = 0; i < tableRows.size(); i++)
{
Row row = tableRows.get(i);
// Clear old col names
for (String col : columnNames)
{
row.remove(col);
}
// Add new col names
for (int j = 0; j < columns.size(); j++)
{
row.put(columns.get(j), row.get(COL_INDEX_PREFIX + j));
}
}
constructBindings();
columnNames = new ArrayList<String>(columns);
}
// ----------------------------------------------------------
public void setColumnNames(String... columns)
{
setColumnNames(Arrays.asList(columns));
}
// ----------------------------------------------------------
public void remapColumnNames(Map<String, String> columnMap)
{
ArrayList<String> newColumnNames = new ArrayList<String>();
assert columnMap.size() == columnNames.size()
: "The columnMap parameter must map each of the column "
+ "names to a new value.";
if (primaryKey != null)
{
primaryKey = columnMap.get(primaryKey);
}
for (int i = 0; i < columnNames.size(); i++)
{
newColumnNames.add(columnMap.get(columnNames.get(i)));
}
for (int i = 0; i < tableRows.size(); i++)
{
Row row = tableRows.get(i);
for (String col : columnNames)
{
String val = row.remove(col);
row.put(columnMap.get(col), val);
}
}
constructBindings();
columnNames = newColumnNames;
}
// ----------------------------------------------------------
public List<Row> getAllRows()
{
return tableRows;
}
// ----------------------------------------------------------
public Iterator<Row> iterator()
{
return getAllRows().iterator();
}
// ----------------------------------------------------------
/**
* Get the URL used to load this data table.
* @return The URL used to load this data table, or null if it was
* not loaded from a URL.
*/
public String getUrl()
{
return url;
}
// ----------------------------------------------------------
@SuppressWarnings("unchecked")
public void copyRowsFrom(DataTable dataSet)
{
tableRows = (List<Row>)((ArrayList<Row>)dataSet.getAllRows()).clone();
columnNames = (List<String>)((ArrayList<String>)dataSet
.getColumnNames()).clone();
constructBindings();
}
// ----------------------------------------------------------
public int count(String column, String value)
{
int count = 0;
for (Row row : tableRows)
{
if ( (value == null && row.get(column) == null)
|| (value != null && value.equals(row.get(column))))
{
count++;
}
}
return count;
}
// ----------------------------------------------------------
public double avg(String column)
throws NumberFormatException
{
double sum = 0.0;
int count = 0;
for (Row row : tableRows)
{
String val = row.get(column);
if (val != null)
{
sum += Double.parseDouble(val);
count++;
}
}
return count == 0
? 0.0
: sum / count;
}
// ----------------------------------------------------------
public double max(String column)
throws NumberFormatException
{
assert tableRows.size() > 0
: "There are no values in the table";
boolean found = false;
double max = Double.NEGATIVE_INFINITY;
for (Row row : tableRows)
{
String val = row.get(column);
if (val != null)
{
double d = Double.parseDouble(val);
if (d > max)
{
found = true;
max = d;
}
}
}
assert found : "There are no values in this column of the table";
return max;
}
// ----------------------------------------------------------
public double min(String column)
throws NumberFormatException
{
assert tableRows.size() > 0
: "There are no values in the table";
boolean found = false;
double min = Double.POSITIVE_INFINITY;
for (Row row : tableRows)
{
String val = row.get(column);
if (val != null)
{
double d = Double.parseDouble(val);
if (d < min)
{
found = true;
min = d;
}
}
}
assert found : "There are no values in this column of the table";
return min;
}
// ----------------------------------------------------------
/**
* This method saves the CSVDataTable in a CSV file format
* with the first row the column names and the remaining rows
* the data in the table. The delimiter for the CSV is determined
* either the value that was used in creating the data table or the
* value that is set with the setDelimiter() method.
*
* @param filename the path to the file to save the CSVDataTable to.
*/
public void save(String filename)
{
try
{
FileWriter out = new FileWriter(IOHelper.getFile(filename));
ExcelCSVPrinter printer = new ExcelCSVPrinter(out);
printer.changeDelimiter(delimiter);
for (Row row : tableRows)
{
for (String col : columnNames)
{
printer.print(row.get(col));
}
int col = columnNames.size();
String coln = COL_INDEX_PREFIX + col;
String val = row.get(coln);
while (val != null)
{
printer.print(val);
col++;
coln = COL_INDEX_PREFIX + col;
val = row.get(coln);
}
printer.println();
}
printer.close();
out.close();
}
catch (IOException e)
{
throw new RuntimeException(e);
}
}
// ----------------------------------------------------------
/**
* Set the delimiter used by this data set, which will affect how
* this object will be written to a file in the future.
* @param delimiter The separator between fields on the same line
* (e.g., a comma, tab, colon, etc.).
*/
public void setDelimiter(char delimiter)
{
this.delimiter = delimiter;
}
// ----------------------------------------------------------
/**
* Get the delimiter used by this data set.
* @return the current delimiter
*/
public char getDelimiter()
{
return delimiter;
}
//~ Private Methods .......................................................
// ----------------------------------------------------------
private void loadFrom(
InputStream in, boolean firstRowNames, char newDelimiter)
{
tableRows = new ArrayList<Row>();
columnNames = new ArrayList<String>();
keyMappings = new HashMap<String, Row>();
this.delimiter = newDelimiter;
try
{
String[][] table =
ExcelCSVParser.parse(new InputStreamReader(in), newDelimiter);
constructColumnNames(table, firstRowNames);
constructTable(table, firstRowNames);
}
catch(IOException e)
{
throw new RuntimeException(e);
}
}
// ----------------------------------------------------------
private void constructTable(String[][] parsedTable, boolean firstRowNames)
{
tableRows.clear();
int rowCount = 0;
for (int i = firstRowNames ? 1 : 0; i < parsedTable.length; i++)
{
HashedRow newRow = new HashedRow();
newRow.put(ROW_INDEX_MARKER, Integer.toString(rowCount));
tableRows.add(newRow);
for (int j = 0; j < parsedTable[i].length; j++)
{
newRow.put(COL_INDEX_PREFIX + j, parsedTable[i][j]);
if (columnNames.size() > j)
{
newRow.put(columnNames.get(j), parsedTable[i][j]);
}
}
if (primaryKey != null)
{
keyMappings.put(newRow.get(primaryKey), newRow);
}
rowCount++;
}
}
// ----------------------------------------------------------
private void constructColumnNames(String[][] table, boolean firstRowNames)
{
columnNames.clear();
if (firstRowNames && table.length > 0)
{
for (String col : table[0])
{
columnNames.add(col);
}
}
}
// ----------------------------------------------------------
private void constructBindings()
{
keyMappings.clear();
if (primaryKey == null)
{
return;
}
for (int i = 0; i < tableRows.size(); i++)
{
// assert !keyMappings.containsKey(
// tableRows.get(i).get(primaryKey))
// : "Primary Key must be unique.";
Row row = tableRows.get(i);
keyMappings.put(row.get(primaryKey), row);
}
}
// ----------------------------------------------------------
private void remapTableRows(int row)
{
for (int i = row;i < tableRows.size(); i++)
{
tableRows.get(i).put(ROW_INDEX_MARKER, Integer.toString(i));
}
}
// ----------------------------------------------------------
private void setCell(Row row, String col, String value)
{
row.put(col, value);
int num = columnNames.indexOf(col);
row.put(COL_INDEX_PREFIX + num, value);
}
// ----------------------------------------------------------
private void setCell(Row row, int col, String value)
{
row.put(COL_INDEX_PREFIX + col, value);
if (columnNames.size() > col)
{
row.put(columnNames.get(col), value);
}
}
}