/*
* SimpleCSVImporter.java
*
* Created on September 22, 2005, 12:27 PM
*
* To change this template, choose Tools | Options and locate the template under
* the Source Creation and Management node. Right-click the template and choose
* Open. You can then make changes to the template in the Source Editor.
*/
package org.tgdb.io;
import org.tgdb.util.TabularData;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
/**
 * Reads CSV data, either from a file or from an array of text lines, into a
 * matrix of strings (one {@code String[]} per stored row).
 *
 * <p>Two splitting modes exist, selected with {@link #useTextDelimiter(boolean)}:
 * <ul>
 * <li>plain mode (the default): the separator is handed to
 *     {@link String#split(String, int)} and is therefore interpreted as a
 *     regular expression; every cell is trimmed and double quotes are ordinary
 *     characters;</li>
 * <li>text delimiter mode: cells may be enclosed in double quotes, a separator
 *     inside quotes belongs to the cell, a doubled quote inside quotes yields
 *     one literal quote, and only the first character of the separator is
 *     used.</li>
 * </ul>
 *
 * <p>Known limitation: every row is trimmed before it is split, so leading or
 * trailing separators that are whitespace (for example tabs) are lost.
 *
 * <p>The code deliberately sticks to the language level already used by this
 * file (raw collections, no try-with-resources).
 *
 * @author lami
 */
public class SimpleCSVImporter {

    /** Character that encloses a text cell. */
    private static final char TEXT_DELIMITER = '"';

    /** Upper bound on the continuation lines {@link #getLine(BufferedReader)} accepts. */
    private static final int MAX_CONTINUATION_LINES = 100;

    /** Result of the latest successful import; {@code null} until then. */
    private String[][] csvData;

    /** {@code true} when cells may be enclosed in double quotes. */
    private boolean useTextDelimiter;

    /** Creates a new instance of SimpleCSVImporter */
    public SimpleCSVImporter() {
    }

    /**
     * Returns the data matrix.
     *
     * @return the internal matrix produced by the latest successful import
     *         (not a copy), or {@code null} if nothing has been imported yet
     */
    public String[][] getData() {
        return csvData;
    }

    /**
     * Wraps the imported matrix in a {@code TabularData} object. According to
     * the original documentation this is meant for CSV data that has a header
     * row.
     *
     * @return a new {@code TabularData} built from the current matrix
     */
    public TabularData getTabularData() {
        return new TabularData(csvData);
    }

    /**
     * Returns the number of columns, taken from the first stored row.
     *
     * @return the length of the first row, or 0 if no rows are available
     */
    public int getNumCols() {
        if (csvData == null || csvData.length == 0) {
            return 0;
        }
        return csvData[0].length;
    }

    /**
     * Returns the number of stored rows, including the header.
     *
     * @return the number of rows, or 0 if nothing has been imported yet
     */
    public int getNumRows() {
        if (csvData == null) {
            return 0;
        }
        return csvData.length;
    }

    /**
     * Switches text delimiter (double quote) handling on or off.
     *
     * @param val {@code true} to honour double quotes when splitting rows
     */
    public void useTextDelimiter(boolean val) {
        useTextDelimiter = val;
    }

    /**
     * Prints a String array to std out as {@code <length>#<cell><delim><cell>...}.
     * This is for debugging.
     *
     * @param str the cells to print
     * @param delim the text written between two cells
     */
    public void printCSV(String[] str, String delim) {
        StringBuffer out = new StringBuffer();
        out.append(str.length).append('#');
        for (int i = 0; i < str.length; i++) {
            if (i != 0) {
                out.append(delim);
            }
            out.append(str[i]);
        }
        System.out.println(out.toString());
    }

    /**
     * Gets one logical "line" of CSV data from a reader. Physical lines are
     * trimmed and joined (each followed by a newline character) for as long as
     * a text delimiter is still open.
     *
     * @param in the reader to take lines from
     * @return the logical line, or {@code null} if the reader was already at
     *         end of input
     * @throws IOException if reading fails, if the input ends while a text
     *         delimiter is open, or if the text is still open after too many
     *         continuation lines
     */
    private String getLine(BufferedReader in) throws IOException {
        StringBuffer row = new StringBuffer();
        int linesRead = 0;
        while (true) {
            String line = in.readLine();
            if (line == null) {
                if (linesRead == 0) {
                    return null;
                }
                // End of input in the middle of a quoted cell.
                throw new IOException("Unterminated text string found. Aborting.");
            }
            row.append(line.trim()).append('\n');
            if (!checkUnfinishedRow(row.toString())) {
                return row.toString();
            }
            if (linesRead > MAX_CONTINUATION_LINES) {
                throw new IOException("Unterminated text string found. Aborting.");
            }
            linesRead++;
        }
    }

    /**
     * Reads the CSV file. The file is decoded with the default character
     * encoding of {@link FileReader}.
     *
     * @param fileName The file to read
     * @param separator The separator, i.e. comma, semicolon etc.
     * @param equalColumnSizeOnly Set to true if columns should be of equals size.
     * @throws java.io.IOException If something went wrong during the process.
     */
    public void importCSV(String fileName, String separator, boolean equalColumnSizeOnly) throws IOException {
        ArrayList lines = new ArrayList();
        BufferedReader in = new BufferedReader(new FileReader(fileName));
        try {
            // readLine() returning null is the reliable end-of-file signal;
            // ready() only tells whether a read would block.
            String line = in.readLine();
            while (line != null) {
                lines.add(line);
                line = in.readLine();
            }
        } finally {
            // Close the file even when reading fails.
            in.close();
        }
        importCSV(toStringArray(lines), separator, equalColumnSizeOnly);
    }

    /**
     * Check if a row opens a text delimiter but never closes it again, i.e.
     * whether it contains an odd number of double quotes. This means that a
     * csv cell contains a line break and it needs to be handled.
     *
     * @param row the text to inspect
     * @return {@code true} if a text delimiter is still open at the end of the row
     */
    public boolean checkUnfinishedRow(String row) {
        boolean insideTextDelim = false;
        for (int i = 0; i < row.length(); i++) {
            if (row.charAt(i) == TEXT_DELIMITER) {
                insideTextDelim = !insideTextDelim;
            }
        }
        return insideTextDelim;
    }

    /**
     * Joins lines that belong to one row because a line break occurs inside a
     * text delimiter. The joined parts are separated with a newline character.
     * Note that this is done whether or not text delimiter mode is enabled.
     *
     * @param csvText the raw lines
     * @return the logical rows
     * @throws IOException if the input ends while a text delimiter is open
     */
    private String[] fixMultiLine(String[] csvText) throws IOException {
        ArrayList merged = new ArrayList();
        int i = 0;
        while (i < csvText.length) {
            int firstLine = i;
            StringBuffer row = new StringBuffer(csvText[i++]);
            while (checkUnfinishedRow(row.toString())) {
                if (i >= csvText.length) {
                    // Previously this ran past the end of the array.
                    throw new IOException("Unterminated text string starting on line "
                            + (firstLine + 1) + ". Aborting.");
                }
                row.append('\n').append(csvText[i++]);
            }
            merged.add(row.toString());
        }
        return toStringArray(merged);
    }

    /**
     * Reads CSV Data from a String array. Lines are first joined where a text
     * delimiter spans a line break, then trimmed; rows that are empty after
     * trimming are skipped. The stored matrix is only replaced when the whole
     * import succeeds.
     *
     * @param csvText A String array with unparsed CSV data
     * @param separator The separator, i.e. comma, semicolon etc.
     * @param equalColumnSizeOnly Set to true if columns should be of equals size;
     *        every row must then have as many cells as the first non-empty row.
     * @throws java.io.IOException If something went wrong during the process:
     *         no data rows, a missing separator, an unterminated text string,
     *         a row that cannot be split, or rows of unequal size when
     *         {@code equalColumnSizeOnly} is set.
     */
    public void importCSV(String[] csvText, String separator, boolean equalColumnSizeOnly) throws IOException {
        if (csvText == null || csvText.length == 0) {
            throw new IOException("No CSV data to import.");
        }
        if (separator == null) {
            throw new IOException("No CSV separator given.");
        }

        String[] lines = fixMultiLine(csvText);
        ArrayList rows = new ArrayList();
        int cols = -1; // unknown until the first non-empty row has been split

        for (int j = 0; j < lines.length; j++) {
            String row = lines[j].trim();
            if (row.length() == 0) {
                continue;
            }

            String[] cells;
            try {
                cells = splitRow(row, separator);
            } catch (IllegalArgumentException e) {
                // Covers an invalid separator pattern as well as an empty
                // separator in text delimiter mode.
                IOException ioe = new IOException("Could not split CSV row " + rows.size()
                        + ": " + e.getMessage());
                ioe.initCause(e);
                throw ioe;
            }

            if (equalColumnSizeOnly) {
                if (cols < 0) {
                    cols = cells.length;
                } else if (cols != cells.length) {
                    throw new IOException("Columns are not equal on row "+rows.size()+"! Current col:"+cells.length+" Previous col:"+cols);
                }
            }
            rows.add(cells);
        }

        if (rows.isEmpty()) {
            throw new IOException("CSV input contains no data rows.");
        }
        // Sized by the rows actually stored, so skipped blank lines no longer
        // leave null-filled rows at the end of the matrix.
        csvData = (String[][]) rows.toArray(new String[rows.size()][]);
    }

    /**
     * Split a row into cells, honouring the current text delimiter setting.
     * In text delimiter mode the raw text may contain quoted cells.
     *
     * Possible input:
     * "hello, I can use a comma, in this test","blah","abc,."
     *
     * Output in text delimiter mode:
     * [hello, I can use a comma, in this test],[blah],[abc,.]
     *
     * @param str the row to split
     * @param delimiter the separator; a regular expression in plain mode, while
     *        only its first character is used in text delimiter mode
     * @return the cells of the row, including trailing empty cells
     */
    public String[] splitRow(String str, String delimiter) {
        if (useTextDelimiter) {
            return splitWithTextDelimiter(str, delimiter);
        }
        return split(str, delimiter);
    }

    /**
     * Split an ordinary csv text row and trim every cell.
     *
     * @param str the row to split
     * @param delimiter the separator, interpreted as a regular expression
     * @return the trimmed cells
     */
    private String[] split(String str, String delimiter) {
        // Limit -1 keeps trailing empty cells ("1,2," has three cells), which
        // matches what splitWithTextDelimiter produces for the same row.
        String[] cells = str.split(delimiter, -1);
        for (int i = 0; i < cells.length; i++) {
            cells[i] = cells[i].trim();
        }
        return cells;
    }

    /**
     * Split a row whose cells may be enclosed in double quotes. Quotes are
     * removed from the output, a separator inside quotes is part of the cell,
     * and a doubled quote inside quotes produces one literal quote. Cells are
     * not trimmed.
     *
     * @param str the row to split
     * @param delimiter the separator; only its first character is used
     * @return the cells of the row
     * @throws IllegalArgumentException if the delimiter is null or empty
     */
    private String[] splitWithTextDelimiter(String str, String delimiter) {
        if (delimiter == null || delimiter.length() == 0) {
            throw new IllegalArgumentException("The delimiter must not be empty.");
        }
        char delim = delimiter.charAt(0);
        ArrayList cells = new ArrayList();
        StringBuffer cell = new StringBuffer();
        boolean insideText = false;
        int length = str.length();

        for (int i = 0; i < length; i++) {
            char c = str.charAt(i);
            if (c == TEXT_DELIMITER) {
                if (insideText && i + 1 < length && str.charAt(i + 1) == TEXT_DELIMITER) {
                    // Escaped quote ("") inside a quoted cell: keep one quote.
                    cell.append(TEXT_DELIMITER);
                    i++;
                } else {
                    // Opening or closing quote; not part of the cell text.
                    insideText = !insideText;
                }
            } else if (c == delim && !insideText) {
                cells.add(cell.toString());
                cell.setLength(0);
            } else {
                cell.append(c);
            }
        }
        // Add last element
        cells.add(cell.toString());
        return toStringArray(cells);
    }

    /**
     * Copies a list that contains only strings into a String array.
     *
     * @param list the list to copy
     * @return an array with the elements of the list, in order
     */
    private static String[] toStringArray(ArrayList list) {
        return (String[]) list.toArray(new String[list.size()]);
    }
}