/*
* Copyright(c) 2005 Center for E-Commerce Infrastructure Development, The
* University of Hong Kong (HKU). All Rights Reserved.
*
* This software is licensed under the GNU GENERAL PUBLIC LICENSE Version 2.0 [1]
*
* [1] http://www.gnu.org/licenses/old-licenses/gpl-2.0.txt
*/
package hk.hku.cecid.piazza.commons.util;
import hk.hku.cecid.piazza.commons.io.IOHandler;
import java.io.IOException;
import java.io.Reader;
import java.util.ArrayList;
import java.util.StringTokenizer;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * CSVTokenizer is a tokenizer which parses a given CSV content and represents
 * it as multiple rows and columns.
 * <p>
 * Rows are separated by carriage return and/or line feed characters. Because
 * consecutive line terminators are treated as a single separator, blank lines
 * are skipped. A quoted column may span several lines; its lines are re-joined
 * using the platform line separator, so the original line terminator (and any
 * blank line inside the quoted column) is not preserved.
 * <p>
 * Typical usage:
 * <pre>
 * CSVTokenizer csv = new CSVTokenizer(content);
 * while (csv.hasMoreRows()) {
 *     csv.nextRow();
 *     for (int i = 0; i &lt; csv.getColumnCount(); i++) {
 *         String value = csv.getColumn(i);
 *     }
 * }
 * </pre>
 *
 * @author Hugo Y. K. Lam
 *
 */
public class CSVTokenizer {

    /** Matches a column enclosed in double quotes, ignoring surrounding white space. */
    private static final Pattern ENCLOSING_QUOTES =
            Pattern.compile("^(?s)[\\s]*\"(.*)\"[\\s]*$");

    /** Matches an escaped double quote (two consecutive double quotes). */
    private static final Pattern ESCAPED_QUOTE = Pattern.compile("\"\"");

    /** The remaining physical lines of the CSV content. */
    private final StringTokenizer rows;

    /** The columns of the current row, or null if no row has been tokenized yet. */
    private String[] columns;

    /** The separator used to re-join the lines of a multi-line quoted column. */
    private final String lineFeed = System.getProperty("line.separator");

    /**
     * Creates a new instance of CSVTokenizer.
     *
     * @param csv the CSV content.
     * @throws IOException if unable to read the content from the given reader.
     */
    public CSVTokenizer(Reader csv) throws IOException {
        this(IOHandler.readString(csv));
    }

    /**
     * Creates a new instance of CSVTokenizer.
     *
     * @param csv the CSV content. A null content is treated as an empty one.
     */
    public CSVTokenizer(String csv) {
        rows = new StringTokenizer(csv == null ? "" : csv, "\r\n");
    }

    /**
     * Checks if there are any more rows in this tokenizer.
     *
     * @return true if there are more rows in this tokenizer.
     */
    public boolean hasMoreRows() {
        return rows.hasMoreTokens();
    }

    /**
     * Proceeds to tokenize the next row.
     * <p>
     * If the next line ends inside a quoted column, the following lines are
     * appended to it until the quote is closed. If the content ends before the
     * quote is closed, the row is tokenized with whatever has been read
     * instead of failing.
     *
     * @throws java.util.NoSuchElementException if there are no more rows.
     */
    public void nextRow() {
        String row = rows.nextToken();
        if (isOpenRow(row)) {
            StringBuilder joined = new StringBuilder(row);
            boolean open = true;
            // Guard on hasMoreTokens() so that an unterminated quote at the
            // end of the content does not raise NoSuchElementException.
            while (open && rows.hasMoreTokens()) {
                String line = rows.nextToken();
                joined.append(lineFeed).append(line);
                // The row so far has an odd number of quotes; it gets closed
                // exactly when the appended line adds another odd number.
                if (isOpenRow(line)) {
                    open = false;
                }
            }
            row = joined.toString();
        }
        tokenizeRow(row);
    }

    /**
     * Gets the number of columns in the current row.
     *
     * @return the number of columns, or 0 if no row has been tokenized yet.
     */
    public int getColumnCount() {
        return columns == null ? 0 : columns.length;
    }

    /**
     * Gets a column's value.
     *
     * @param pos the zero-based position of the column.
     * @return the specified column's value, or null if no row has been
     *         tokenized yet or the position is out of range.
     */
    public String getColumn(int pos) {
        if (columns == null || pos < 0 || pos >= columns.length) {
            return null;
        }
        return columns[pos];
    }

    /**
     * Checks if the given row is an open row, i.e. it contains an odd number
     * of double quotes and therefore ends inside a quoted column.
     *
     * @param r the row for checking.
     * @return true if the given row is an open row.
     */
    private boolean isOpenRow(String r) {
        boolean open = false;
        for (int i = 0; r != null && i < r.length(); i++) {
            if ('"' == r.charAt(i)) {
                open = !open;
            }
        }
        return open;
    }

    /**
     * Tokenizes the given row into columns and stores them as the current row.
     * <p>
     * The row is split at every comma which is not inside a quoted section.
     * An escaped quote (two double quotes) toggles the quoted state twice and
     * hence leaves it unchanged.
     *
     * @param row the row to be tokenized.
     */
    private void tokenizeRow(String row) {
        ArrayList<String> tokens = new ArrayList<String>();
        boolean quoted = false;
        int start = 0;
        for (int i = 0; i < row.length(); i++) {
            char ch = row.charAt(i);
            if (ch == '"') {
                quoted = !quoted;
            } else if (ch == ',' && !quoted) {
                tokens.add(normalize(row.substring(start, i)));
                start = i + 1;
            }
        }
        // The text after the last delimiter (possibly empty) is the last column.
        tokens.add(normalize(row.substring(start)));
        columns = tokens.toArray(new String[tokens.size()]);
    }

    /**
     * Normalizes the given column by removing its enclosing double quotes,
     * together with any white space around them, and converting each escaped
     * double quote back to a single double quote.
     *
     * @param s the string to be normalized.
     * @return the normalized string.
     */
    private String normalize(String s) {
        String unquoted = ENCLOSING_QUOTES.matcher(s).replaceAll("$1");
        return ESCAPED_QUOTE.matcher(unquoted).replaceAll("\"");
    }
}