package edu.harvard.i2b2.crc.loader.util.csv;
import java.util.*;
import java.io.*;
/**
* CSVFileReader is a class derived from CSVFile used to parse an existing CSV file.
* <p>
* Adapted from a C++ original that is Copyright (C) 1999 Lucent Technologies.<br>
* Excerpted from 'The Practice of Programming' by Brian Kernighan and Rob Pike.
* <p>
* Included by permission of the <a href="http://tpop.awl.com/">Addison-Wesley</a> web site, which says:
* <cite>"You may use this code for any purpose, as long as you leave the copyright notice and book citation attached"</cite>.
*
* @author Brian Kernighan and Rob Pike (C++ original)
* @author Ian F. Darwin (translation into Java and removal of I/O)
* @author Ben Ballard (rewrote handleQuotedField to handle double quotes and for readability)
* @author Fabrizio Fazzino (added integration with CSVFile, handling of variable textQualifier and Vector with explicit String type)
* @version %I%, %G%
*/
public class CSVFileReader extends CSVFile implements Closeable {

    /**
     * The buffered reader linked to the CSV file to be read.
     */
    protected BufferedReader in;

    /**
     * Creates a reader for an existing CSV file using the default field
     * separator and text qualifier of {@link CSVFile}.
     * <p>
     * The file is opened with {@link FileReader#FileReader(String)}, i.e. it is
     * decoded with the platform default charset.
     *
     * @param inputFileName The name of the CSV file to be opened for reading
     * @throws FileNotFoundException If the file to be read does not exist
     */
    public CSVFileReader(String inputFileName) throws FileNotFoundException {
        super();
        in = new BufferedReader(new FileReader(inputFileName));
    }

    /**
     * Creates a reader for an existing CSV file with a given field separator.
     *
     * @param inputFileName The name of the CSV file to be opened for reading
     * @param sep The field separator to be used; overwrites the default one
     * @throws FileNotFoundException If the file to be read does not exist
     */
    public CSVFileReader(String inputFileName, char sep) throws FileNotFoundException {
        super(sep);
        in = new BufferedReader(new FileReader(inputFileName));
    }

    /**
     * Creates a reader for an existing CSV file with a given field separator
     * and text qualifier.
     *
     * @param inputFileName The name of the CSV file to be opened for reading
     * @param sep The field separator to be used; overwrites the default one
     * @param qual The text qualifier to be used; overwrites the default one
     * @throws FileNotFoundException If the file to be read does not exist
     */
    public CSVFileReader(String inputFileName, char sep, char qual) throws FileNotFoundException {
        super(sep, qual);
        in = new BufferedReader(new FileReader(inputFileName));
    }

    /**
     * Split the next line of the input CSV file into fields.
     * <p>
     * This is currently the most important function of the package.
     * <p>
     * An empty line yields a vector holding a single empty string. A line that
     * ends with the field separator yields a trailing empty field, so that
     * {@code a,b,} is split into {@code "a"}, {@code "b"} and {@code ""}.
     *
     * @return Vector of strings containing each field from the next line of the file,
     *         or {@code null} when the end of the file has been reached
     * @throws IOException If an error occurs while reading the new line from the file
     */
    public Vector<String> readFields() throws IOException {
        String line = in.readLine();
        if (line == null) {
            return null; // end of file
        }

        Vector<String> fields = new Vector<String>();
        int len = line.length();
        if (len == 0) {
            fields.add(line);
            return fields;
        }

        StringBuffer sb = new StringBuffer();
        int i = 0;
        boolean endedBySeparator;
        do {
            sb.setLength(0);
            if (i < len && line.charAt(i) == textQualifier) {
                i = handleQuotedField(line, sb, i + 1); // skip opening qualifier
            } else {
                i = handlePlainField(line, sb, i);
            }
            fields.add(sb.toString());

            // Keep going only when the field was terminated by a separator.
            // Testing the terminator itself (rather than "is there text left?")
            // is what makes a trailing separator produce a final empty field.
            endedBySeparator = i < len && line.charAt(i) == fieldSeparator;
            i++; // step over the separator
        } while (endedBySeparator);
        return fields;
    }

    /**
     * Close the input CSV file.
     *
     * @throws IOException If an error occurs while closing the file
     */
    @Override
    public void close() throws IOException {
        in.close();
    }

    /**
     * Handles a quoted field.
     * <p>
     * Scanning starts just after the opening text qualifier. A doubled
     * qualifier is unescaped to a single one. A qualifier that is followed by
     * anything other than a qualifier or the field separator is copied
     * literally.
     *
     * @param s The line being parsed
     * @param sb The buffer the unquoted field content is appended to
     * @param i The index of the first character after the opening qualifier
     * @return index of next separator; if the closing qualifier is the last
     *         character of the line, the index of that qualifier; if the line
     *         ends without a closing qualifier, {@code s.length()}
     */
    protected int handleQuotedField(String s, StringBuffer sb, int i) {
        int len = s.length();
        int j;
        for (j = i; j < len; j++) {
            char c = s.charAt(j);
            if (c == textQualifier) {
                if (j + 1 == len) {
                    break; // closing qualifier at end of line
                }
                char next = s.charAt(j + 1);
                if (next == textQualifier) {
                    j++; // escaped qualifier: skip the first, append the second below
                } else if (next == fieldSeparator) {
                    j++; // closing qualifier: move onto the separator and stop
                    break;
                }
                // otherwise: stray qualifier inside the field, kept as-is
            }
            sb.append(s.charAt(j)); // regular character
        }
        return j;
    }

    /**
     * Handles an unquoted field.
     *
     * @param s The line being parsed
     * @param sb The buffer the field content is appended to
     * @param i The index of the first character of the field
     * @return index of next separator, or {@code s.length()} if there is none
     */
    protected int handlePlainField(String s, StringBuffer sb, int i) {
        int j = s.indexOf(fieldSeparator, i); // look for separator
        if (j == -1) { // none found: field runs to end of line
            sb.append(s.substring(i));
            return s.length();
        }
        sb.append(s.substring(i, j));
        return j;
    }
}