package edu.northwestern.at.utils.csv;
/* Please see the license information in the header below. */
import java.io.*;
import java.util.*;
import edu.northwestern.at.utils.*;
/** CSVFileReader reads and parses a delimiter separated text file.
*
* <p>
* Adapted from a C++ original that is
* Copyright (C) 1999 Lucent Technologies.<br>
* Excerpted from 'The Practice of Programming' by Brian Kernighan and
* Rob Pike.
* </p>
*
* <p>
* Included by permission of the
* <a href="http://tpop.awl.com/">Addison-Wesley</a> web site, which says:
* <cite>"You may use this code for any purpose, as long as you leave the
* copyright notice and book citation attached"</cite>.
* </p>
*
* @author Brian Kernighan and Rob Pike (C++ original)
* @author Ian F. Darwin (translation into Java and removal of I/O)
* @author Ben Ballard (rewrote handleQuotedField to handle double
* qualifiers and for readability)
* @author Fabrizio Fazzino (added integration with CSVFile, handling of
* variable qualifier and ArrayList with explicit String type)
* @author Philip R. Burns. Allow character set encoding for files and
* List instead of Vector.
*/
public class CSVFileReader extends CSVFile implements Closeable
{
    /** Buffered reader for the CSV file to be read.
     */
    protected BufferedReader in;

    /** Create a CSV file reader given a file name and encoding.
     *
     *  <p>
     *  The separator and qualifier are whatever the no-argument
     *  superclass constructor selects.
     *  </p>
     *
     *  @param  inputFileName   Name of the CSV formatted input file.
     *  @param  encoding        The character encoding for the file.
     *                          Null or empty means no explicit encoding
     *                          is passed to the underlying reader.
     *
     *  @throws FileNotFoundException   If input file does not exist.
     *  @throws IOException             If input file cannot be opened.
     */
    public CSVFileReader( String inputFileName , String encoding )
        throws IOException, FileNotFoundException
    {
        super();
        openInputFile( inputFileName , encoding );
    }

    /** Create CSVFileReader with a given field separator.
     *
     *  @param  inputFileName   Name of the CSV formatted input file.
     *  @param  encoding        Character encoding for the file.
     *                          Null or empty means no explicit encoding
     *                          is passed to the underlying reader.
     *  @param  separator       Field separator.
     *
     *  @throws FileNotFoundException   If input file does not exist.
     *  @throws IOException             If input file cannot be opened.
     */
    public CSVFileReader
    (
        String inputFileName ,
        String encoding ,
        char separator
    )
        throws IOException , FileNotFoundException
    {
        super( separator );
        openInputFile( inputFileName , encoding );
    }

    /** Create CSVFileReader with given field separator and qualifier character.
     *
     *  @param  inputFileName   Name of the CSV formatted input file.
     *  @param  encoding        Character encoding for the file.
     *                          Null or empty means no explicit encoding
     *                          is passed to the underlying reader.
     *  @param  separator       Field separator.
     *  @param  qualifier       Qualifier (quote) character.
     *                          Ascii 0 means don't use a qualifier.
     *
     *  @throws FileNotFoundException   If input file does not exist.
     *  @throws IOException             If input file cannot be opened.
     */
    public CSVFileReader
    (
        String inputFileName ,
        String encoding ,
        char separator ,
        char qualifier
    )
        throws FileNotFoundException, IOException
    {
        super( separator , qualifier );
        openInputFile( inputFileName , encoding );
    }

    /** Open input file and store a buffered reader for it in {@link #in}.
     *
     *  <p>
     *  Note that this method is invoked from the constructors, so an
     *  overriding implementation must not rely on subclass state.
     *  </p>
     *
     *  @param  inputFileName   Name of the CSV formatted input file.
     *  @param  encoding        Character encoding for the file.
     *                          When null or empty, the reader is created
     *                          without an explicit encoding.
     *
     *  @throws IOException     If the input file cannot be opened.
     */
    protected void openInputFile
    (
        String inputFileName ,
        String encoding
    )
        throws IOException
    {
        FileInputStream fileStream =
            new FileInputStream( new File( inputFileName ) );

                                //  Tracks whether the reader chain was
                                //  built; if not, the raw file stream
                                //  must be closed here or it leaks.
        boolean opened = false;

        try
        {
            UnicodeReader streamReader;

            if ( ( encoding == null ) || ( encoding.length() == 0 ) )
            {
                streamReader = new UnicodeReader( fileStream );
            }
            else
            {
                streamReader = new UnicodeReader( fileStream , encoding );
            }

            in = new BufferedReader( streamReader );
            opened = true;
        }
        finally
        {
            if ( !opened )
            {
                try
                {
                    fileStream.close();
                }
                catch ( IOException ignored )
                {
                                //  Ignored on purpose: the failure that
                                //  brought us here is the one the caller
                                //  needs to see.
                }
            }
        }
    }

    /** Split the next line of the input CSV file into fields.
     *
     *  <p>
     *  Exactly one physical line is consumed per call, so a qualified
     *  field cannot span several lines. A separator at the very end of
     *  the line yields a trailing empty field, e.g. {@code a,b,}
     *  produces three fields.
     *  </p>
     *
     *  @return List of strings containing each field from
     *          the next line of the file. An empty line yields a list
     *          holding a single empty string. Returns null when the
     *          end of the file has been reached.
     *
     *  @throws IOException If an error occurs while reading the new
     *                      line from the file.
     */
    public List<String> readFields()
        throws IOException
    {
        String line = in.readLine();

                                //  End of file.
        if ( line == null ) return null;

        List<String> fields = ListFactory.createNewList();

        int len = line.length();

        if ( len == 0 )
        {
            fields.add( line );
            return fields;
        }

        StringBuffer sb = new StringBuffer();
        int start       = 0;
        boolean more;

        do
        {
            sb.setLength( 0 );

            int end;

                                //  A field is qualified only when it
                                //  starts with the qualifier and the
                                //  qualifier is not the "none" value 0.
            if  (   ( start < len ) &&
                    ( line.charAt( start ) == qualifier ) &&
                    ( line.charAt( start ) != 0 )
                )
            {
                end = handleQuotedField( line , sb , start + 1 );
            }
            else
            {
                end = handlePlainField( line , sb , start );
            }

            fields.add( sb.toString() );

                                //  Another field follows only when the
                                //  handler stopped on a separator. This
                                //  keeps a trailing empty field after a
                                //  final separator, and adds nothing
                                //  extra when the handler stopped on a
                                //  closing qualifier or at end of line.
            more    = ( end < len ) && ( line.charAt( end ) == separator );
            start   = end + 1;
        }
        while ( more );

        return fields;
    }

    /** Close the input CSV file.
     *
     *  @throws IOException If an error occurs while closing the file.
     */
    public void close()
        throws IOException
    {
        in.close();
    }

    /** Handles a qualified field.
     *
     *  <p>
     *  A doubled qualifier is unescaped to a single qualifier. The field
     *  ends at a qualifier that is immediately followed by the separator,
     *  or at a qualifier that is the last character of the line. A
     *  qualifier followed by any other character is copied literally.
     *  </p>
     *
     *  @param  s   Input string.
     *  @param  sb  Output string buffer that receives the field text.
     *  @param  i   Offset in s of the first character after the
     *              opening qualifier.
     *
     *  @return     Index of the separator that follows the closing
     *              qualifier; or the index of the closing qualifier
     *              when it is the last character of s; or the length
     *              of s when no closing qualifier was found.
     */
    protected int handleQuotedField( String s , StringBuffer sb , int i )
    {
        int len = s.length();
        int j;

        for ( j = i ; j < len ; j++ )
        {
            if ( s.charAt( j ) == qualifier )
            {
                                //  Closing qualifier at end of line.
                if ( ( j + 1 ) == len ) break;

                char next   = s.charAt( j + 1 );

                if ( next == qualifier )
                {
                                //  Escaped qualifier: skip the first
                                //  one, the second is appended below.
                    j++;
                }
                else if ( next == separator )
                {
                                //  Closing qualifier: step onto the
                                //  separator and stop.
                    j++;
                    break;
                }
            }
                                //  Regular character.
            sb.append( s.charAt( j ) );
        }

        return j;
    }

    /** Handles an unqualified field.
     *
     *  @param  s   Input string.
     *  @param  sb  Output string buffer that receives the field text.
     *  @param  i   Current offset in string s.
     *
     *  @return     Index of next separator, or the length of s when
     *              no further separator exists.
     */
    protected int handlePlainField( String s , StringBuffer sb , int i )
    {
                                //  Look for separator.
        int j   = s.indexOf( separator , i );

                                //  No separator found: the field is
                                //  everything from the current position
                                //  to the end of the input string.
        if ( j == -1 )
        {
            sb.append( s.substring( i ) );
            return s.length();
        }

                                //  Separator found: the field is the
                                //  text up to, not including, it.
        sb.append( s.substring( i , j ) );
        return j;
    }
}