/*
* Copyright (c) 2016, Metron, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Metron, Inc. nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL METRON, INC. BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package com.metsci.glimpse.dspl.parser;
import static com.metsci.glimpse.dspl.parser.util.ParserUtils.buildPropertyTableData;
import static com.metsci.glimpse.dspl.parser.util.ParserUtils.buildSliceTableData;
import static com.metsci.glimpse.dspl.parser.util.ParserUtils.getColumns;
import static com.metsci.glimpse.dspl.parser.util.ParserUtils.getConcepts;
import static com.metsci.glimpse.dspl.parser.util.ParserUtils.getTypes;
import static com.metsci.glimpse.dspl.parser.util.QuoteAwareStringSplitter.splitLine;
import static com.metsci.glimpse.util.logging.LoggerUtils.logWarning;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.channels.Channels;
import java.nio.channels.ReadableByteChannel;
import java.util.logging.Logger;
import javax.xml.bind.JAXBException;
import com.metsci.glimpse.dspl.DsplParser;
import com.metsci.glimpse.dspl.parser.column.SliceColumnType;
import com.metsci.glimpse.dspl.parser.table.PropertyTableData;
import com.metsci.glimpse.dspl.parser.table.SliceTableData;
import com.metsci.glimpse.dspl.parser.util.ParserUtils.BooleanColumnParser;
import com.metsci.glimpse.dspl.parser.util.ParserUtils.DateColumnParser;
import com.metsci.glimpse.dspl.parser.util.ParserUtils.FloatColumnParser;
import com.metsci.glimpse.dspl.parser.util.ParserUtils.IntegerColumnParser;
import com.metsci.glimpse.dspl.parser.util.ParserUtils.ParserFactory;
import com.metsci.glimpse.dspl.parser.util.ParserUtils.SimpleParserFactory;
import com.metsci.glimpse.dspl.parser.util.ParserUtils.StringColumnParser;
import com.metsci.glimpse.dspl.parser.util.ParserUtils.TableColumnParser;
import com.metsci.glimpse.dspl.parser.util.ParserUtils.TableParserInfo;
import com.metsci.glimpse.dspl.schema.Concept;
import com.metsci.glimpse.dspl.schema.DataType;
import com.metsci.glimpse.dspl.schema.Slice;
import com.metsci.glimpse.dspl.schema.Table.Column;
import com.metsci.glimpse.dspl.util.DsplException;
import com.metsci.glimpse.dspl.util.DsplHelper;
/**
* The standard DSPL CSV parser capable of parsing CSV files which correspond to Google's
* rules for CSV table files.
*
* @author ulman
*/
public class CsvParser implements TableParser
{
public static final Logger logger = Logger.getLogger( CsvParser.class.getName( ) );
protected ParserFactory factory;
protected DsplParser dsplParser;
/**
 * Creates a CSV parser bound to the given DSPL parser.
 *
 * @param dsplParser the owning parser; the column parsers in this class consult its
 *                   {@code isFailOnErrorMode( )} flag when they hit malformed data
 */
public CsvParser( DsplParser dsplParser )
{
this.dsplParser = dsplParser;
// createParserFactory( ) is overridable and is deliberately called after dsplParser
// has been assigned, so an override can already read that field.
this.factory = createParserFactory( );
}
/**
 * Supplies the factory used to create one column parser per table column.
 * Called once from the constructor; subclasses may override it to provide
 * a different factory.
 *
 * @return a new {@link CSVParserFactory}
 */
public ParserFactory createParserFactory( )
{
    ParserFactory csvFactory = new CSVParserFactory( );
    return csvFactory;
}
/**
 * Reports whether results produced by this parser may be cached.
 *
 * @return always {@code true}
 */
@Override
public boolean isCachable( )
{
    final boolean cachable = true;
    return cachable;
}
/**
 * Parses the table data of a slice, obtaining the input stream from
 * {@code DsplHelper.getTableInputStream( slice )}.
 *
 * @param slice the slice whose table data should be read
 * @return the parsed slice table data
 */
@Override
public SliceTableData parse( Slice slice ) throws IOException, JAXBException, DsplException
{
    InputStream tableStream = DsplHelper.getTableInputStream( slice );
    return parse( slice, tableStream );
}
/**
 * Parses slice table data from a byte channel by wrapping the channel in an
 * input stream and delegating to the stream-based overload.
 *
 * @param slice   the slice the data belongs to
 * @param channel the channel to read the CSV data from
 * @return the parsed slice table data
 */
@Override
public SliceTableData parse( Slice slice, ReadableByteChannel channel ) throws IOException, JAXBException, DsplException
{
    InputStream channelStream = Channels.newInputStream( channel );
    return parse( slice, channelStream );
}
/**
 * Parses slice table data from an input stream.
 * <p>
 * The first line is read as the header row of column ids, every following line is fed
 * to the per-column parsers, and the collected columns are assembled into a
 * {@link SliceTableData}. The stream is closed before this method returns, whether
 * parsing succeeds or fails.
 *
 * @param slice  the slice the data belongs to
 * @param stream the CSV data; decoded as UTF-8
 * @return the parsed slice table data
 * @throws IOException   if reading from the stream fails
 * @throws DsplException if the data is malformed and the parser is in fail-on-error mode
 */
@Override
public SliceTableData parse( Slice slice, InputStream stream ) throws IOException, JAXBException, DsplException
{
    // Decode explicitly as UTF-8: the single-argument InputStreamReader constructor uses
    // the platform default charset, which makes non-ASCII data machine dependent.
    BufferedReader in = new BufferedReader( new InputStreamReader( stream, "UTF-8" ) );
    try
    {
        TableParserInfo info = newParserInfo( slice, in );
        parse( in, info );
        return buildSliceTableData( slice, info, factory );
    }
    finally
    {
        // closing the reader also closes the wrapped stream
        in.close( );
    }
}
/**
 * Parses the property table of a concept, obtaining the input stream from
 * {@code DsplHelper.getTableInputStream( concept )}.
 *
 * @param concept the concept whose table data should be read
 * @return the parsed property table data
 */
@Override
public PropertyTableData parse( Concept concept ) throws IOException, JAXBException, DsplException
{
    InputStream tableStream = DsplHelper.getTableInputStream( concept );
    return parse( concept, tableStream );
}
/**
 * Parses concept property data from a byte channel by wrapping the channel in an
 * input stream and delegating to the stream-based overload.
 *
 * @param concept the concept the data belongs to
 * @param channel the channel to read the CSV data from
 * @return the parsed property table data
 */
@Override
public PropertyTableData parse( Concept concept, ReadableByteChannel channel ) throws IOException, JAXBException, DsplException
{
    InputStream channelStream = Channels.newInputStream( channel );
    return parse( concept, channelStream );
}
/**
 * Parses concept property data from an input stream.
 * <p>
 * The first line is read as the header row of column ids, every following line is fed
 * to the per-column parsers, and the collected columns are assembled into a
 * {@link PropertyTableData}. The stream is closed before this method returns, whether
 * parsing succeeds or fails.
 *
 * @param concept the concept the data belongs to
 * @param stream  the CSV data; decoded as UTF-8
 * @return the parsed property table data
 * @throws IOException   if reading from the stream fails
 * @throws DsplException if the data is malformed and the parser is in fail-on-error mode
 */
@Override
public PropertyTableData parse( Concept concept, InputStream stream ) throws IOException, JAXBException, DsplException
{
    // Decode explicitly as UTF-8: the single-argument InputStreamReader constructor uses
    // the platform default charset, which makes non-ASCII data machine dependent.
    BufferedReader in = new BufferedReader( new InputStreamReader( stream, "UTF-8" ) );
    try
    {
        TableParserInfo info = newParserInfo( concept, in );
        parse( in, info );
        return buildPropertyTableData( concept, info, factory );
    }
    finally
    {
        // closing the reader also closes the wrapped stream
        in.close( );
    }
}
/**
 * Reads the remaining lines of the table and hands each cell to the parser of its column.
 * <p>
 * Empty lines are ignored. A row whose cell count differs from the number of column
 * parsers is rejected as a whole: in fail-on-error mode a {@link DsplException} is
 * thrown, otherwise the row is logged and skipped.
 *
 * @param in   reader positioned after the header row
 * @param info parser info whose parsers receive the cell values
 * @throws IOException   if reading a line fails
 * @throws DsplException on a malformed row or value while in fail-on-error mode
 */
protected void parse( BufferedReader in, TableParserInfo info ) throws IOException, DsplException
{
    CSVTableColumnParser[] columnParsers = ( CSVTableColumnParser[] ) info.getParsers( );
    int expected = columnParsers.length;

    for ( String row = in.readLine( ); row != null; row = in.readLine( ) )
    {
        if ( row.isEmpty( ) ) continue;

        String[] cells = splitLine( row );

        if ( cells.length != expected )
        {
            if ( dsplParser.isFailOnErrorMode( ) )
            {
                throw new DsplException( "Encountered row of incorrect size (expected %d found %d): [%s]", expected, cells.length, row );
            }

            logWarning( logger, "Skipping row of incorrect size (expected %d found %d): [%s]", expected, cells.length, row );
            continue;
        }

        // cell count matches the parser count, so cell k belongs to parser k
        int column = 0;
        for ( String cell : cells )
        {
            columnParsers[column].addData( cell );
            column++;
        }
    }
}
/**
 * Reads the header row and returns the column ids it contains.
 *
 * @param in reader positioned at the start of the table data
 * @return the interned column ids, in file order
 * @throws IOException if reading fails or the input contains no lines at all
 */
protected String[] parseFirstLine( BufferedReader in ) throws IOException, JAXBException
{
    String line = in.readLine( );

    // readLine( ) returns null at end of stream; report an empty table explicitly
    // instead of handing null to splitLine( )
    if ( line == null )
    {
        throw new IOException( "Table data is empty: expected a header row of column ids" );
    }

    String[] tokens = splitLine( line );
    int size = tokens.length;
    String[] ids = new String[size];
    for ( int i = 0; i < size; i++ )
    {
        ids[i] = tokens[i].intern( );
    }
    return ids;
}
/**
 * Asks the factory for one parser per column.
 *
 * @param factory the factory that creates the parsers
 * @param columns the table columns, in file order
 * @param types   the data type of each column, parallel to {@code columns}
 * @return the parsers, parallel to {@code columns}
 */
protected CSVTableColumnParser[] getParsers( ParserFactory factory, Column[] columns, DataType[] types ) throws DsplException
{
    CSVTableColumnParser[] result = new CSVTableColumnParser[columns.length];

    int index = 0;
    for ( Column column : columns )
    {
        result[index] = ( CSVTableColumnParser ) factory.getParser( column, types[index] );
        index++;
    }

    return result;
}
/**
 * A column parser that is fed the raw text of one CSV cell at a time.
 */
public interface CSVTableColumnParser extends TableColumnParser
{
    /**
     * Parses the text of one cell and appends the result to the column.
     *
     * @param data the raw cell text
     * @throws DsplException if the text cannot be parsed
     */
    void addData( String data ) throws DsplException;

    /**
     * Appends the column's placeholder for a missing value.
     */
    void addGap( );
}
/**
 * Factory that maps each column kind onto the matching CSV column parser declared
 * in this class. The parsers it creates are inner classes of the enclosing
 * {@link CsvParser}.
 */
public class CSVParserFactory extends SimpleParserFactory
{
    /** String columns are parsed by {@link CSVStringColumnParser}. */
    @Override
    public CSVTableColumnParser newStringParser( Column column )
    {
        return new CSVStringColumnParser( );
    }

    /** Concept columns use the same parser as string columns. */
    @Override
    public CSVTableColumnParser newConceptParser( Column column )
    {
        return new CSVStringColumnParser( );
    }

    /** Integer columns are parsed by {@link CSVIntegerColumnParser}. */
    @Override
    public CSVTableColumnParser newIntegerParser( Column column )
    {
        return new CSVIntegerColumnParser( );
    }

    /** Float columns are parsed by {@link CSVFloatColumnParser}. */
    @Override
    public CSVTableColumnParser newFloatParser( Column column )
    {
        return new CSVFloatColumnParser( );
    }

    /** Boolean columns are parsed by {@link CSVBooleanColumnParser}. */
    @Override
    public CSVTableColumnParser newBooleanParser( Column column )
    {
        return new CSVBooleanColumnParser( );
    }

    /** Date columns are parsed by {@link CSVDateColumnParser}, which is given the column. */
    @Override
    public CSVTableColumnParser newDateParser( Column column )
    {
        return new CSVDateColumnParser( column );
    }
}
/**
 * CSV column parser for string (and concept) columns.
 */
protected class CSVStringColumnParser extends StringColumnParser implements CSVTableColumnParser
{
/**
 * Passes the token through {@code parse( token )} and adds the result to the column data.
 * Unlike the numeric, boolean and date parsers in this file, an empty token is not
 * turned into a gap here.
 */
@Override
public void addData( String token )
{
// NOTE(review): parse( ) is presumably inherited from StringColumnParser - confirm
data.add( parse( token ) );
}
/**
 * Marks a missing value by adding {@code null} to the column data.
 */
@Override
public void addGap( )
{
data.add( null );
}
}
/**
 * CSV column parser for integer columns.
 */
protected class CSVIntegerColumnParser extends IntegerColumnParser implements CSVTableColumnParser
{
    /**
     * Appends the token parsed with {@link Integer#parseInt(String)}. An empty token
     * becomes a gap. An unparsable token throws a {@link DsplException} in
     * fail-on-error mode; otherwise it is logged and becomes a gap.
     */
    @Override
    public void addData( String token ) throws DsplException
    {
        if ( token.isEmpty( ) )
        {
            addGap( );
            return;
        }

        try
        {
            data.append( Integer.parseInt( token ) );
        }
        catch ( NumberFormatException parseFailure )
        {
            if ( !dsplParser.isFailOnErrorMode( ) )
            {
                logWarning( logger, "Problem parsing token %s as type integer. Adding gap instead.", token );
                addGap( );
                return;
            }

            throw new DsplException( "Problem parsing: %s", parseFailure, token );
        }
    }

    /**
     * Marks a missing value by appending {@code -1}.
     */
    //TODO we need a much better way to mark missing data
    @Override
    public void addGap( )
    {
        data.append( -1 );
    }
}
/**
 * CSV column parser for float columns.
 */
protected class CSVFloatColumnParser extends FloatColumnParser implements CSVTableColumnParser
{
    /**
     * Appends the token parsed with {@link Float#parseFloat(String)}. An empty token
     * becomes a gap. An unparsable token throws a {@link DsplException} in
     * fail-on-error mode; otherwise it is logged and becomes a gap.
     */
    @Override
    public void addData( String token ) throws DsplException
    {
        if ( token.isEmpty( ) )
        {
            addGap( );
            return;
        }

        try
        {
            data.append( Float.parseFloat( token ) );
        }
        catch ( NumberFormatException parseFailure )
        {
            if ( !dsplParser.isFailOnErrorMode( ) )
            {
                logWarning( logger, "Problem parsing token %s as type float. Adding gap instead.", token );
                addGap( );
                return;
            }

            throw new DsplException( "Problem parsing: %s", parseFailure, token );
        }
    }

    /**
     * Marks a missing value by appending {@code -1}.
     */
    //TODO we need a much better way to mark missing data
    @Override
    public void addGap( )
    {
        data.append( -1 );
    }
}
/**
 * CSV column parser for boolean columns.
 */
protected class CSVBooleanColumnParser extends BooleanColumnParser implements CSVTableColumnParser
{
    /**
     * Appends the boolean value of the token. {@code true} and {@code false} are
     * accepted in any letter case and an empty token becomes a gap. Any other token
     * throws a {@link DsplException} in fail-on-error mode; otherwise it is logged
     * and becomes a gap.
     * <p>
     * The token is validated explicitly because {@link Boolean#parseBoolean(String)}
     * never throws: it returns {@code false} for every string other than "true", so
     * malformed input would otherwise go unnoticed.
     */
    @Override
    public void addData( String token ) throws DsplException
    {
        if ( token.isEmpty( ) )
        {
            addGap( );
        }
        else if ( "true".equalsIgnoreCase( token ) )
        {
            data.append( true );
        }
        else if ( "false".equalsIgnoreCase( token ) )
        {
            data.append( false );
        }
        else if ( dsplParser.isFailOnErrorMode( ) )
        {
            throw new DsplException( "Problem parsing token %s as type boolean.", token );
        }
        else
        {
            logWarning( logger, "Problem parsing token %s as type boolean. Adding gap instead.", token );
            addGap( );
        }
    }

    /**
     * Marks a missing value by appending {@code false}.
     */
    //TODO we need a much better way to mark missing data
    @Override
    public void addGap( )
    {
        data.append( false );
    }
}
/**
 * CSV column parser for date columns.
 */
protected class CSVDateColumnParser extends DateColumnParser implements CSVTableColumnParser
{
    /**
     * @param column the date column, handed on to the superclass constructor
     */
    public CSVDateColumnParser( Column column )
    {
        super( column );
    }

    /**
     * Appends the result of {@code dateFormat.parseMillis( token )}. An empty token
     * becomes a gap. A token rejected with an {@link IllegalArgumentException} throws
     * a {@link DsplException} in fail-on-error mode; otherwise it is logged and
     * becomes a gap.
     */
    @Override
    public void addData( String token ) throws DsplException
    {
        if ( token.isEmpty( ) )
        {
            addGap( );
            return;
        }

        try
        {
            data.append( dateFormat.parseMillis( token ) );
        }
        catch ( IllegalArgumentException parseFailure )
        {
            if ( !dsplParser.isFailOnErrorMode( ) )
            {
                logWarning( logger, "Trouble parsing date: %s. Adding gap instead.", token );
                addGap( );
                return;
            }

            throw new DsplException( "Problem parsing: %s", parseFailure, token );
        }
    }

    /**
     * Marks a missing value by appending {@code -1}.
     */
    //TODO we need a much better way to mark missing data
    @Override
    public void addGap( )
    {
        data.append( -1 );
    }
}
/**
 * Reads the header row and resolves everything needed to parse a concept's property table:
 * the concept, column, data type and parser for each column id.
 *
 * @param concept the concept whose table is being parsed
 * @param in      reader positioned at the start of the table data; the header row is consumed
 * @return the parser info; its slice column types are {@code null}
 */
protected TableParserInfo newParserInfo( Concept concept, BufferedReader in ) throws IOException, JAXBException, DsplException
{
    String[] ids = parseFirstLine( in );

    Concept[] columnConcepts = getConcepts( ids, concept );
    Column[] tableColumns = getColumns( ids, concept.getTable( ) );
    DataType[] dataTypes = getTypes( columnConcepts, tableColumns );

    CSVTableColumnParser[] columnParsers = getParsers( factory, tableColumns, dataTypes );

    return new TableParserInfo( ids, columnConcepts, null, tableColumns, dataTypes, columnParsers );
}
/**
 * Reads the header row and resolves everything needed to parse a slice's table:
 * the concept, slice column type, column, data type and parser for each column id.
 *
 * @param slice the slice whose table is being parsed
 * @param in    reader positioned at the start of the table data; the header row is consumed
 * @return the parser info for the slice table
 */
protected TableParserInfo newParserInfo( Slice slice, BufferedReader in ) throws IOException, JAXBException, DsplException
{
    String[] ids = parseFirstLine( in );
    int columnCount = ids.length;

    // output arrays handed to getConcepts( ) - presumably filled in place, one entry per column id
    Concept[] columnConcepts = new Concept[columnCount];
    SliceColumnType[] columnKinds = new SliceColumnType[columnCount];
    getConcepts( ids, slice, columnConcepts, columnKinds );

    Column[] tableColumns = getColumns( ids, slice.getTable( ) );
    DataType[] dataTypes = getTypes( columnConcepts, tableColumns );

    CSVTableColumnParser[] columnParsers = getParsers( factory, tableColumns, dataTypes );

    return new TableParserInfo( ids, columnConcepts, columnKinds, tableColumns, dataTypes, columnParsers );
}
}