/*
* Copyright (c) 2016, Metron, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Metron, Inc. nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL METRON, INC. BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package com.metsci.glimpse.dspl.parser;
import static com.metsci.glimpse.dspl.parser.util.ParserUtils.getColumns;
import static com.metsci.glimpse.dspl.parser.util.ParserUtils.getConcepts;
import static com.metsci.glimpse.dspl.parser.util.ParserUtils.getConstantTableColumns;
import static com.metsci.glimpse.dspl.parser.util.ParserUtils.getTypes;
import static com.metsci.glimpse.util.GeneralUtils.newLinkedHashMap;
import static com.metsci.glimpse.util.GeneralUtils.newTreeSet;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.channels.Channels;
import java.nio.channels.ReadableByteChannel;
import java.nio.channels.WritableByteChannel;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.TreeSet;
import javax.xml.bind.JAXBException;
import com.metsci.glimpse.dspl.parser.SimpleBinaryParser.HeaderInformation;
import com.metsci.glimpse.dspl.parser.column.CompactInternStringTableColumn;
import com.metsci.glimpse.dspl.parser.column.CompactStringTableColumn;
import com.metsci.glimpse.dspl.parser.column.SimpleTableColumn;
import com.metsci.glimpse.dspl.parser.column.SliceColumnType;
import com.metsci.glimpse.dspl.parser.column.TableColumn;
import com.metsci.glimpse.dspl.parser.table.PropertyTableData;
import com.metsci.glimpse.dspl.parser.table.SimplePropertyTableData;
import com.metsci.glimpse.dspl.parser.table.SimpleSliceTableData;
import com.metsci.glimpse.dspl.parser.table.SliceTableData;
import com.metsci.glimpse.dspl.parser.util.ParserUtils.ParserFactory;
import com.metsci.glimpse.dspl.parser.util.ParserUtils.SimpleParserFactory;
import com.metsci.glimpse.dspl.parser.util.ParserUtils.TableColumnParser;
import com.metsci.glimpse.dspl.parser.util.ParserUtils.TableParserInfo;
import com.metsci.glimpse.dspl.schema.Concept;
import com.metsci.glimpse.dspl.schema.DataType;
import com.metsci.glimpse.dspl.schema.Slice;
import com.metsci.glimpse.dspl.schema.Table.Column;
import com.metsci.glimpse.dspl.util.DsplException;
import com.metsci.glimpse.dspl.util.DsplHelper;
import com.metsci.glimpse.util.io.datapipe.ReadableDataChannel;
import com.metsci.glimpse.util.io.datapipe.WritableDataChannel;
public class ColumnBinaryParser implements TableParser, TableWriter
{
/** Magic number written as the first int of every stream by the write methods and verified by {@code parseFirstLine}. */
public static final int MAGIC = 0x1234CDEF;
/** Charset name used by {@code readString}, {@code writeStringArray} and {@code readStringArray} to convert between strings and bytes. */
public static final String ENCODING = "UTF-8";
/** Factory for per-column parsers; assigned in the constructor from {@code createParserFactory()}. */
protected SimpleParserFactory factory;
/** 1024-byte array allocated in the constructor; not referenced by the other methods of this class — presumably for subclasses (verify). */
protected byte[] buffer;
/**
 * Creates a parser, obtaining its column-parser factory from {@code createParserFactory()}
 * and allocating the 1024-byte {@code buffer}.
 * <p>
 * NOTE(review): {@code createParserFactory()} is public and overridable and is invoked from this
 * constructor, i.e. before any subclass constructor body has run; overrides should not rely on
 * subclass state.
 */
public ColumnBinaryParser( )
{
this.factory = createParserFactory( );
this.buffer = new byte[1024];
}
/**
 * Creates the factory stored in {@code factory} by the constructor.
 *
 * @return a new {@code SimpleParserFactory}
 */
public SimpleParserFactory createParserFactory( )
{
return new SimpleParserFactory( );
}
/**
 * Writes the non-constant columns of a slice table to a byte channel in column-major binary form.
 * <p>
 * Layout: {@link #MAGIC}, the number of non-constant columns, the number of rows, the id of each
 * non-constant column (dimension columns first, then metric columns), and finally the data of
 * each of those columns in the same order. Constant columns are skipped because their value is
 * specified in the dspl metadata.
 * <p>
 * The data channel wrapping {@code byteChannel} is always flushed and closed before this method
 * returns; it is closed even when the flush itself fails.
 *
 * @param slice the slice the data belongs to (not consulted by this implementation)
 * @param data the table data to write
 * @param byteChannel destination channel
 * @throws IOException if writing to, flushing or closing the channel fails
 * @throws DsplException if a column has a type that cannot be written
 * @throws JAXBException as declared by the signature; not thrown directly by this method's own code
 */
public void write( Slice slice, SliceTableData data, WritableByteChannel byteChannel ) throws IOException, DsplException, JAXBException
{
    WritableDataChannel dataChannel = new WritableDataChannel( byteChannel );
    try
    {
        // each .bin file must start with the correct magic number identifier
        dataChannel.writeInt( MAGIC );

        Collection<String> dimensionConceptIds = data.getDimensionColumnIds( );
        Collection<String> metricConceptIds = data.getMetricColumnIds( );
        int numRows = data.getNumRows( );

        // collect the columns to write out, dimensions first and then metrics
        // constant columns have their value specified in the dspl metadata, so they are not included
        List<TableColumn> columnList = new ArrayList<TableColumn>( );
        for ( String dimensionConceptId : dimensionConceptIds )
        {
            TableColumn column = data.getDimensionColumn( dimensionConceptId );
            if ( !column.isConstant( ) )
            {
                columnList.add( column );
            }
        }
        for ( String metricConceptId : metricConceptIds )
        {
            TableColumn column = data.getMetricColumn( metricConceptId );
            if ( !column.isConstant( ) )
            {
                columnList.add( column );
            }
        }

        // write the number of columns and rows in the data set
        dataChannel.writeInt( columnList.size( ) );
        dataChannel.writeInt( numRows );

        // write the header information (column ids) for the non-constant columns
        for ( TableColumn column : columnList )
        {
            dataChannel.writeString( column.getColumn( ).getId( ) );
        }

        // write the column data, in the same order as the header
        for ( TableColumn column : columnList )
        {
            write( column, dataChannel );
        }
    }
    finally
    {
        // close the channel even if the flush fails, so the underlying resource is not leaked
        try
        {
            dataChannel.flushBuffer( );
        }
        finally
        {
            dataChannel.close( );
        }
    }
}
/**
 * Writes slice table data to an output stream by wrapping the stream with
 * {@code Channels.newChannel} and delegating to the channel-based {@code write} overload.
 * <p>
 * NOTE(review): the delegate closes its data channel when it finishes; presumably this also
 * closes {@code stream} — confirm against {@code WritableDataChannel}.
 */
@Override
public void write( Slice slice, SliceTableData data, OutputStream stream ) throws IOException, DsplException, JAXBException
{
write( slice, data, Channels.newChannel( stream ) );
}
/**
 * Writes the non-constant columns of a concept property table to a byte channel in column-major
 * binary form.
 * <p>
 * Layout: {@link #MAGIC}, the number of non-constant columns, the number of rows, the id of each
 * non-constant column, and finally the data of each of those columns in the same order. Constant
 * columns are skipped because their value is specified in the dspl metadata.
 * <p>
 * The data channel wrapping {@code byteChannel} is always flushed and closed before this method
 * returns; it is closed even when the flush itself fails.
 *
 * @param concept the concept the data belongs to (not consulted by this implementation)
 * @param data the table data to write
 * @param byteChannel destination channel
 * @throws IOException if writing to, flushing or closing the channel fails
 * @throws DsplException if a column has a type that cannot be written
 * @throws JAXBException as declared by the signature; not thrown directly by this method's own code
 */
public void write( Concept concept, PropertyTableData data, WritableByteChannel byteChannel ) throws IOException, DsplException, JAXBException
{
    WritableDataChannel dataChannel = new WritableDataChannel( byteChannel );
    try
    {
        // each .bin file must start with the correct magic number identifier
        dataChannel.writeInt( MAGIC );

        Collection<String> conceptIds = data.getColumnIds( );
        int numRows = data.getNumRows( );

        // collect the columns to write out
        // constant columns have their value specified in the dspl metadata, so they are not included
        List<TableColumn> columnList = new ArrayList<TableColumn>( );
        for ( String conceptId : conceptIds )
        {
            TableColumn column = data.getColumn( conceptId );
            if ( !column.isConstant( ) )
            {
                columnList.add( column );
            }
        }

        // write the number of columns and rows in the data set
        dataChannel.writeInt( columnList.size( ) );
        dataChannel.writeInt( numRows );

        // write the header information (column ids) for the non-constant columns
        for ( TableColumn column : columnList )
        {
            dataChannel.writeString( column.getColumn( ).getId( ) );
        }

        // write the column data, in the same order as the header
        for ( TableColumn column : columnList )
        {
            write( column, dataChannel );
        }
    }
    finally
    {
        // close the channel even if the flush fails, so the underlying resource is not leaked
        try
        {
            dataChannel.flushBuffer( );
        }
        finally
        {
            dataChannel.close( );
        }
    }
}
/**
 * Writes concept property table data to an output stream by wrapping the stream with
 * {@code Channels.newChannel} and delegating to the channel-based {@code write} overload.
 * <p>
 * NOTE(review): the delegate closes its data channel when it finishes; presumably this also
 * closes {@code stream} — confirm against {@code WritableDataChannel}.
 */
@Override
public void write( Concept concept, PropertyTableData data, OutputStream stream ) throws IOException, DsplException, JAXBException
{
write( concept, data, Channels.newChannel( stream ) );
}
/**
 * Writes the data of a single column to the channel, choosing the encoding from the column's
 * data type. STRING columns whose format is {@code "intern"} are written as an interned string
 * table; other STRING columns and CONCEPT columns are written as a plain string array.
 *
 * @param column the column to write
 * @param dataChannel destination channel
 * @throws DsplException if the column's type is not one of the handled types
 * @throws IOException if writing to the channel fails
 */
protected void write( TableColumn column, WritableDataChannel dataChannel ) throws DsplException, IOException
{
    final String format = column.getColumn( ).getFormat( );
    final boolean interned = "intern".equals( format );

    switch ( column.getType( ) )
    {
        case STRING:
        {
            final String[] strings = column.getStringData( );
            if ( interned )
            {
                writeInternStringArray( dataChannel, strings );
            }
            else
            {
                writeStringArray( dataChannel, strings );
            }
            return;
        }
        case CONCEPT:
        {
            writeStringArray( dataChannel, column.getStringData( ) );
            return;
        }
        case FLOAT:
        {
            dataChannel.writeFloatArray( column.getFloatData( ) );
            return;
        }
        case INTEGER:
        {
            dataChannel.writeIntArray( column.getIntegerData( ) );
            return;
        }
        case BOOLEAN:
        {
            dataChannel.writeBooleanArray( column.getBooleanData( ) );
            return;
        }
        case DATE:
        {
            // dates are written as a long array
            dataChannel.writeLongArray( column.getDateData( ) );
            return;
        }
        default:
        {
            throw new DsplException( "Unknown Type %s provided.", column.getType( ) );
        }
    }
}
/**
 * Parses the table data of a slice, reading from the input stream returned by
 * {@code DsplHelper.getTableInputStream( slice )}.
 */
@Override
public SliceTableData parse( Slice slice ) throws IOException, JAXBException, DsplException
{
return parse( slice, DsplHelper.getTableInputStream( slice ) );
}
/**
 * Parses the property table data of a concept, reading from the input stream returned by
 * {@code DsplHelper.getTableInputStream( concept )}.
 */
@Override
public PropertyTableData parse( Concept concept ) throws IOException, JAXBException, DsplException
{
return parse( concept, DsplHelper.getTableInputStream( concept ) );
}
/**
 * Parses the property table data of a concept from an input stream by wrapping the stream with
 * {@code Channels.newChannel} and delegating to the channel-based {@code parse} overload.
 */
@Override
public PropertyTableData parse( Concept concept, InputStream stream ) throws IOException, JAXBException, DsplException
{
return parse( concept, Channels.newChannel( stream ) );
}
/**
 * Reads a concept property table from a byte channel.
 * <p>
 * The header is read first (see {@code newParserInfo}); then, for each column listed in the
 * header, the column data is read according to its data type and stored under the column id.
 * Constant columns obtained from {@code getConstantTableColumns} are added afterwards. The data
 * channel wrapping {@code byteChannel} is closed before returning, on success or failure.
 *
 * @param concept the concept whose table is being read
 * @param byteChannel source channel
 * @return the parsed table data
 * @throws DsplException if the header is invalid or a column has an unhandled data type
 */
@Override
public PropertyTableData parse( Concept concept, ReadableByteChannel byteChannel ) throws IOException, JAXBException, DsplException
{
    final ReadableDataChannel in = new ReadableDataChannel( byteChannel );
    try
    {
        final BinaryTableParserInfo info = newParserInfo( concept, in );
        final Concept[] columnConcepts = info.getConcepts( );
        final Column[] tableColumns = info.getColumns( );
        final DataType[] dataTypes = info.getDataTypes( );
        final int rowCount = info.getNumRows( );

        final Map<String, TableColumn> columnsById = new HashMap<String, TableColumn>( );

        for ( int c = 0; c < info.getNumColumns( ); c++ )
        {
            final DataType dataType = dataTypes[c];
            final Column tableColumn = tableColumns[c];
            final Concept columnConcept = columnConcepts[c];
            final boolean interned = "intern".equals( tableColumn.getFormat( ) );

            final TableColumn parsed;
            switch ( dataType )
            {
                case STRING:
                    parsed = interned
                            ? readInternStringArray( in, tableColumn, columnConcept, new String[rowCount] )
                            : readStringArray( in, tableColumn, columnConcept, new String[rowCount] );
                    break;
                case CONCEPT:
                    parsed = readStringArray( in, tableColumn, columnConcept, new String[rowCount] );
                    break;
                case FLOAT:
                    parsed = new SimpleTableColumn( tableColumn, columnConcept, dataType, in.readFloatArray( new float[rowCount] ), rowCount );
                    break;
                case INTEGER:
                    parsed = new SimpleTableColumn( tableColumn, columnConcept, dataType, in.readIntArray( new int[rowCount] ), rowCount );
                    break;
                case BOOLEAN:
                    parsed = new SimpleTableColumn( tableColumn, columnConcept, dataType, in.readBooleanArray( new boolean[rowCount] ), rowCount );
                    break;
                case DATE:
                    parsed = new SimpleTableColumn( tableColumn, columnConcept, dataType, in.readLongArray( new long[rowCount] ), rowCount );
                    break;
                default:
                    throw new DsplException( "Unknown Type %s provided for Column %s.", dataType, info.getColumnIds( )[c] );
            }

            columnsById.put( tableColumn.getId( ), parsed );
        }

        // constant columns are not stored in the binary data; add them from the metadata
        columnsById.putAll( getConstantTableColumns( concept, info, factory, rowCount ) );

        return new SimplePropertyTableData( concept, columnsById );
    }
    finally
    {
        in.close( );
    }
}
/**
 * Parses the table data of a slice from an input stream by wrapping the stream with
 * {@code Channels.newChannel} and delegating to the channel-based {@code parse} overload.
 */
@Override
public SliceTableData parse( Slice slice, InputStream stream ) throws IOException, JAXBException, DsplException
{
return parse( slice, Channels.newChannel( stream ) );
}
/**
 * Reads a slice table from a byte channel.
 * <p>
 * The header is read first (see {@code newParserInfo}); then, for each column listed in the
 * header, the column data is read according to its data type and stored under the column id in
 * either the dimension map or the metric map, depending on the column's slice column type.
 * Constant dimension and metric columns obtained from {@code getConstantTableColumns} are added
 * afterwards. The data channel wrapping {@code byteChannel} is closed before returning, on
 * success or failure.
 *
 * @param slice the slice whose table is being read
 * @param byteChannel source channel
 * @return the parsed table data
 * @throws IOException if reading from the channel fails
 * @throws JAXBException as declared by the helpers used to resolve concepts and columns
 * @throws DsplException if the header is invalid, a column is neither a dimension nor a metric,
 *         or a column has an unhandled data type
 */
@Override
public SliceTableData parse( Slice slice, ReadableByteChannel byteChannel ) throws IOException, JAXBException, DsplException
{
    ReadableDataChannel dataChannel = new ReadableDataChannel( byteChannel );
    try
    {
        BinaryTableParserInfo info = newParserInfo( slice, dataChannel );
        Concept[] concepts = info.getConcepts( );
        Column[] columns = info.getColumns( );
        DataType[] types = info.getDataTypes( );
        SliceColumnType[] sliceColumnTypes = info.getSliceColumnTypes( );
        int numRows = info.getNumRows( );

        Map<String, TableColumn> dimensionMap = new HashMap<String, TableColumn>( );
        Map<String, TableColumn> metricMap = new HashMap<String, TableColumn>( );

        for ( int i = 0; i < info.getNumColumns( ); i++ )
        {
            DataType type = types[i];
            Column column = columns[i];
            Concept columnConcept = concepts[i];
            SliceColumnType sliceType = sliceColumnTypes[i];
            String columnFormat = column.getFormat( );

            // choose the destination map per column; a missing or unexpected slice column type
            // must not fall through to the map selected for a previous column
            Map<String, TableColumn> map;
            if ( sliceType == SliceColumnType.Dimension )
            {
                map = dimensionMap;
            }
            else if ( sliceType == SliceColumnType.Metric )
            {
                map = metricMap;
            }
            else
            {
                throw new DsplException( "Column %s is neither a Dimension nor a Metric of the Slice.", info.getColumnIds( )[i] );
            }

            TableColumn columnData = null;
            switch ( type )
            {
                case STRING:
                    if ( columnFormat != null && columnFormat.contentEquals( "intern" ) )
                        columnData = readInternStringArray( dataChannel, column, columnConcept, new String[numRows] );
                    else
                        columnData = readStringArray( dataChannel, column, columnConcept, new String[numRows] );
                    break;
                case FLOAT:
                    columnData = new SimpleTableColumn( column, columnConcept, type, dataChannel.readFloatArray( new float[numRows] ), numRows );
                    break;
                case INTEGER:
                    columnData = new SimpleTableColumn( column, columnConcept, type, dataChannel.readIntArray( new int[numRows] ), numRows );
                    break;
                case BOOLEAN:
                    columnData = new SimpleTableColumn( column, columnConcept, type, dataChannel.readBooleanArray( new boolean[numRows] ), numRows );
                    break;
                case DATE:
                    columnData = new SimpleTableColumn( column, columnConcept, type, dataChannel.readLongArray( new long[numRows] ), numRows );
                    break;
                case CONCEPT:
                    columnData = readStringArray( dataChannel, column, columnConcept, new String[numRows] );
                    break;
                default:
                    throw new DsplException( "Unknown Type %s provided for Column %s.", type, info.getColumnIds( )[i] );
            }

            map.put( column.getId( ), columnData );
        }

        // constant columns are not stored in the binary data; add them from the metadata
        dimensionMap.putAll( getConstantTableColumns( slice, info, factory, SliceColumnType.Dimension, numRows ) );
        metricMap.putAll( getConstantTableColumns( slice, info, factory, SliceColumnType.Metric, numRows ) );

        return new SimpleSliceTableData( slice, dimensionMap, metricMap );
    }
    finally
    {
        dataChannel.close( );
    }
}
/**
 * Always returns {@code false} for this parser.
 * <p>
 * NOTE(review): the meaning of "cachable" is defined by the overridden declaration, which is not
 * visible in this file.
 */
@Override
public boolean isCachable( )
{
return false;
}
/**
 * Reads the stream header: the magic number, the column count, the row count and one column
 * name per column.
 *
 * @param dataChannel source channel, positioned at the start of the data
 * @return the header information read from the channel
 * @throws DsplException if the first int read is not {@link #MAGIC}
 * @throws IOException if reading from the channel fails
 */
protected HeaderInformation parseFirstLine( ReadableDataChannel dataChannel ) throws IOException, JAXBException, DsplException
{
    // the stream must open with the magic number identifier
    if ( dataChannel.readInt( ) != MAGIC )
    {
        throw new DsplException( "File does not contain proper header code: %s", Integer.toHexString( MAGIC ) );
    }

    final int columnCount = dataChannel.readInt( );
    final int rowCount = dataChannel.readInt( );

    // one name per column follows the two counts
    final String[] columnNames = new String[columnCount];
    int next = 0;
    while ( next < columnCount )
    {
        columnNames[next] = dataChannel.readString( );
        next++;
    }

    return new HeaderInformation( columnCount, rowCount, columnNames );
}
/**
 * Reads the header from the channel and resolves, for each column id in it, the concept, table
 * column, data type and column parser for a concept property table.
 *
 * @param concept the concept whose table is being read
 * @param dataChannel source channel, positioned at the start of the data
 * @return parser information for the table; its slice column types are {@code null}
 */
protected BinaryTableParserInfo newParserInfo( Concept concept, ReadableDataChannel dataChannel ) throws IOException, JAXBException, DsplException
{
    final HeaderInformation headerInfo = parseFirstLine( dataChannel );
    final String[] ids = headerInfo.getHeaderNames( );

    final Concept[] columnConcepts = getConcepts( ids, concept );
    final Column[] tableColumns = getColumns( ids, concept.getTable( ) );
    final DataType[] dataTypes = getTypes( columnConcepts, tableColumns );
    final TableColumnParser[] columnParsers = getParsers( factory, tableColumns, dataTypes );

    final int columnCount = headerInfo.getNumColumns( );
    final int rowCount = headerInfo.getNumRowsPerColumn( );

    // a concept table has no dimension/metric distinction, so no slice column types are passed
    return new BinaryTableParserInfo( columnCount, rowCount, ids, columnConcepts, null, tableColumns, dataTypes, columnParsers );
}
/**
 * Reads the header from the channel and resolves, for each column id in it, the concept, slice
 * column type, table column, data type and column parser for a slice table.
 *
 * @param slice the slice whose table is being read
 * @param dataChannel source channel, positioned at the start of the data
 * @return parser information for the table
 */
protected BinaryTableParserInfo newParserInfo( Slice slice, ReadableDataChannel dataChannel ) throws IOException, JAXBException, DsplException
{
    final HeaderInformation headerInfo = parseFirstLine( dataChannel );
    final String[] ids = headerInfo.getHeaderNames( );
    final int columnCount = headerInfo.getNumColumns( );

    // output arrays passed to getConcepts along with the header names and the slice
    final Concept[] columnConcepts = new Concept[columnCount];
    final SliceColumnType[] columnKinds = new SliceColumnType[columnCount];
    getConcepts( headerInfo.getHeaderNames( ), slice, columnConcepts, columnKinds );

    final Column[] tableColumns = getColumns( ids, slice.getTable( ) );
    final DataType[] dataTypes = getTypes( columnConcepts, tableColumns );
    final TableColumnParser[] columnParsers = getParsers( factory, tableColumns, dataTypes );

    final int headerColumnCount = headerInfo.getNumColumns( );
    final int rowCount = headerInfo.getNumRowsPerColumn( );

    return new BinaryTableParserInfo( headerColumnCount, rowCount, ids, columnConcepts, columnKinds, tableColumns, dataTypes, columnParsers );
}
/**
 * Asks the factory for one parser per column, pairing each column with the data type at the
 * same index.
 *
 * @param factory source of column parsers
 * @param columns the table columns
 * @param types the data type of each column, indexed like {@code columns}
 * @return an array of parsers with the same length as {@code columns}
 * @throws DsplException if the factory cannot provide a parser
 */
protected TableColumnParser[] getParsers( ParserFactory factory, Column[] columns, DataType[] types ) throws DsplException
{
    final TableColumnParser[] result = new TableColumnParser[columns.length];

    int index = 0;
    while ( index < result.length )
    {
        result[index] = factory.getParser( columns[index], types[index] );
        index++;
    }

    return result;
}
/**
 * Parser information for a binary table: everything held by {@code TableParserInfo} plus the
 * column and row counts read from the stream header.
 */
protected class BinaryTableParserInfo extends TableParserInfo
{
    /** Number of columns passed to the constructor. */
    protected int numColumns;

    /** Number of rows passed to the constructor. */
    protected int numRows;

    /**
     * Stores the two counts and forwards the remaining arguments to the superclass constructor.
     */
    public BinaryTableParserInfo( int numColumns, int numRows, String[] columnIds, Concept[] concepts, SliceColumnType[] sliceColumnTypes, Column[] columns, DataType[] types, TableColumnParser[] parsers )
    {
        super( columnIds, concepts, sliceColumnTypes, columns, types, parsers );

        this.numRows = numRows;
        this.numColumns = numColumns;
    }

    /** @return the number of columns */
    public int getNumColumns( )
    {
        return this.numColumns;
    }

    /** @return the number of rows */
    public int getNumRows( )
    {
        return this.numRows;
    }
}
/**
 * Writes a string array as a table of its distinct non-null values followed by one table index
 * per row.
 * <p>
 * Layout: the number of distinct values, each distinct value (via {@code writeString}, in the
 * iteration order of the sorted set), then an int array with one entry per row holding the
 * value's position in that table, or {@code -1} for a null row.
 *
 * @param dataChannel destination channel
 * @param array the row values; entries may be null
 * @throws IOException if writing to the channel fails
 */
protected void writeInternStringArray( WritableDataChannel dataChannel, String[] array ) throws IOException
{
    // gather the distinct non-null values in sorted order
    TreeSet<String> distinct = newTreeSet( );
    for ( String value : array )
    {
        if ( value != null )
        {
            distinct.add( value );
        }
    }

    // give every distinct value its position in the sorted order
    Map<String, Integer> positionOf = newLinkedHashMap( );
    for ( String value : distinct )
    {
        positionOf.put( value, positionOf.size( ) );
    }

    // translate each row into a table position, -1 marking a null row
    int[] rowPositions = new int[array.length];
    for ( int row = 0; row < rowPositions.length; row++ )
    {
        String value = array[row];
        if ( value == null )
        {
            rowPositions[row] = -1;
        }
        else
        {
            rowPositions[row] = positionOf.get( value );
        }
    }

    // table size, table entries, then the per-row positions
    dataChannel.writeInt( positionOf.size( ) );
    for ( String value : positionOf.keySet( ) )
    {
        writeString( value, dataChannel );
    }
    dataChannel.writeIntArray( rowPositions );
}
/**
 * Reads a string column stored as a table of distinct values followed by one table index per
 * row (the layout produced by {@code writeInternStringArray}).
 * <p>
 * {@code readString} reports a zero-length entry as {@code null}. The writer never stores a null
 * value in the table (null rows are encoded as index {@code -1}), so a zero-length entry can only
 * be the empty string; it is restored as {@code ""} instead of failing with a
 * {@code NullPointerException}.
 *
 * @param dataChannel source channel
 * @param column the table column being read
 * @param concept the concept associated with the column
 * @param array only its length is used, as the number of rows to read
 * @return a column backed by the distinct-value table and the per-row indices
 * @throws IOException if reading from the channel fails
 */
protected TableColumn readInternStringArray( ReadableDataChannel dataChannel, Column column, Concept concept, String[] array ) throws IOException
{
    int nUnique = dataChannel.readInt( );
    String[] uniqueStrings = new String[nUnique];
    for ( int i = 0; i < nUnique; i++ )
    {
        String string = readString( dataChannel );
        // a zero-length table entry comes back as null; it represents the empty string
        uniqueStrings[i] = ( string == null ) ? "" : string.intern( );
    }

    int[] indexForRow = new int[array.length];
    dataChannel.readIntArray( indexForRow );

    return new CompactInternStringTableColumn( column, concept, array.length, uniqueStrings, indexForRow );
}
/**
 * Reads a length-prefixed string: an int byte count followed by that many bytes, decoded with
 * {@link #ENCODING}.
 *
 * @param in source channel
 * @return the decoded string, or {@code null} when the byte count read is zero
 * @throws IOException if reading from the channel or decoding fails
 */
protected String readString( ReadableDataChannel in ) throws IOException
{
    final int length = in.readInt( );
    if ( length != 0 )
    {
        final byte[] encoded = new byte[length];
        in.readByteArray( encoded );
        return new String( encoded, ENCODING );
    }

    // a zero byte count is reported as null
    return null;
}
/**
 * Writes a length-prefixed string: an int byte count followed by the string's bytes.
 * <p>
 * The bytes are produced with this class's {@link #ENCODING}, the same charset
 * {@code readString} uses to decode them, so the two methods cannot drift apart. A null string
 * is written as a byte count of zero with no data; note that the empty string produces the same
 * bytes on the wire.
 *
 * @param string the string to write; may be null
 * @param dataChannel destination channel
 * @throws IOException if encoding or writing to the channel fails
 */
protected void writeString( String string, WritableDataChannel dataChannel ) throws IOException
{
    if ( string == null )
    {
        dataChannel.writeInt( 0 );
    }
    else
    {
        byte[] stringData = string.getBytes( ENCODING );
        dataChannel.writeInt( stringData.length );
        dataChannel.writeByteArray( stringData );
    }
}
/**
 * Writes a string array as one block of encoded text plus a start offset per row.
 * <p>
 * Layout: the total number of encoded bytes, then one int per row giving the position of the
 * row's first character within the concatenation of all non-null rows, then the encoded bytes
 * of every non-null row in order. Offsets advance by {@code String.length()}, i.e. they count
 * characters of the concatenated string, not encoded bytes. A null row contributes no bytes and
 * does not advance the offset.
 *
 * @param dataChannel destination channel
 * @param array the row values; entries may be null
 * @throws IOException if encoding or writing to the channel fails
 */
protected void writeStringArray( WritableDataChannel dataChannel, String[] array ) throws IOException
{
    final int rowCount = array.length;

    // encode every non-null row once, tallying the total size
    final byte[][] encoded = new byte[rowCount][];
    int totalBytes = 0;
    for ( int row = 0; row < rowCount; row++ )
    {
        if ( array[row] == null )
        {
            continue;
        }
        encoded[row] = array[row].getBytes( ENCODING );
        totalBytes += encoded[row].length;
    }

    // total number of bytes in the encoded string data
    dataChannel.writeInt( totalBytes );

    // treating all rows as one large string, record where each row's first character sits
    int charOffset = 0;
    for ( String value : array )
    {
        dataChannel.writeInt( charOffset );
        if ( value != null )
        {
            charOffset += value.length( );
        }
    }

    // the encoded text itself; null rows were never encoded and are skipped
    for ( byte[] bytes : encoded )
    {
        if ( bytes != null )
        {
            dataChannel.writeByteArray( bytes );
        }
    }
}
/**
 * Reads a string column stored as one block of encoded text plus a start offset per row (the
 * layout produced by {@code writeStringArray}).
 *
 * @param dataChannel source channel
 * @param column the table column being read
 * @param concept the concept associated with the column
 * @param array only its length is used, as the number of rows to read
 * @return a column backed by the decoded text and the per-row start offsets
 * @throws IOException if reading from the channel or decoding fails
 */
protected TableColumn readStringArray( ReadableDataChannel dataChannel, Column column, Concept concept, String[] array ) throws IOException
{
    final int rowCount = array.length;

    // total encoded size, then one start offset per row
    final int totalBytes = dataChannel.readInt( );
    final int[] offsets = new int[rowCount];
    dataChannel.readIntArray( offsets );

    // all of the text is decoded in one go
    final byte[] raw = new byte[totalBytes];
    dataChannel.readByteArray( raw );
    final String joined = new String( raw, ENCODING );

    return new CompactStringTableColumn( column, concept, rowCount, joined, offsets );
}
}