/* * Copyright (c) 2016, Metron, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of Metron, Inc. nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL METRON, INC. BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ package com.metsci.glimpse.dspl.lite; import static com.metsci.glimpse.dspl.util.DataSetFactory.newDataset; import static com.metsci.glimpse.dspl.util.DataSetFactory.newValues; import static com.metsci.glimpse.dspl.util.DsplHelper.linkDataset; import java.io.BufferedReader; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.net.URL; import java.util.regex.Matcher; import java.util.regex.Pattern; import javax.xml.bind.JAXBContext; import javax.xml.bind.JAXBException; import javax.xml.bind.Unmarshaller; import javax.xml.namespace.QName; import org.joda.time.format.DateTimeFormat; import org.joda.time.format.DateTimeFormatter; import com.metsci.glimpse.dspl.DsplParser; import com.metsci.glimpse.dspl.lite.schema.Column; import com.metsci.glimpse.dspl.lite.schema.DsplLite; import com.metsci.glimpse.dspl.lite.schema.DsplLite.Columns; import com.metsci.glimpse.dspl.schema.Attribute; import com.metsci.glimpse.dspl.schema.Concept; import com.metsci.glimpse.dspl.schema.ConceptInfo; import com.metsci.glimpse.dspl.schema.Data; import com.metsci.glimpse.dspl.schema.Data.File; import com.metsci.glimpse.dspl.schema.DataSet; import com.metsci.glimpse.dspl.schema.DataSet.Tables; import com.metsci.glimpse.dspl.schema.DataType; import com.metsci.glimpse.dspl.schema.Slice; import com.metsci.glimpse.dspl.schema.SliceConceptRef; import com.metsci.glimpse.dspl.schema.SliceTableMapping; import com.metsci.glimpse.dspl.schema.Table; import com.metsci.glimpse.dspl.schema.Value; import com.metsci.glimpse.dspl.util.DsplException; import com.metsci.glimpse.dspl.util.DsplHelper; import com.metsci.glimpse.util.io.StreamOpener; public class DsplLiteHelper { public static final String dsplLiteSchema = "com.metsci.glimpse.dspl.lite.schema"; public static final String defaultTimeFormat = "yyyy-MM-dd HH:mm:ss.SSS Z"; public static final DateTimeFormatter defaultFormatter = DateTimeFormat.forPattern( defaultTimeFormat ); public static DataSet loadNonCanonicalDataSet_xml_lite( DsplParser parser, String location ) throws JAXBException, IOException, DsplException { InputStream stream = StreamOpener.fileThenResource.openForRead( location ); JAXBContext jc = JAXBContext.newInstance( dsplLiteSchema ); Unmarshaller unmarshaller = jc.createUnmarshaller( ); DsplLite lite_dataset = ( DsplLite ) unmarshaller.unmarshal( stream ); DataSet dataset = loadNonCanonicalDataSet_xml_lite( lite_dataset, location ); return linkDataset( parser, dataset, new java.io.File( location ) ); } public static DataSet loadNonCanonicalDataSet_csv( DsplParser parser, String location ) throws JAXBException, IOException, DsplException { DsplLite dataset_lite = loadNonCanonicalDataSet_csv_lite( parser, location ); DataSet dataset = loadNonCanonicalDataSet_xml_lite( dataset_lite, location ); return linkDataset( parser, dataset, new java.io.File( location ) ); } protected static DataSet loadNonCanonicalDataSet_xml_lite( DsplLite dspl_lite, String location ) throws JAXBException, IOException, DsplException { String namespace = dspl_lite.getTargetNamespace( ); DataSet dataset = newDatasetWithAllImports( namespace ); InputStream stream = StreamOpener.fileThenResource.openForRead( location ); BufferedReader in = new BufferedReader( new InputStreamReader( stream ) ); try { // create a Slice for the DataSet Slice slice = new Slice( ); slice.setId( "default_slice" ); SliceTableMapping mapping = new SliceTableMapping( ); mapping.setRef( new QName( namespace, "default_table" ) ); slice.setTableMapping( mapping ); dataset.getSlices( ).getSlice( ).add( slice ); // create a Table for the DataSet Table table = new Table( ); dataset.setTables( new Tables( ) ); table.setId( "default_table" ); Data data = new Data( ); File file = new File( ); com.metsci.glimpse.dspl.lite.schema.File file_lite = dspl_lite.getFile( ); file.setEncoding( file_lite.getEncoding( ) ); // only utf-8 is supported file.setFormat( file_lite.getFormat( ) ); // csv plus header information file.setValue( file_lite.getValue( ) ); data.setFile( file ); table.setData( data ); dataset.getTables( ).getTable( ).add( table ); // add a new Concept for each header line for ( Column column : dspl_lite.getColumns( ).getColumn( ) ) { String header = column.getId( ); Concept concept = new Concept( ); concept.setId( header ); concept.setDataSet( dataset ); dataset.getConcepts( ).getConcept( ).add( concept ); ConceptInfo info = new ConceptInfo( ); concept.setInfo( info ); boolean isMetric = true; String format = null; DataType type = null; // set the type of the concept (float,integer,string,date,long,concept) if ( column.getType( ) != null ) { type = DataType.fromValue( column.getType( ) ); Concept.Type typeElement = new Concept.Type( ); typeElement.setRef( type ); concept.setType( typeElement ); } // set the units associated with the concept (this is set as an attribute of the concept) if ( column.getUnit( ) != null ) { Attribute attribute = new Attribute( ); attribute.setId( "unit" ); attribute.setParentConcept( concept ); Value valueElement = new Value( ); valueElement.setValue( column.getUnit( ) ); attribute.getValue( ).add( valueElement ); Attribute.Type typeElement = new Attribute.Type( ); typeElement.setRef( DataType.CONCEPT ); attribute.setType( typeElement ); attribute.setConceptRef( new QName( "http://www.metsci.com/dspl/physical_units", "physical_unit" ) ); concept.getAttribute( ).add( attribute ); } // set the parent concept if ( column.getParent( ) != null ) { if ( !column.getParent( ).equals( "none" ) ) { Pattern p = Pattern.compile( "\\{(.*)\\}(.*)" ); Matcher m = p.matcher( column.getParent( ) ); if ( m.matches( ) ) { String parentNamespace = m.group( 1 ); String parentConcept = m.group( 2 ); concept.setExtends( new QName( parentNamespace, parentConcept ) ); } else { throw new DsplException( "Failed to parse %s. Malformed parent parameter %s.", location, column.getParent( ) ); } } } else { // if no parent is specified, automatically assign one for certain types switch ( type ) { case INTEGER: concept.setExtends( new QName( "http://www.google.com/publicdata/dataset/google/quantity", "amount" ) ); case FLOAT: concept.setExtends( new QName( "http://www.google.com/publicdata/dataset/google/quantity", "magnitude" ) ); default: // do nothing } } if ( column.getName( ) != null ) { info.setName( newValues( column.getName( ) ) ); } if ( column.getDescription( ) != null ) { info.setDescription( newValues( column.getDescription( ) ) ); } if ( column.getUrl( ) != null ) { info.setUrl( newValues( column.getUrl( ) ) ); } if ( column.getFormat( ) != null ) { format = column.getFormat( ); } isMetric = !column.isKey( ); SliceConceptRef conceptRef = new SliceConceptRef( ); conceptRef.setConceptRef( new QName( namespace, header ) ); if ( isMetric ) { slice.getMetric( ).add( conceptRef ); } else { slice.getDimension( ).add( conceptRef ); } com.metsci.glimpse.dspl.schema.Table.Column dspl_column; dspl_column = new com.metsci.glimpse.dspl.schema.Table.Column( ); dspl_column.setId( header ); dspl_column.setFormat( format ); table.getColumn( ).add( dspl_column ); } return dataset; } finally { in.close( ); } } protected static DsplLite loadNonCanonicalDataSet_csv_lite( DsplParser parser, String location ) throws JAXBException, IOException, DsplException { InputStream stream = StreamOpener.fileThenResource.openForRead( location ); BufferedReader in = new BufferedReader( new InputStreamReader( stream ) ); try { DsplLite dspl_lite = new DsplLite( ); dspl_lite.setColumns( new Columns( ) ); dspl_lite.setTargetNamespace( location ); com.metsci.glimpse.dspl.lite.schema.File file; file = new com.metsci.glimpse.dspl.lite.schema.File( ); file.setValue( getFileName( location ) ); dspl_lite.setFile( file ); // read the header line String line = in.readLine( ); String[] headers = line.split( "," ); int size = headers.length; line = in.readLine( ); String[] data = line.split( "," ); String[] types = inferTypesFromData( data ); // add a new Concept for each header line for ( int i = 0; i < size; i++ ) { String header = headers[i]; String type = types[i]; Column column = new Column( ); column.setId( header ); column.setType( type ); if ( type.equals( "date" ) ) column.setFormat( defaultTimeFormat ); dspl_lite.getColumns( ).getColumn( ).add( column ); } return dspl_lite; } finally { in.close( ); } } protected static String[] inferTypesFromData( String[] data ) { int size = data.length; String[] types = new String[size]; for ( int i = 0; i < size; i++ ) { types[i] = inferTypeFromData( data[i] ); } return types; } protected static String inferTypeFromData( String data ) { if ( data == null || data.isEmpty( ) ) return "string"; try { Float.parseFloat( data ); return "float"; } catch ( NumberFormatException e ) { } try { Integer.parseInt( data ); return "integer"; } catch ( NumberFormatException e ) { } // can't use Boolean.parseBoolean( data ) here because // it interprets everything which is not "true" as false if ( data.equals( "true" ) || data.equals( "false" ) ) { return "boolean"; } try { defaultFormatter.parseMillis( data ); return "date"; } catch ( IllegalArgumentException e ) { } return "string"; } protected static String getFileName( String location ) { try { URL url = DsplHelper.class.getClassLoader( ).getResource( location ); if ( url != null ) { String path = url.getFile( ); int index = path.lastIndexOf( "/" ); return path.substring( index + 1, path.length( ) ); } } catch ( Exception e ) { // do nothing, try loading as a local file } java.io.File file = new java.io.File( location ); return file.getName( ); } protected static DataSet newDatasetWithAllImports( String name ) { //@formatter:off return newDataset( name, "http://www.google.com/publicdata/dataset/google/entity", "http://www.google.com/publicdata/dataset/google/geo", "http://www.google.com/publicdata/dataset/google/quantity", "http://www.google.com/publicdata/dataset/google/unit", "http://www.metsci.com/dspl/time", "http://www.metsci.com/dspl/physical_units", "http://www.metsci.com/dspl/track" ); //@formatter:on } }