/* * Copyright (c) 2016, Metron, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of Metron, Inc. nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL METRON, INC. BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ package com.metsci.glimpse.dspl.parser.util; import java.io.BufferedReader; import java.io.BufferedWriter; import java.io.File; import java.io.FileInputStream; import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.io.OutputStream; import java.io.OutputStreamWriter; import java.net.URL; import java.security.NoSuchAlgorithmException; import java.util.ArrayList; import java.util.List; import java.util.logging.Logger; import javax.xml.bind.JAXBException; import com.metsci.glimpse.dspl.DsplParser; import com.metsci.glimpse.dspl.parser.MultipleFileTableParser; import com.metsci.glimpse.dspl.parser.TableParser; import com.metsci.glimpse.dspl.parser.TableWriter; import com.metsci.glimpse.dspl.parser.table.PropertyTableData; import com.metsci.glimpse.dspl.parser.table.SliceTableData; import com.metsci.glimpse.dspl.schema.Concept; import com.metsci.glimpse.dspl.schema.DataSet; import com.metsci.glimpse.dspl.schema.Slice; import com.metsci.glimpse.dspl.schema.Table; import com.metsci.glimpse.dspl.util.DsplException; import com.metsci.glimpse.dspl.util.DsplHelper; import com.metsci.glimpse.dspl.util.FileNameCleaner; import com.metsci.glimpse.dspl.util.MD5Checksum; public class DsplCacheHelper { public static final Logger logger = Logger.getLogger( DsplCacheHelper.class.getName( ) ); public static final String CACHE_ROOT = ".dspl"; public static final String CACHE_FILE = "cache_data"; public static final String CACHE_FORMAT = "bin"; public static final String HASH_FILE = ".md5"; public static PropertyTableData getTableData( Concept concept ) throws DsplException, JAXBException, IOException { try { if ( concept == null ) return null; DataSet dataset = concept.getDataSet( ); if ( dataset == null ) return null; DsplParser dsplParser = dataset.getParser( ); if ( dsplParser == null ) return null; String calculatedHash = getCalculatedHash( concept ); if ( calculatedHash == null ) return null; Table table = concept.getTable( ); if ( table == null ) return null; TableParser parser = dsplParser.getTableParser( table ); if ( parser == null ) return null; // check whether this data type should be cached (we don't bother for fast formats) // if not, simply parse it in the regular way if ( !parser.isCachable( ) ) { return parser.parse( concept ); } String cachedHash = getCachedHash( concept ); // the cache has not been created or the csv data has changed, load the data from the csv file and recreate the hash if ( cachedHash == null || !cachedHash.equals( calculatedHash ) ) { PropertyTableData tableData = parser.parse( concept ); File cacheFile = getCacheFile( concept ); FileOutputStream cacheFileStream = new FileOutputStream( cacheFile ); String cacheFormat = getExtension( cacheFile ); TableParser cacheParser = dsplParser.getTableParser( cacheFormat ); if ( cacheParser instanceof TableWriter ) { ( ( TableWriter ) cacheParser ).write( concept, tableData, cacheFileStream.getChannel( ) ); writeHash( concept, calculatedHash ); } else { throw new DsplException( "Invalid cache format: %s. TableParser %s is not a TableWriter.", cacheFormat, cacheParser.getClass( ) ); } return tableData; } // load the data from the cache else { File cacheFile = getCacheFile( concept ); FileInputStream cacheFileStream = new FileInputStream( cacheFile ); TableParser cacheParser = dsplParser.getTableParser( getExtension( cacheFile ) ); return cacheParser.parse( concept, cacheFileStream.getChannel( ) ); } } catch ( NoSuchAlgorithmException e ) { throw new DsplException( "Unable to load from cache.", e ); } } public static SliceTableData getTableData( Slice slice ) throws DsplException, JAXBException, IOException { try { if ( slice == null ) return null; DataSet dataset = slice.getDataSet( ); if ( dataset == null ) return null; DsplParser dsplParser = dataset.getParser( ); if ( dsplParser == null ) return null; String calculatedHash = getCalculatedHash( slice ); if ( calculatedHash == null ) return null; Table table = slice.getTable( ); if ( table == null ) return null; TableParser parser = dsplParser.getTableParser( table ); if ( parser == null ) return null; // check whether this data type should be cached (we don't bother for fast formats) // if not, simply parse it in the regular way if ( !parser.isCachable( ) ) { return parser.parse( slice ); } String cachedHash = getCachedHash( slice ); // the cache has not been created or the csv data has changed, load the data from the csv file and recreate the hash if ( cachedHash == null || !cachedHash.equals( calculatedHash ) ) { SliceTableData tableData = parser.parse( slice ); File cacheFile = getCacheFile( slice ); OutputStream cacheFileStream = new FileOutputStream( cacheFile ); String cacheFormat = getExtension( cacheFile ); TableParser cacheParser = dsplParser.getTableParser( cacheFormat ); if ( cacheParser instanceof TableWriter ) { ( ( TableWriter ) cacheParser ).write( slice, tableData, cacheFileStream ); writeHash( slice, calculatedHash ); } else { throw new DsplException( "Invalid cache format: %s. TableParser %s is not a TableWriter.", cacheFormat, cacheParser.getClass( ) ); } return tableData; } // load the data from the cache else { File cacheFile = getCacheFile( slice ); InputStream cacheFileStream = new FileInputStream( cacheFile ); TableParser cacheParser = dsplParser.getTableParser( getExtension( cacheFile ) ); return cacheParser.parse( slice, cacheFileStream ); } } catch ( NoSuchAlgorithmException e ) { throw new DsplException( "Unable to load from cache.", e ); } } public static File getCacheDirectory( Concept concept ) throws DsplException, JAXBException, IOException { return getCacheDirectory( concept.getDataSet( ), concept.getTable( ), concept.getId( ) ); } public static File getCacheDirectory( Slice slice ) throws DsplException, JAXBException, IOException { return getCacheDirectory( slice.getDataSet( ), slice.getTable( ), slice.getId( ) ); } protected static File getCacheDirectory( DataSet dataset, Table table, String id ) throws DsplException, JAXBException, IOException { if ( dataset == null || table == null || id == null ) throw new DsplException( "Dataset, Table, or Id is not initialized. Unable to create cache file." ); File cacheDirectory = dataset.getParser( ).getCacheDirectory( ); if ( cacheDirectory == null ) { String userHome = System.getProperty( "user.home" ); cacheDirectory = new File( userHome ); if ( !cacheDirectory.canWrite( ) ) { String tempDir = System.getProperty( "java.io.tmpdir" ); cacheDirectory = new File( tempDir ); if ( !cacheDirectory.canWrite( ) ) { throw new DsplException( "Unable to save cached dspl data files." ); } } } String namespace = null; if ( dataset.getTargetNamespace( ) == null ) { namespace = dataset.getFile( ).getName( ); } else { namespace = FileNameCleaner.cleanFileName( dataset.getTargetNamespace( ) ); } String sliceId = FileNameCleaner.cleanFileName( id ); File dsplCacheBase = new File( cacheDirectory, CACHE_ROOT ); File namespaceDir = new File( dsplCacheBase, namespace ); File sliceDir = new File( namespaceDir, sliceId ); if ( sliceDir.exists( ) ) { return sliceDir; } boolean success = sliceDir.mkdirs( ); if ( !success ) { throw new DsplException( "Unable to save cached dspl data files." ); } return sliceDir; } public static File getCacheFile( Concept concept ) throws DsplException, JAXBException, IOException, NoSuchAlgorithmException { return getCacheFile( concept.getDataSet( ), concept.getTable( ), concept.getId( ) ); } public static File getCacheFile( Slice slice ) throws DsplException, JAXBException, IOException, NoSuchAlgorithmException { return getCacheFile( slice.getDataSet( ), slice.getTable( ), slice.getId( ) ); } protected static File getCacheFile( DataSet dataset, Table table, String id ) throws DsplException, JAXBException, IOException, NoSuchAlgorithmException { //String dataFile = table.getData( ).getFile( ).getValue( ); File sliceDir = getCacheDirectory( dataset, table, id ); return new File( sliceDir, CACHE_FILE + "." + CACHE_FORMAT ); } public static String getCalculatedHash( Slice slice ) throws DsplException, JAXBException, IOException, NoSuchAlgorithmException { DataSet dataset = slice.getDataSet( ); DsplParser dsplParser = dataset.getParser( ); Table table = slice.getTable( ); TableParser parser = dsplParser.getTableParser( table ); // the existence of ExtendedCsvParser complicates things because the data is split over multiple // tables so the hash must hash over all the data files // we handle this by computing a hash for each file then computing the hash of the individual hashes if ( parser instanceof MultipleFileTableParser ) { MultipleFileTableParser multiFileParser = ( MultipleFileTableParser ) parser; List<URL> fileList = multiFileParser.getDataFiles( slice ); List<byte[]> fileHashList = new ArrayList<byte[]>( ); for ( URL file : fileList ) { fileHashList.add( MD5Checksum.createChecksum( file.openStream( ) ) ); } int totalSize = 0; for ( byte[] hash : fileHashList ) { totalSize += hash.length; } int currentSize = 0; byte[] allHashes = new byte[totalSize]; for ( byte[] hash : fileHashList ) { System.arraycopy( hash, 0, allHashes, currentSize, hash.length ); currentSize += hash.length; } return MD5Checksum.getMD5Checksum( allHashes ); } else { InputStream hashIn = DsplHelper.getTableInputStream( slice ); return MD5Checksum.getMD5Checksum( hashIn ); } } public static String getCalculatedHash( Concept concept ) throws DsplException, JAXBException, IOException, NoSuchAlgorithmException { DataSet dataset = concept.getDataSet( ); DsplParser dsplParser = dataset.getParser( ); Table table = concept.getTable( ); TableParser parser = dsplParser.getTableParser( table ); // the existence of ExtendedCsvParser complicates things because the data is split over multiple // tables so the hash must hash over all the data files // we handle this by computing a hash for each file then computing the hash of the individual hashes if ( parser instanceof MultipleFileTableParser ) { MultipleFileTableParser multiFileParser = ( MultipleFileTableParser ) parser; List<URL> fileList = multiFileParser.getDataFiles( concept ); List<byte[]> fileHashList = new ArrayList<byte[]>( ); for ( URL file : fileList ) { fileHashList.add( MD5Checksum.createChecksum( file.openStream( ) ) ); } int totalSize = 0; for ( byte[] hash : fileHashList ) { totalSize += hash.length; } int currentSize = 0; byte[] allHashes = new byte[totalSize]; for ( byte[] hash : fileHashList ) { System.arraycopy( hash, 0, allHashes, currentSize, hash.length ); currentSize += hash.length; } return MD5Checksum.getMD5Checksum( allHashes ); } else { InputStream hashIn = DsplHelper.getTableInputStream( concept ); return MD5Checksum.getMD5Checksum( hashIn ); } } public static String getCachedHash( Slice slice ) throws DsplException, JAXBException, IOException { return getCachedHash( slice.getDataSet( ), slice.getTable( ), slice.getId( ) ); } public static String getCachedHash( Concept concept ) throws DsplException, JAXBException, IOException { return getCachedHash( concept.getDataSet( ), concept.getTable( ), concept.getId( ) ); } protected static String getCachedHash( DataSet dataset, Table table, String id ) throws DsplException, JAXBException, IOException { File cacheDirectory = getCacheDirectory( dataset, table, id ); for ( File file : cacheDirectory.listFiles( ) ) { String fileName = file.getName( ); if ( fileName.equals( HASH_FILE ) ) { BufferedReader in = new BufferedReader( new InputStreamReader( new FileInputStream( file ) ) ); try { return in.readLine( ); } finally { in.close( ); } } } return null; } public static void writeHash( Concept concept, String hash ) throws IOException, DsplException, NoSuchAlgorithmException, JAXBException { writeHash( getCacheDirectory( concept ), hash ); } public static void writeHash( Slice slice, String hash ) throws IOException, DsplException, NoSuchAlgorithmException, JAXBException { writeHash( getCacheDirectory( slice ), hash ); } public static void writeHash( File directory, String hash ) throws IOException, DsplException, NoSuchAlgorithmException, JAXBException { File hashFile = new File( directory, HASH_FILE ); BufferedWriter out = new BufferedWriter( new OutputStreamWriter( new FileOutputStream( hashFile ) ) ); out.write( hash ); out.close( ); } protected static String getExtension( File file ) { if ( file == null ) return null; String name = file.getName( ); int index = name.lastIndexOf( "." ); return name.substring( index + 1, name.length( ) ); } }