/* * (C) Copyright IBM Corp. 2008 * * LICENSE: Eclipse Public License v1.0 * http://www.eclipse.org/legal/epl-v10.html */ package com.ibm.db2j; import java.io.File; import java.sql.ParameterMetaData; import java.sql.ResultSetMetaData; import java.sql.SQLException; import java.util.Arrays; import java.util.Hashtable; import java.util.Map; import org.apache.derby.iapi.error.StandardException; import org.apache.derby.iapi.store.access.Qualifier; import org.apache.derby.iapi.types.DataValueDescriptor; import org.apache.derby.iapi.types.SQLChar; import org.apache.derby.vti.IFastPath; import org.apache.derby.vti.VTIEnvironment; import org.apache.derby.vti.VTIMetaDataTemplate; import com.ibm.db2j.tools.ImportExportSQLException; import com.ibm.gaiandb.GaianChildVTI; import com.ibm.gaiandb.GaianDBConfig; import com.ibm.gaiandb.GaianNode; import com.ibm.gaiandb.GaianResultSetMetaData; import com.ibm.gaiandb.Logger; import com.ibm.gaiandb.RowsFilter; import com.ibm.gaiandb.Util; import com.ibm.gaiandb.diags.GDBMessages; import com.ibm.gaiandb.lite.LiteParameterMetaData; /** * @author DavidVyvyan */ public class FileImport extends AbstractVTI implements GaianChildVTI { // Note does not support IFastPath yet // Use PROPRIETARY notice if class contains a main() method, otherwise use COPYRIGHT notice. public static final String COPYRIGHT_NOTICE = "(c) Copyright IBM Corp. 2008"; private static final Logger logger = new Logger( "FileImport", 30 ); private final String filePath; // Count of all columns in the File private int columnCount = 0; private String[] currentRowOfStringsFromFile = null; private DataValueDescriptor[] dvdRowTemplateForFile = null; private String[] blankRowOfStrings = {null}; private int[] projectedColumns = null; private Qualifier[][] qualifiers = null; // private static String getFile( String s ) throws IOException { // InputStream is = null; // String ucs = s.toUpperCase(); // if ( ucs.endsWith(".ZIP") ) is = new ZipInputStream( new FileInputStream(s) ); // if ( ucs.endsWith(".GZIP") ) is = new GZIPInputStream( new FileInputStream(s) ); // // if ( null != is ) { // System.out.println("unizipping..."); // s = s.substring(0, s.lastIndexOf('.')); // OutputStream os = new FileOutputStream(s); // // byte[] bytes = new byte[1024]; // int numBytes; // while (0 < (numBytes = is.read(bytes))) // os.write(bytes, 0, numBytes); // // is.close(); os.close(); // } // return s; // } private static final Map<String, String> controlFiles = new Hashtable<String, String>(); private static final String EXTN_PROPERTIES = ".properties"; private static final String FILE_IMPORT_DEFAULT_PROPERTIES = "FileImportDefaults" + EXTN_PROPERTIES; private static final String FILE_IMPORT_CONTROLS_DIR = "FileImportControls" + File.separatorChar; /** * The "control file" is used by the Derby FileImport class, and contains properties that describe the structure of the File to be processed. * The resolution of the location of a control file is best described with an example: * Assuming a workspace path for the GaianDB node at '/node-workspace', the control file for a data file '/a/b/file.dat' will be searched for and * resolved in this order of precedence: * * 1) /a/b/file.dat.properties * 2) /a/b/FileImportDefaults.properties * 3) /node-workspace/FileImportControls/a/b/file.dat.properties * 4) /node-workspace/FileImportControls/a/b/FileImportDefaults.properties * 5) /node-workspace/FileImportControls/b/file.dat.properties * 6) /node-workspace/FileImportControls/b/FileImportDefaults.properties * 7) /node-workspace/FileImportControls/file.dat.properties * 8) /node-workspace/FileImportControls/FileImportDefaults.properties * * If no control file is resolved using this process, the default CSV format is assumed. * * @param s The path to the data file. * @return The path to the control file for the given data file. */ public static String getControlFile( String datafilePath ) { String controlfilePath = null, gdbCtrlDir = null; // System.out.println("ctrl file exists: " + s + ".properties: " + new File(s+".properties").exists()); // System.out.println("Default line separator: " + System.getProperty("line.separator")); // System.out.println("Default codeset: " + (new InputStreamReader(System.in)).getEncoding()); // String ucs = s.toUpperCase(); // if ( ucs.endsWith(".ZIP") || ucs.endsWith(".GZIP") ) s = s.substring(0, s.lastIndexOf('.')); // NOTE : By convention, all variable names here ending in 'Dir' designate directory names INCLUDING a folder separator ('/' or '\') at the end. File datafile = new File(datafilePath); String datafileDir = datafile.getParent() + File.separatorChar; String datafileName = datafile.getName(); for ( String candidate : new String[] { datafilePath+EXTN_PROPERTIES, datafileDir + FILE_IMPORT_DEFAULT_PROPERTIES } ) if ( new File(candidate).exists() ) { controlfilePath = candidate; break; } if ( null == controlfilePath ) { try { gdbCtrlDir = GaianNode.getWorkspaceDir(); } catch (Exception e) { logger.logInfo("getControlFile("+datafilePath+") unable to resolve install path: " + e); } logger.logDetail("Workspace path: " + gdbCtrlDir); if ( null != gdbCtrlDir ) { gdbCtrlDir += File.separatorChar + FILE_IMPORT_CONTROLS_DIR; int idx; String relativeDir = datafileDir; if ( Util.isWindowsOS && -1 != (idx = relativeDir.indexOf(':')) ) relativeDir = relativeDir.substring(idx+1); // Don't use File.separatorChar on Windows... '/' is valid everywhere. while ( 0 < relativeDir.length() && Util.isSeparatorChar( relativeDir.charAt(0) ) ) relativeDir = relativeDir.substring(1); logger.logInfo("Searching under config folder " + gdbCtrlDir + ", all sub-locations of relativeDir: " + relativeDir); for ( idx = 0; null == controlfilePath && -1 < idx; relativeDir = relativeDir.substring(idx+1) ) { logger.logDetail("Testing relativeDir: " + relativeDir); for ( String candidate : new String[] { gdbCtrlDir + relativeDir + datafileName+EXTN_PROPERTIES, gdbCtrlDir + relativeDir + FILE_IMPORT_DEFAULT_PROPERTIES } ) { logger.logDetail("Resolving path for: " + datafileName + ", candidate: " + candidate); if ( new File(candidate).exists() ) { controlfilePath = candidate; break; } } idx = Util.indexOfFileSeparator( relativeDir ); // Don't use File.separatorChar on Windows... '/' is valid everywhere. } } } logger.logInfo("Resolved controlfilePath: " + controlfilePath); if ( null != controlfilePath ) controlFiles.put(datafilePath, controlfilePath); else logger.logWarning(GDBMessages.CONFIG_LT_SET_CONTROL_FILE_NOT_FOUND, "No control file found for: " + datafilePath + " (defaulting to csv format) - i.e. Couldn't resolve '" + datafileName + ".properties' or 'FileImportDefaults.properties' at data file location or at workspace locations under: " + gdbCtrlDir ); return controlfilePath; } public GaianResultSetMetaData getMetaData() { try { return new GaianResultSetMetaData( fileImportMetaData, null ); } catch (Exception e) { e.printStackTrace(); } return null; } private final ResultSetMetaData fileImportMetaData; private class FileImportMedataData extends VTIMetaDataTemplate { private static final int DEFAULT_COLUMN_WIDTH = 255; private final ResultSetMetaData md; private FileImportMedataData( ResultSetMetaData rsmd ) { md = rsmd; } public int getColumnCount() throws SQLException { return md.getColumnCount(); } public String getColumnName(int i) throws SQLException { return md.getColumnName(i); } public int getColumnType(int i) throws SQLException { return md.getColumnType(i); } public int isNullable(int i) throws SQLException { return md.isNullable(i); } public String getColumnTypeName(int i) { return "VARCHAR"; } public int getColumnDisplaySize(int i) throws SQLException { int w = md.getColumnDisplaySize(i); // The following condition is true if the file is not ASCII_FIXED (by default it is ASCII_DELIMITED). if ( 0x7fffffff == w ) return DEFAULT_COLUMN_WIDTH; return w; } public boolean isWrapperFor(Class<?> iface) throws SQLException { return false; } public <T> T unwrap(Class<T> iface) throws SQLException { return null; } } private final FileImportDerby fileImport; private class FileImportDerby extends com.ibm.db2j.tools.FileImport { public FileImportDerby(String filePath, String controlFilePath) throws Exception { super(filePath, controlFilePath); } public String[] getFetchedRow() { return nextRow; } } /** * This is the entry point for the VTI. * * @param s The file path * @throws Exception */ public FileImport(String s) throws Exception { s = GaianDBConfig.resolvePathTags(s); fileImport = new FileImportDerby( s, getControlFile(s) ); // super(/*getFile(s)*/ s, getControlFile(s)); fileImportMetaData = new FileImportMedataData( fileImport.getMetaData() ); // System.out.println("CFR: cdef " + getControlFileReader().getColumnDefinition() + ", format " + getControlFileReader().getFormat()); filePath = s; columnCount = getMetaData().getColumnCount(); projectedColumns = new int[ columnCount ]; for ( int i=0; i<columnCount; i++ ) projectedColumns[i] = i+1; // 1-based dvdRowTemplateForFile = new DataValueDescriptor[ columnCount ]; for ( int i=0; i<columnCount; i++ ) dvdRowTemplateForFile[i] = new SQLChar(); } @Override public boolean pushProjection(VTIEnvironment arg0, int[] arg1) throws SQLException { logger.logThreadDetail("Entered FileImport.pushProjection(), projection: " + Util.intArrayAsString(arg1)); if ( null != arg1) projectedColumns = arg1; return true; } @Override public void setQualifiers(VTIEnvironment vtie, Qualifier[][] qual) throws SQLException { logger.logThreadDetail("Entered FileImport.setQualifiers(), qualifiers: " + RowsFilter.reconstructSQLWhereClause(qual)); qualifiers = qual; } @Override public boolean executeAsFastPath() throws StandardException, SQLException { reinitialise(); // necessary if this FileImport is not a data source of Gaian... (Gaian would explicitly invoke it when recycling the source) return true; // nothing else to execute } /** * Overrides IFastPath.nextRow() - Derby API method used when this VTI is referenced directly in SQL, or invoked by AbstractVTI. * NOTE: Column indexes in the row[] are relative to the physical source, i.e. the File's columns (i.e NOT a logical table set of columns) */ @Override public int nextRow( final DataValueDescriptor[] vtiRow ) throws SQLException { // 'vtiRow' contains DataValueDescriptor col type classes for the queried columns. // The nextRow String types from the file will get converted into the row types using these Derby DataValueDescriptor type classes. //repeat until we get a non blank line that matches the qualifier conditions do { // System.out.println("ImportAbstract.next()..."); try { if ( false == fileImport.next() ) return IFastPath.SCAN_COMPLETED; } // try { if ( false == fileImport.next() ) { logger.logInfo("NO MORE ROWS " + this); return IFastPath.SCAN_COMPLETED; } logger.logInfo("GOT ROW " + this);} catch ( ImportExportSQLException e ) { System.out.println("Check File: " + filePath); logger.logWarning(GDBMessages.DSWRAPPER_FILE_IMPORT_NEXT_ERROR_SQL, "Error in next() while importing " + filePath + e); return IFastPath.SCAN_COMPLETED; } catch ( Exception e ) { logger.logException(GDBMessages.DSWRAPPER_FILE_IMPORT_POSSIBLE_STRUCTURE_ERROR, "FileImport exception in next(). Possible structure issue with file " + filePath + ", e.g. missing final record delimiter at EOF: " + e,e ); return IFastPath.SCAN_COMPLETED; } currentRowOfStringsFromFile = fileImport.getFetchedRow(); if (!Arrays.equals(currentRowOfStringsFromFile,blankRowOfStrings)) { for ( int i=0; i<projectedColumns.length; i++ ) { int pColID = projectedColumns[i]-1; try { // if ( null == currentRowOfStringsFromFile[pcolID] ) { // logger.logWarning("File was modified during row fetch - truncated stream - ending fetch for this source"); // return false; // } vtiRow[pColID].setValue( currentRowOfStringsFromFile[pColID] ); // This does type conversion from String to the expected type for this column in vtiRow[] } catch ( ArrayIndexOutOfBoundsException e ) { logger.logException( GDBMessages.DSWRAPPER_LOGICAL_COLUMN_REF_ERROR, "Error referencing Physical column " + (pColID+1) + " which does not exist in File " + filePath + ". Null Field will be returned for this node", e); vtiRow[pColID].setToNull(); } catch (Exception e) { String controlfilePath = controlFiles.get(filePath); logger.logException( GDBMessages.DSWRAPPER_LOGICAL_COLUMN_REF_ERROR, "Unable to set cell value from file's column " + (pColID+1) + (currentRowOfStringsFromFile.length > pColID ? ", column value: [" + currentRowOfStringsFromFile[pColID] + "]" : "") + " to the intended cell column type: " + vtiRow[pColID].getTypeName() + ( null == controlfilePath ? " (using default formatting - no control file used)" : " (formatting control file: " + controlfilePath + ")" ) + ", cause: ", e ); vtiRow[pColID].setToNull(); } } } } while (Arrays.equals(currentRowOfStringsFromFile,blankRowOfStrings)|| //The line is blank, get the next one (null != qualifiers && false == RowsFilter.testQualifiers( vtiRow, qualifiers )) );//The line doesn't match qualifier conditions, get the next one return IFastPath.GOT_ROW; } public int getRowCount() throws Exception { // Note row count cannot be cached because file may change between invocations. // Use a row of String DVDs to hold the file's col values which will be tested // against qualifiers - note that String has lowest precedence, so the types of the constant // values against which the strings are compared will always take precedence, meaning the strings // will be converted to whichever types the constants are when they are compared. fileImport.close(); int i = 0; while ( fetchNextRow(dvdRowTemplateForFile) ) i++; // don't close() now - as we need any other calls to nextRow() to return SCAN_COMPLETED (e.g. in explain queries) return i; } public boolean reinitialise() throws SQLException { // Just need to cleanup the underlying FileImport object. fileImport.close(); currentRowOfStringsFromFile = null; // logger.logInfo("REINITIALISED " + this); return true; } public void close() throws SQLException { reinitialise(); } public boolean isBeforeFirst() { return null == currentRowOfStringsFromFile; } public boolean isScrollable() { return false; } // This method is called by the udp driver when in LITE mode public ParameterMetaData getParameterMetaData() throws SQLException { return !GaianNode.IS_UDP_DRIVER_EXCLUDED_FROM_RELEASE && GaianNode.isLite() ? new LiteParameterMetaData() : null; } @Override public double getEstimatedCostPerInstantiation(VTIEnvironment arg0) throws SQLException { return 0; } @Override public double getEstimatedRowCount(VTIEnvironment arg0) throws SQLException { return 0; } @Override public boolean supportsMultipleInstantiations(VTIEnvironment arg0) throws SQLException { return false; } }