/*! ******************************************************************************
*
* Pentaho Data Integration
*
* Copyright (C) 2002-2013 by Pentaho : http://www.pentaho.com
*
*******************************************************************************
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
******************************************************************************/
package org.pentaho.di.trans.steps.luciddbbulkloader;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.SQLWarning;
import java.sql.Types;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import org.apache.commons.vfs2.FileObject;
import org.pentaho.di.core.Const;
import org.pentaho.di.core.DBCache;
import org.pentaho.di.core.database.Database;
import org.pentaho.di.core.exception.KettleException;
import org.pentaho.di.core.row.RowMetaInterface;
import org.pentaho.di.core.row.ValueMeta;
import org.pentaho.di.core.row.ValueMetaInterface;
import org.pentaho.di.core.util.StreamLogger;
import org.pentaho.di.core.vfs.KettleVFS;
import org.pentaho.di.i18n.BaseMessages;
import org.pentaho.di.trans.Trans;
import org.pentaho.di.trans.TransMeta;
import org.pentaho.di.trans.step.BaseStep;
import org.pentaho.di.trans.step.StepDataInterface;
import org.pentaho.di.trans.step.StepInterface;
import org.pentaho.di.trans.step.StepMeta;
import org.pentaho.di.trans.step.StepMetaInterface;
/**
* Performs a bulk load to a LucidDB table.
*
* Based on Sven Boden's Oracle Bulk Loader step
*
* @author matt
* @since 14-nov-2008
*/
public class LucidDBBulkLoader extends BaseStep implements StepInterface {
private static Class<?> PKG = LucidDBBulkLoaderMeta.class; // for i18n purposes, needed by Translator2!!
private LucidDBBulkLoaderMeta meta;
private LucidDBBulkLoaderData data;
public LucidDBBulkLoader( StepMeta stepMeta, StepDataInterface stepDataInterface, int copyNr,
TransMeta transMeta, Trans trans ) {
super( stepMeta, stepDataInterface, copyNr, transMeta, trans );
}
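  /**
   * Sets up the FIFO file, the LucidDB foreign server pointing at it, and the background load thread, so that
   * processRow() can start streaming rows into the FIFO.
   *
   * @param meta the step metadata
   * @param wait currently unused
   * @return true if the setup succeeded
   * @throws KettleException in case anything goes wrong during setup
   */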
public boolean execute( LucidDBBulkLoaderMeta meta, boolean wait ) throws KettleException {
Runtime rt = Runtime.getRuntime();
try {
String tableName = environmentSubstitute( meta.getTableName() );
// 1) Set up the FIFO folder, create the directory and path to it...
//
String fifoVfsDirectory = environmentSubstitute( meta.getFifoDirectory() );
FileObject directory = KettleVFS.getFileObject( fifoVfsDirectory, getTransMeta() );
directory.createFolder();
String fifoDirectory = KettleVFS.getFilename( directory );
// 2) Create the FIFO file using the "mkfifo" command...
// Make sure to log all the possible output, also from STDERR
//
      data.fifoFilename = fifoDirectory + Const.FILE_SEPARATOR + tableName + ".csv";
      data.bcpFilename = fifoDirectory + Const.FILE_SEPARATOR + tableName + ".bcp";
File fifoFile = new File( data.fifoFilename );
if ( !fifoFile.exists() ) {
        String mkFifoCmd = "mkfifo " + data.fifoFilename;
logBasic( "Creating FIFO file using this command : " + mkFifoCmd );
Process mkFifoProcess = rt.exec( mkFifoCmd );
StreamLogger errorLogger = new StreamLogger( log, mkFifoProcess.getErrorStream(), "mkFifoError" );
        StreamLogger outputLogger = new StreamLogger( log, mkFifoProcess.getInputStream(), "mkFifoOutput" );
new Thread( errorLogger ).start();
new Thread( outputLogger ).start();
int result = mkFifoProcess.waitFor();
if ( result != 0 ) {
throw new Exception( "Return code " + result + " received from statement : " + mkFifoCmd );
}
}
      // 3) Make a connection to LucidDB for sending SQL commands
      // (Also, we need a clear cache for getting up-to-date target metadata)
      // Check for a missing connection before dereferencing it...
      //
      if ( meta.getDatabaseMeta() == null ) {
        logError( BaseMessages.getString( PKG, "LuciDBBulkLoader.Init.ConnectionMissing", getStepname() ) );
        return false;
      }
      DBCache.getInstance().clear( meta.getDatabaseMeta().getName() );
data.db = new Database( this, meta.getDatabaseMeta() );
data.db.shareVariablesWith( this );
// Connect to the database
if ( getTransMeta().isUsingUniqueConnections() ) {
synchronized ( getTrans() ) {
data.db.connect( getTrans().getTransactionId(), getPartitionID() );
}
} else {
data.db.connect( getPartitionID() );
}
logBasic( "Connected to LucidDB" );
// 4) Now we are ready to create the LucidDB FIFO server that will handle the actual bulk loading.
//
String fifoServerStatement = "";
fifoServerStatement += "create or replace server " + meta.getFifoServerName() + Const.CR;
fifoServerStatement += "foreign data wrapper sys_file_wrapper" + Const.CR;
fifoServerStatement += "options (" + Const.CR;
fifoServerStatement += "directory '" + fifoDirectory + "'," + Const.CR;
fifoServerStatement += "file_extension 'csv'," + Const.CR;
fifoServerStatement += "with_header 'no'," + Const.CR;
fifoServerStatement += "num_rows_scan '0'," + Const.CR;
fifoServerStatement += "lenient 'no');" + Const.CR;
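      // With a fifo server name of PDI_FIFO_SERVER and a fifo directory of /tmp/fifo (both hypothetical),
      // the generated DDL looks like this:
      //
      //   create or replace server PDI_FIFO_SERVER
      //   foreign data wrapper sys_file_wrapper
      //   options (
      //   directory '/tmp/fifo',
      //   file_extension 'csv',
      //   with_header 'no',
      //   num_rows_scan '0',
      //   lenient 'no');
      //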
logBasic( "Creating LucidDB fifo_server with the following command: " + fifoServerStatement );
data.db.execStatements( fifoServerStatement );
// 5) Set the error limit in the LucidDB session
// REVIEW jvs 13-Dec-2008: is this guaranteed to retain the same
// connection?
String errorMaxStatement = "";
errorMaxStatement += "alter session set \"errorMax\" = " + meta.getMaxErrors() + ";" + Const.CR;
logBasic( "Setting error limit in LucidDB session with the following command: " + errorMaxStatement );
data.db.execStatements( errorMaxStatement );
// 6) Now we also need to create a bulk loader file .bcp
//
createBulkLoadConfigFile( data.bcpFilename );
// 7) execute the actual load command!
// This will actually block until the load is done in the
// separate execution thread; see notes in executeLoadCommand
// on why it's important for this to occur BEFORE
// opening our end of the FIFO.
//
executeLoadCommand( tableName );
      // 8) Open our end of the FIFO; processRow() will write rows to it from here on.
data.fifoStream = new BufferedOutputStream( new FileOutputStream( fifoFile ) );
} catch ( Exception ex ) {
throw new KettleException( ex );
}
return true;
}
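  /**
   * Prepares the INSERT ... SELECT statement that reads from the FIFO foreign server and starts executing it on a
   * separate SqlRunner thread. The statement blocks until the FIFO is closed, so it must run off the main thread.
   */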
private void executeLoadCommand( String tableName ) throws KettleException {
String loadCommand = "";
loadCommand += "insert into " + data.schemaTable + Const.CR;
loadCommand += "select * from " + meta.getFifoServerName() + ".\"DEFAULT\"." + tableName + Const.CR;
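    // For a (hypothetical) target table "APP"."SALES" and fifo server PDI_FIFO_SERVER, this produces:
    //
    //   insert into "APP"."SALES"
    //   select * from PDI_FIFO_SERVER."DEFAULT".SALES
    //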
// NOTE jvs 13-Dec-2008: We prepare the SQL before spawning the thread
// to execute it. The reason is that if a SQL validation exception
// occurs during preparation (e.g. due to datatype mismatch), we don't
// even want to open our end of the FIFO, otherwise we can get stuck
// since the server is never going to open its end until execution,
// which ain't gonna happen in that case.
logBasic( "Preparing load command : " + Const.CR + loadCommand );
PreparedStatement ps = data.db.prepareSQL( loadCommand );
data.sqlRunner = new SqlRunner( data, ps );
data.sqlRunner.start();
}
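  /**
   * Writes the BCP-style control file that describes each target column to the bulk loader: one header line with the
   * version number, one with the column count, and then one line per column.
   */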
private void createBulkLoadConfigFile( String bcpFilename ) throws KettleException {
File bcpFile = new File( bcpFilename );
FileWriter writer = null;
try {
writer = new FileWriter( bcpFile );
      // The first line is the version number, usually 9.0
//
writer.write( "9.0" + Const.CR );
// The second line contains the number of columns...
//
writer.write( meta.getFieldTable().length + Const.CR );
RowMetaInterface targetFieldMeta = meta.getRequiredFields( this );
data.bulkFormatMeta = new ValueMetaInterface[meta.getFieldTable().length];
// The next block lists the columns from 1..N where N is the number of columns...
//
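      // Each column line consists of eight space-separated fields. For a first column called CUSTOMER_NAME
      // (hypothetical), a line would look like this:
      //
      //   1 SQLVARCHAR 0 1000 "\t" 1 CUSTOMER_NAME ""
      //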
for ( int i = 0; i < meta.getFieldTable().length; i++ ) {
ValueMetaInterface field = getInputRowMeta().getValueMeta( data.keynrs[i] );
// Col 1 : the column number (i+1)
//
writer.write( Integer.toString( i + 1 ) + " " );
// Col 2 : the data type
//
String dataType = null;
switch ( field.getType() ) {
case ValueMetaInterface.TYPE_STRING:
dataType = "SQLVARCHAR";
break;
case ValueMetaInterface.TYPE_BIGNUMBER:
dataType = "SQLREAL";
break;
case ValueMetaInterface.TYPE_NUMBER:
dataType = "SQLFLT8";
break;
case ValueMetaInterface.TYPE_INTEGER:
dataType = "SQLBIGINT";
break;
case ValueMetaInterface.TYPE_DATE:
// Use the actual datatypes in the target table to
// determine how to create the control file column
// definition for date/time fields.
if ( targetFieldMeta.getValueMetaList().get( i ).getOriginalColumnType() == Types.DATE ) {
data.bulkFormatMeta[i] = data.bulkDateMeta;
dataType = "SQLDATE";
} else {
data.bulkFormatMeta[i] = data.bulkTimestampMeta;
dataType = "SQLTIMESTAMP";
}
break;
// REVIEW jvs 13-Dec-2008: enable boolean support?
case ValueMetaInterface.TYPE_BOOLEAN:
dataType = "SQLCHAR";
break;
default:
dataType = "SQLVARCHAR";
break;
}
writer.write( dataType + " " );
        // Col 3 : the prefix length (ignored; we always write 0)
//
writer.write( "0 " );
        // Col 4 : the data length; use the field's length metadata, or a default of 1000 when unspecified (-1)
        //
if ( field.getLength() == -1 ) {
writer.write( "1000 " );
} else {
writer.write( "" + field.getLength() + " " );
}
        // Col 5 : The separator is also ignored; we put a tab in here, as in the sample file
//
writer.write( "\"\\t\" " ); // "\t"
// Col 6 : the column number again...
//
writer.write( Integer.toString( i + 1 ) + " " );
// Col 7 : The identifier
//
writer.write( meta.getFieldTable()[i] + " " );
        // Col 8 : Collation / Format : leave it empty/default for the time being
//
writer.write( "\"\" " ); // ""
// Newline to finish
//
writer.write( Const.CR );
}
} catch ( Exception e ) {
throw new KettleException( "Unable to create BCP control file", e );
} finally {
// That's it, close shop
//
if ( writer != null ) {
try {
writer.close();
} catch ( IOException e ) {
throw new KettleException( "Unable to close BCP file '" + bcpFilename + "'", e );
}
}
}
}
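  /**
   * For the first row, caches the field indexes and calls execute() to set up the FIFO and the load thread. Every row
   * is then written to the FIFO; when the input is exhausted, the FIFO is closed and the load thread is joined.
   */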
public boolean processRow( StepMetaInterface smi, StepDataInterface sdi ) throws KettleException {
meta = (LucidDBBulkLoaderMeta) smi;
data = (LucidDBBulkLoaderData) sdi;
try {
Object[] r = getRow(); // Get row from input rowset & set row busy!
if ( r == null ) {
// no more input to be expected...
setOutputDone();
// Close the fifo file...
//
data.fifoStream.close();
data.fifoStream = null;
// wait for the INSERT statement to finish and check for any
// error and/or warning...
data.sqlRunner.join();
SqlRunner sqlRunner = data.sqlRunner;
data.sqlRunner = null;
for ( String warning : sqlRunner.warnings ) {
// REVIEW jvs 13-Dec-2008: It would be nice if there were
// a logWarning instead?
logError( " (WARNING) " + warning );
}
sqlRunner.checkExcn();
// If there was no fatal exception, but there were warnings,
// retrieve the rejected row count
if ( !sqlRunner.warnings.isEmpty() ) {
ResultSet rs =
data.db.openQuery( "SELECT PARAM_VALUE "
+ "FROM SYS_ROOT.USER_SESSION_PARAMETERS "
+ "WHERE PARAM_NAME='lastRowsRejected'" );
try {
rs.next();
setLinesRejected( rs.getInt( 1 ) );
} finally {
rs.close();
}
}
return false;
}
if ( first ) {
first = false;
// Cache field indexes.
//
data.keynrs = new int[meta.getFieldStream().length];
for ( int i = 0; i < data.keynrs.length; i++ ) {
data.keynrs[i] = getInputRowMeta().indexOfValue( meta.getFieldStream()[i] );
}
// execute the client statement...
//
execute( meta, true );
}
writeRowToBulk( getInputRowMeta(), r );
putRow( getInputRowMeta(), r );
incrementLinesOutput();
return true;
} catch ( Exception e ) {
logError( BaseMessages.getString( PKG, "LucidDBBulkLoader.Log.ErrorInStep" ), e );
setErrors( 1 );
stopAll();
setOutputDone(); // signal end to receiver(s)
return false;
}
}
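  /**
   * Serializes a single row to the FIFO stream as one CSV line, quoting strings and formatting date fields according
   * to the target column type.
   */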
private void writeRowToBulk( RowMetaInterface rowMeta, Object[] r ) throws KettleException {
try {
      // We have an output stream to which we can write CSV data.
      // When the incoming value is already a binary string in an acceptable format, we dump it back as-is;
      // otherwise we convert it through the value metadata first.
      //
      // The data format required is essentially one CSV line per row, for example (hypothetical values):
      //
      //   "John Doe",42,2008-11-14 12:34:56
      //
for ( int i = 0; i < data.keynrs.length; i++ ) {
if ( i > 0 ) {
// Write a separator
//
data.fifoStream.write( data.separator );
}
int index = data.keynrs[i];
ValueMetaInterface valueMeta = rowMeta.getValueMeta( index );
Object valueData = r[index];
if ( valueData != null ) {
switch ( valueMeta.getType() ) {
case ValueMetaInterface.TYPE_STRING:
data.fifoStream.write( data.quote );
if ( valueMeta.isStorageBinaryString() && meta.getFieldFormatOk()[i] ) {
// We had a string, just dump it back.
data.fifoStream.write( (byte[]) valueData );
} else {
data.fifoStream.write( valueMeta.getString( valueData ).getBytes() );
}
data.fifoStream.write( data.quote );
break;
case ValueMetaInterface.TYPE_INTEGER:
if ( valueMeta.isStorageBinaryString() && meta.getFieldFormatOk()[i] ) {
data.fifoStream.write( (byte[]) valueData );
} else {
data.fifoStream.write( Long.toString( valueMeta.getInteger( valueData ) ).getBytes() );
}
break;
case ValueMetaInterface.TYPE_DATE:
// REVIEW jvs 13-Dec-2008: Is it OK to ignore
// FieldFormatOk like this?
/*
* if (false && valueMeta.isStorageBinaryString() && meta.getFieldFormatOk()[i]) {
* data.fifoStream.write((byte[])valueData); } else {
*/
Date date = valueMeta.getDate( valueData );
// Convert it to the ISO timestamp format
// "yyyy-MM-dd HH:mm:ss" // or date format
// "yyyy-MM-dd" as appropriate, since LucidDB
// follows SQL:2003 here
data.fifoStream.write( data.bulkFormatMeta[i].getString( date ).getBytes() );
// }
break;
case ValueMetaInterface.TYPE_BOOLEAN:
if ( valueMeta.isStorageBinaryString() && meta.getFieldFormatOk()[i] ) {
data.fifoStream.write( (byte[]) valueData );
} else {
data.fifoStream.write( Boolean.toString( valueMeta.getBoolean( valueData ) ).getBytes() );
}
break;
case ValueMetaInterface.TYPE_NUMBER:
if ( valueMeta.isStorageBinaryString() && meta.getFieldFormatOk()[i] ) {
data.fifoStream.write( (byte[]) valueData );
} else {
data.fifoStream.write( Double.toString( valueMeta.getNumber( valueData ) ).getBytes() );
}
break;
case ValueMetaInterface.TYPE_BIGNUMBER:
if ( valueMeta.isStorageBinaryString() && meta.getFieldFormatOk()[i] ) {
data.fifoStream.write( (byte[]) valueData );
} else {
data.fifoStream.write( valueMeta.getString( valueData ).getBytes() );
}
break;
default:
break;
}
}
}
// finally write a newline
//
data.fifoStream.write( data.newline );
} catch ( Exception e ) {
throw new KettleException( "Error serializing rows of data to the fifo file", e );
}
}
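  /**
   * Initializes the formatting metadata (quote, separator, date and number conversion masks), the row buffer used
   * while streaming to the FIFO, and the quoted schema-table name.
   */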
public boolean init( StepMetaInterface smi, StepDataInterface sdi ) {
meta = (LucidDBBulkLoaderMeta) smi;
data = (LucidDBBulkLoaderData) sdi;
if ( super.init( smi, sdi ) ) {
data.quote = "\"".getBytes();
data.separator = ",".getBytes();
data.newline = Const.CR.getBytes();
data.bulkTimestampMeta = new ValueMeta( "timestampMeta", ValueMetaInterface.TYPE_DATE );
data.bulkTimestampMeta.setConversionMask( "yyyy-MM-dd HH:mm:ss" );
data.bulkTimestampMeta.setStringEncoding( meta.getEncoding() );
data.bulkDateMeta = new ValueMeta( "dateMeta", ValueMetaInterface.TYPE_DATE );
data.bulkDateMeta.setConversionMask( "yyyy-MM-dd" );
data.bulkDateMeta.setStringEncoding( meta.getEncoding() );
data.bulkNumberMeta = new ValueMeta( "numberMeta", ValueMetaInterface.TYPE_NUMBER );
data.bulkNumberMeta.setConversionMask( "#.#" );
data.bulkNumberMeta.setGroupingSymbol( "," );
data.bulkNumberMeta.setDecimalSymbol( "." );
data.bulkNumberMeta.setStringEncoding( meta.getEncoding() );
data.bufferSize = Const.toInt( environmentSubstitute( meta.getBufferSize() ), 100000 );
// Allocate the buffer
//
data.rowBuffer = new byte[data.bufferSize][];
data.bufferIndex = 0;
// Schema-table combination...
data.schemaTable =
meta.getDatabaseMeta().getQuotedSchemaTableCombination(
environmentSubstitute( meta.getSchemaName() ), environmentSubstitute( meta.getTableName() ) );
return true;
}
return false;
}
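  /**
   * Closes the FIFO stream if it is still open, waits for the SQL execution thread to finish, and releases the
   * database connection.
   */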
public void dispose( StepMetaInterface smi, StepDataInterface sdi ) {
meta = (LucidDBBulkLoaderMeta) smi;
data = (LucidDBBulkLoaderData) sdi;
// Close the output streams if still needed.
//
try {
if ( data.fifoStream != null ) {
data.fifoStream.close();
}
// Stop the SQL execution thread
if ( data.sqlRunner != null ) {
data.sqlRunner.join();
data.sqlRunner = null;
}
// And finally, release the database connection
if ( data.db != null ) {
data.db.disconnect();
data.db = null;
}
} catch ( Exception e ) {
setErrors( 1L );
logError( "Unexpected error encountered while closing the client connection", e );
}
super.dispose( smi, sdi );
}
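  /**
   * Executes the prepared bulk INSERT on its own thread so the main step thread can stream rows into the FIFO
   * concurrently. SQL warnings are collected, and any SQLException is saved for the main thread to rethrow via
   * checkExcn().
   */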
static class SqlRunner extends Thread {
private LucidDBBulkLoaderData data;
private PreparedStatement ps;
private SQLException ex;
List<String> warnings;
SqlRunner( LucidDBBulkLoaderData data, PreparedStatement ps ) {
this.data = data;
this.ps = ps;
warnings = new ArrayList<String>();
}
public void run() {
try {
// TODO jvs 12-Dec-2008: cross-check result against actual number
// of rows sent.
ps.executeUpdate();
// Pump out any warnings and save them.
SQLWarning warning = ps.getWarnings();
while ( warning != null ) {
warnings.add( warning.getMessage() );
warning = warning.getNextWarning();
}
} catch ( SQLException ex ) {
this.ex = ex;
} finally {
try {
data.db.closePreparedStatement( ps );
} catch ( KettleException ke ) {
// not much we can do with this
} finally {
ps = null;
}
}
}
void checkExcn() throws SQLException {
      // This is called from the main thread context to rethrow any saved
      // exception.
if ( ex != null ) {
throw ex;
}
}
}
}