/*! ******************************************************************************
*
* Pentaho Data Integration
*
* Copyright (C) 2002-2017 by Pentaho : http://www.pentaho.com
*
*******************************************************************************
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
******************************************************************************/
package org.pentaho.di.trans.steps.pgbulkloader;
//
// The "designer" notes of the PostgreSQL bulkloader:
// ----------------------------------------------
//
// Let's see how fast we can push data down the tube with the use of COPY FROM STDIN
//
//
import java.io.IOException;
import java.math.BigDecimal;

import org.pentaho.di.core.Const;
import org.pentaho.di.core.database.Database;
import org.pentaho.di.core.database.DatabaseMeta;
import org.pentaho.di.core.exception.KettleException;
import org.pentaho.di.core.row.RowMetaInterface;
import org.pentaho.di.core.row.ValueMetaInterface;
import org.pentaho.di.core.util.Utils;
import org.pentaho.di.i18n.BaseMessages;
import org.pentaho.di.trans.Trans;
import org.pentaho.di.trans.TransMeta;
import org.pentaho.di.trans.step.BaseStep;
import org.pentaho.di.trans.step.StepDataInterface;
import org.pentaho.di.trans.step.StepInterface;
import org.pentaho.di.trans.step.StepMeta;
import org.pentaho.di.trans.step.StepMetaInterface;
import org.postgresql.PGConnection;
import org.postgresql.copy.PGCopyOutputStream;
/**
* Performs a bulk load to a postgres table.
*
* Based on (copied from) Sven Boden's Oracle Bulk Loader step
*
* @author matt
* @since 28-mar-2008
*/
public class PGBulkLoader extends BaseStep implements StepInterface {
  private static Class<?> PKG = PGBulkLoaderMeta.class; // for i18n purposes, needed by Translator2!!

  private PGBulkLoaderMeta meta;
  private PGBulkLoaderData data;

  // Stream feeding the server-side COPY ... FROM STDIN. Opened lazily in do_copy() when the
  // first row arrives, so it remains null if the step never receives any input rows.
  private PGCopyOutputStream pgCopyOut;

  public PGBulkLoader( StepMeta stepMeta, StepDataInterface stepDataInterface, int copyNr, TransMeta transMeta,
    Trans trans ) {
    super( stepMeta, stepDataInterface, copyNr, transMeta, trans );
  }

  /**
   * Builds the SQL sent to the server: an optional TRUNCATE (when the load action is "truncate")
   * followed by a {@code COPY ... FROM STDIN WITH CSV} statement listing the target columns and
   * the user-configured delimiter and enclosure characters.
   *
   * @return the (possibly multi-statement) command string
   * @throws KettleException if no stream fields are defined in the step metadata
   */
  public String getCopyCommand() throws KettleException {
    DatabaseMeta dm = meta.getDatabaseMeta();

    String loadAction = environmentSubstitute( meta.getLoadAction() );

    StringBuilder contents = new StringBuilder( 500 );

    String tableName =
      dm.getQuotedSchemaTableCombination(
        environmentSubstitute( meta.getSchemaName() ), environmentSubstitute( meta.getTableName() ) );

    // Optionally empty the target table before loading.
    if ( loadAction.equalsIgnoreCase( "truncate" ) ) {
      contents.append( "TRUNCATE TABLE " );
      contents.append( tableName + ";" );
      contents.append( Const.CR );
    }
    contents.append( "COPY " );

    // Table name
    contents.append( tableName );

    // Names of the target columns, quoted per the database dialect.
    contents.append( " ( " );
    String[] streamFields = meta.getFieldStream();
    String[] tableFields = meta.getFieldTable();

    if ( streamFields == null || streamFields.length == 0 ) {
      throw new KettleException( "No fields defined to load to database" );
    }

    for ( int i = 0; i < streamFields.length; i++ ) {
      if ( i != 0 ) {
        contents.append( ", " );
      }
      contents.append( dm.quoteField( tableFields[i] ) );
    }
    contents.append( " ) " );

    // Rows are streamed over the connection rather than read from a file on the server.
    contents.append( " FROM STDIN" );

    // CSV format with the user-configured delimiter and enclosure.
    contents.append( " WITH CSV DELIMITER AS '" ).append( environmentSubstitute( meta.getDelimiter() ) )
      .append( "' QUOTE AS '" ).append(
        environmentSubstitute( meta.getEnclosure() ) ).append( "'" );

    contents.append( ";" ).append( Const.CR );

    return contents.toString();
  }

  /**
   * Connects to the database and launches the COPY command; on success {@link #pgCopyOut} is
   * ready to receive serialized row data.
   *
   * @param meta the step metadata providing the database connection
   * @param wait unused; kept for signature compatibility with the sibling bulk-loader steps
   * @throws KettleException when connecting or starting the COPY fails
   */
  private void do_copy( PGBulkLoaderMeta meta, boolean wait ) throws KettleException {
    data.db = new Database( this, meta.getDatabaseMeta() );

    String copyCmd = getCopyCommand();
    try {
      // With unique connections the transaction id is shared, so connect under the
      // transformation lock to serialize against other steps using the same connection.
      if ( getTransMeta().isUsingUniqueConnections() ) {
        synchronized ( getTrans() ) {
          data.db.connect( getTrans().getTransactionId(), getPartitionID() );
        }
      } else {
        data.db.connect( getPartitionID() );
      }

      logBasic( "Launching command: " + copyCmd );
      pgCopyOut = new PGCopyOutputStream( (PGConnection) data.db.getConnection(), copyCmd );
    } catch ( Exception ex ) {
      throw new KettleException( "Error while preparing the COPY " + copyCmd, ex );
    }
  }

  /**
   * Streams one input row into the running COPY and passes the row downstream. The COPY is
   * started lazily on the first row; at end of input the stream is flushed and the COPY ended.
   *
   * @return true while more rows are expected, false when the step is done
   */
  public boolean processRow( StepMetaInterface smi, StepDataInterface sdi ) throws KettleException {
    meta = (PGBulkLoaderMeta) smi;
    data = (PGBulkLoaderData) sdi;

    try {
      Object[] r = getRow(); // Get row from input rowset & set row busy!
      if ( r == null ) { // no more input to be expected...

        setOutputDone();

        // Close the output stream. It is null when no rows were ever received (the COPY is
        // only started on the first row), so guard on the stream itself — checking "data"
        // here would NPE on an empty input stream.
        if ( pgCopyOut != null ) {
          pgCopyOut.flush();
          pgCopyOut.endCopy();
        }

        return false;
      }

      if ( first ) {
        first = false;

        // Cache the input field indexes of the fields we load.
        data.keynrs = new int[meta.getFieldStream().length];
        for ( int i = 0; i < data.keynrs.length; i++ ) {
          data.keynrs[i] = getInputRowMeta().indexOfValue( meta.getFieldStream()[i] );
        }

        // Execute the copy statement: pgCopyOut is set up there.
        do_copy( meta, true );
      }

      // Write the row's data to the COPY stream, then hand the row to the next steps.
      writeRowToPostgres( getInputRowMeta(), r );

      putRow( getInputRowMeta(), r );
      incrementLinesOutput();

      return true;
    } catch ( Exception e ) {
      // NOTE(review): the message key says "GPBulkLoader" (copied from the Greenplum step);
      // left unchanged because it is an existing i18n bundle key.
      logError( BaseMessages.getString( PKG, "GPBulkLoader.Log.ErrorInStep" ), e );
      setErrors( 1 );
      stopAll();
      setOutputDone(); // signal end to receiver(s)
      return false;
    }
  }

  /**
   * Serializes one row as CSV bytes onto the COPY stream: fields separated by the configured
   * delimiter, strings wrapped in the enclosure (embedded enclosures doubled), terminated by a
   * newline. Null values produce empty fields.
   *
   * @param rowMeta metadata describing the incoming row
   * @param r the row values
   * @throws KettleException when serialization or writing fails
   */
  private void writeRowToPostgres( RowMetaInterface rowMeta, Object[] r ) throws KettleException {
    try {
      for ( int i = 0; i < data.keynrs.length; i++ ) {
        if ( i > 0 ) {
          // Write a separator between fields.
          pgCopyOut.write( data.separator );
        }

        int index = data.keynrs[i];
        ValueMetaInterface valueMeta = rowMeta.getValueMeta( index );
        Object valueData = r[index];

        if ( valueData != null ) {
          switch ( valueMeta.getType() ) {
            case ValueMetaInterface.TYPE_STRING:
              pgCopyOut.write( data.quote );

              // Escape every enclosure character by doubling it (CSV convention).
              // NOTE(review): getBytes() uses the platform default charset throughout this
              // method; presumably that matches the database client encoding — confirm.
              String quoteStr = new String( data.quote );
              String escapedString = valueMeta.getString( valueData ).replace( quoteStr, quoteStr + quoteStr );
              pgCopyOut.write( escapedString.getBytes() );
              pgCopyOut.write( data.quote );
              break;
            case ValueMetaInterface.TYPE_INTEGER:
              if ( valueMeta.isStorageBinaryString() ) {
                pgCopyOut.write( (byte[]) valueData );
              } else {
                pgCopyOut.write( Long.toString( valueMeta.getInteger( valueData ) ).getBytes() );
              }
              break;
            case ValueMetaInterface.TYPE_DATE:
              writeDateValue( valueMeta, valueData, i, "date" );
              break;
            case ValueMetaInterface.TYPE_TIMESTAMP:
              writeDateValue( valueMeta, valueData, i, "timestamp" );
              break;
            case ValueMetaInterface.TYPE_BOOLEAN:
              // NOTE(review): booleans are serialized through getNumber() (e.g. "1.0"/"0.0"),
              // which a PostgreSQL boolean column may reject — confirm the intended target type.
              if ( valueMeta.isStorageBinaryString() ) {
                pgCopyOut.write( (byte[]) valueData );
              } else {
                pgCopyOut.write( Double.toString( valueMeta.getNumber( valueData ) ).getBytes() );
              }
              break;
            case ValueMetaInterface.TYPE_NUMBER:
              if ( valueMeta.isStorageBinaryString() ) {
                pgCopyOut.write( (byte[]) valueData );
              } else {
                pgCopyOut.write( Double.toString( valueMeta.getNumber( valueData ) ).getBytes() );
              }
              break;
            case ValueMetaInterface.TYPE_BIGNUMBER:
              if ( valueMeta.isStorageBinaryString() ) {
                pgCopyOut.write( (byte[]) valueData );
              } else {
                BigDecimal big = valueMeta.getBigNumber( valueData );
                if ( big != null ) {
                  pgCopyOut.write( big.toString().getBytes() );
                }
              }
              break;
            default:
              throw new KettleException( "PGBulkLoader doesn't handle the type " + valueMeta.getTypeDesc() );
          }
        }
      }

      // Now write a newline
      pgCopyOut.write( data.newline );
    } catch ( Exception e ) {
      throw new KettleException( "Error serializing rows of data to the COPY command", e );
    }
  }

  /**
   * Writes a date or timestamp value in the format selected for the field: pass-through
   * (the value's own string/binary form), the date mask, or the datetime mask.
   *
   * @param valueMeta metadata of the incoming value
   * @param valueData the non-null value to write
   * @param fieldIndex index into data.dateFormatChoices for this field
   * @param typeDesc "date" or "timestamp", used only in the error message
   * @throws KettleException if the field's format choice is unrecognized
   * @throws IOException when writing to the COPY stream fails
   */
  private void writeDateValue( ValueMetaInterface valueMeta, Object valueData, int fieldIndex, String typeDesc )
    throws KettleException, IOException {
    switch ( data.dateFormatChoices[fieldIndex] ) {
      // Pass the data along in the format chosen by the user OR in binary format...
      case PGBulkLoaderMeta.NR_DATE_MASK_PASS_THROUGH:
        if ( valueMeta.isStorageBinaryString() ) {
          pgCopyOut.write( (byte[]) valueData );
        } else {
          String dateString = valueMeta.getString( valueData );
          if ( dateString != null ) {
            pgCopyOut.write( dateString.getBytes() );
          }
        }
        break;
      // Convert using the date-only mask (data.dateMeta).
      case PGBulkLoaderMeta.NR_DATE_MASK_DATE:
        String dateString = data.dateMeta.getString( valueMeta.getDate( valueData ) );
        if ( dateString != null ) {
          pgCopyOut.write( dateString.getBytes() );
        }
        break;
      // Convert using the date+time mask (data.dateTimeMeta).
      case PGBulkLoaderMeta.NR_DATE_MASK_DATETIME:
        String dateTimeString = data.dateTimeMeta.getString( valueMeta.getDate( valueData ) );
        if ( dateTimeString != null ) {
          pgCopyOut.write( dateTimeString.getBytes() );
        }
        break;
      default:
        throw new KettleException( "PGBulkLoader doesn't know how to handle " + typeDesc
          + " (neither passthrough, nor date or datetime for field " + valueMeta.getName() );
    }
  }

  /**
   * Initializes the step: caches the enclosure, delimiter and newline as byte arrays and
   * resolves each field's date-format choice (pass-through / date / datetime).
   *
   * @return true when initialization succeeded
   */
  public boolean init( StepMetaInterface smi, StepDataInterface sdi ) {
    meta = (PGBulkLoaderMeta) smi;
    data = (PGBulkLoaderData) sdi;

    String enclosure = environmentSubstitute( meta.getEnclosure() );
    String separator = environmentSubstitute( meta.getDelimiter() );

    if ( super.init( smi, sdi ) ) {

      if ( enclosure != null ) {
        data.quote = enclosure.getBytes();
      } else {
        data.quote = new byte[] {};
      }
      if ( separator != null ) {
        data.separator = separator.getBytes();
      } else {
        data.separator = new byte[] {};
      }
      data.newline = Const.CR.getBytes();

      data.dateFormatChoices = new int[meta.getFieldStream().length];
      for ( int i = 0; i < data.dateFormatChoices.length; i++ ) {
        if ( Utils.isEmpty( meta.getDateMask()[i] ) ) {
          data.dateFormatChoices[i] = PGBulkLoaderMeta.NR_DATE_MASK_PASS_THROUGH;
        } else if ( meta.getDateMask()[i].equalsIgnoreCase( PGBulkLoaderMeta.DATE_MASK_DATE ) ) {
          data.dateFormatChoices[i] = PGBulkLoaderMeta.NR_DATE_MASK_DATE;
        } else if ( meta.getDateMask()[i].equalsIgnoreCase( PGBulkLoaderMeta.DATE_MASK_DATETIME ) ) {
          data.dateFormatChoices[i] = PGBulkLoaderMeta.NR_DATE_MASK_DATETIME;
        } else { // The default : just pass it along...
          data.dateFormatChoices[i] = PGBulkLoaderMeta.NR_DATE_MASK_PASS_THROUGH;
        }
      }
      return true;
    }
    return false;
  }
}