/*! ******************************************************************************
*
* Pentaho Data Integration
*
* Copyright (C) 2002-2017 by Pentaho : http://www.pentaho.com
*
*******************************************************************************
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
******************************************************************************/
package org.pentaho.di.trans.steps.dimensionlookup;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Date;
import java.util.List;
import org.pentaho.di.core.Const;
import org.pentaho.di.core.util.Utils;
import org.pentaho.di.core.RowMetaAndData;
import org.pentaho.di.core.database.Database;
import org.pentaho.di.core.database.DatabaseMeta;
import org.pentaho.di.core.exception.KettleDatabaseException;
import org.pentaho.di.core.exception.KettleException;
import org.pentaho.di.core.exception.KettleStepException;
import org.pentaho.di.core.exception.KettleValueException;
import org.pentaho.di.core.hash.ByteArrayHashMap;
import org.pentaho.di.core.row.RowMeta;
import org.pentaho.di.core.row.RowMetaInterface;
import org.pentaho.di.core.row.ValueMetaInterface;
import org.pentaho.di.core.row.value.ValueMetaBoolean;
import org.pentaho.di.core.row.value.ValueMetaDate;
import org.pentaho.di.core.row.value.ValueMetaFactory;
import org.pentaho.di.core.row.value.ValueMetaInteger;
import org.pentaho.di.i18n.BaseMessages;
import org.pentaho.di.trans.Trans;
import org.pentaho.di.trans.TransMeta;
import org.pentaho.di.trans.step.BaseStep;
import org.pentaho.di.trans.step.StepDataInterface;
import org.pentaho.di.trans.step.StepInterface;
import org.pentaho.di.trans.step.StepMeta;
import org.pentaho.di.trans.step.StepMetaInterface;
/**
* Manages a slowly changing dimension (lookup or update)
*
* @author Matt
* @since 14-may-2003
*/
public class DimensionLookup extends BaseStep implements StepInterface {
private static Class<?> PKG = DimensionLookupMeta.class; // for i18n purposes, needed by Translator2!!
private static final int CREATION_METHOD_AUTOINC = 1;
private static final int CREATION_METHOD_SEQUENCE = 2;
private static final int CREATION_METHOD_TABLEMAX = 3;
private int techKeyCreation;
private DimensionLookupMeta meta;
private DimensionLookupData data;
int[] columnLookupArray = null;
public DimensionLookup( StepMeta stepMeta, StepDataInterface stepDataInterface, int copyNr, TransMeta transMeta,
Trans trans ) {
super( stepMeta, stepDataInterface, copyNr, transMeta, trans );
}
protected void setMeta( DimensionLookupMeta meta ) {
this.meta = meta;
}
protected void setData( DimensionLookupData data ) {
this.data = data;
}
private void setTechKeyCreation( int method ) {
techKeyCreation = method;
}
private int getTechKeyCreation() {
return techKeyCreation;
}
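/**
 * Determines how technical keys will be generated: database auto-increment if the connection supports it and it
 * was selected, otherwise a database sequence if supported and selected, otherwise the next key is derived from
 * the current maximum key value in the dimension table.
 */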
private void determineTechKeyCreation() {
String keyCreation = meta.getTechKeyCreation();
if ( meta.getDatabaseMeta().supportsAutoinc()
&& DimensionLookupMeta.CREATION_METHOD_AUTOINC.equals( keyCreation ) ) {
setTechKeyCreation( CREATION_METHOD_AUTOINC );
} else if ( meta.getDatabaseMeta().supportsSequences()
&& DimensionLookupMeta.CREATION_METHOD_SEQUENCE.equals( keyCreation ) ) {
setTechKeyCreation( CREATION_METHOD_SEQUENCE );
} else {
setTechKeyCreation( CREATION_METHOD_TABLEMAX );
}
}
@Override
public boolean processRow( StepMetaInterface smi, StepDataInterface sdi ) throws KettleException {
meta = (DimensionLookupMeta) smi;
data = (DimensionLookupData) sdi;
Object[] r = getRow(); // Get row from input rowset & set row busy!
if ( r == null ) { // no more input to be expected...
setOutputDone(); // signal end to receiver(s)
return false;
}
if ( first ) {
first = false;
data.schemaTable =
meta.getDatabaseMeta().getQuotedSchemaTableCombination( data.realSchemaName, data.realTableName );
data.inputRowMeta = getInputRowMeta().clone();
data.outputRowMeta = getInputRowMeta().clone();
meta.getFields( data.outputRowMeta, getStepname(), null, null, this, repository, metaStore );
// Get the fields that need conversion to normal storage...
// Modify the storage type of the input data...
//
data.lazyList = new ArrayList<Integer>();
for ( int i = 0; i < data.inputRowMeta.size(); i++ ) {
ValueMetaInterface valueMeta = data.inputRowMeta.getValueMeta( i );
if ( valueMeta.isStorageBinaryString() ) {
data.lazyList.add( i );
valueMeta.setStorageType( ValueMetaInterface.STORAGE_TYPE_NORMAL );
}
}
// The start date value column (if applicable)
//
data.startDateFieldIndex = -1;
if ( data.startDateChoice == DimensionLookupMeta.START_DATE_ALTERNATIVE_COLUMN_VALUE ) {
data.startDateFieldIndex = data.inputRowMeta.indexOfValue( meta.getStartDateFieldName() );
if ( data.startDateFieldIndex < 0 ) {
throw new KettleStepException( BaseMessages.getString(
PKG, "DimensionLookup.Exception.StartDateValueColumnNotFound", meta.getStartDateFieldName() ) );
}
}
// Lookup values
data.keynrs = new int[meta.getKeyStream().length];
for ( int i = 0; i < meta.getKeyStream().length; i++ ) {
// logDetailed("Lookup values key["+i+"] --> "+key[i]+", row==null?"+(row==null));
data.keynrs[i] = data.inputRowMeta.indexOfValue( meta.getKeyStream()[i] );
if ( data.keynrs[i] < 0 ) { // couldn't find field!
throw new KettleStepException( BaseMessages.getString(
PKG, "DimensionLookup.Exception.KeyFieldNotFound", meta.getKeyStream()[i] ) );
}
}
// Return values
data.fieldnrs = new int[meta.getFieldStream().length];
for ( int i = 0; i < meta.getFieldStream().length; i++ ) {
if ( !DimensionLookupMeta.isUpdateTypeWithoutArgument( meta.isUpdate(), meta.getFieldUpdate()[i] ) ) {
data.fieldnrs[i] = data.outputRowMeta.indexOfValue( meta.getFieldStream()[i] );
if ( data.fieldnrs[i] < 0 ) {
throw new KettleStepException( BaseMessages.getString(
PKG, "DimensionLookup.Exception.KeyFieldNotFound", meta.getFieldStream()[i] ) );
}
} else {
data.fieldnrs[i] = -1;
}
}
if ( !meta.isUpdate() && meta.isPreloadingCache() ) {
preloadCache();
} else {
// Caching...
//
if ( data.cacheKeyRowMeta == null ) {
// KEY : the natural key(s)
//
data.cacheKeyRowMeta = new RowMeta();
for ( int i = 0; i < data.keynrs.length; i++ ) {
ValueMetaInterface key = data.inputRowMeta.getValueMeta( data.keynrs[i] );
data.cacheKeyRowMeta.addValueMeta( key.clone() );
}
data.cache =
new ByteArrayHashMap( meta.getCacheSize() > 0 ? meta.getCacheSize() : 5000, data.cacheKeyRowMeta );
}
}
if ( !Utils.isEmpty( meta.getDateField() ) ) {
data.datefieldnr = data.inputRowMeta.indexOfValue( meta.getDateField() );
} else {
data.datefieldnr = -1;
}
// Initialize the start date value in case we don't have one in the input rows
//
data.valueDateNow = determineDimensionUpdatedDate( r );
determineTechKeyCreation();
data.notFoundTk = Long.valueOf( meta.getDatabaseMeta().getNotFoundTK( isAutoIncrement() ) );
// if (meta.getKeyRename()!=null && meta.getKeyRename().length()>0) data.notFoundTk.setName(meta.getKeyRename());
if ( getCopy() == 0 ) {
checkDimZero();
}
setDimLookup( data.outputRowMeta );
}
// convert row to normal storage...
//
for ( int lazyFieldIndex : data.lazyList ) {
ValueMetaInterface valueMeta = getInputRowMeta().getValueMeta( lazyFieldIndex );
r[lazyFieldIndex] = valueMeta.convertToNormalStorageType( r[lazyFieldIndex] );
}
try {
Object[] outputRow = lookupValues( data.inputRowMeta, r ); // add new values to the row in rowset[0].
putRow( data.outputRowMeta, outputRow ); // copy row to output rowset(s);
if ( checkFeedback( getLinesRead() ) ) {
if ( log.isBasic() ) {
logBasic( BaseMessages.getString( PKG, "DimensionLookup.Log.LineNumber" ) + getLinesRead() );
}
}
} catch ( KettleException e ) {
logError( BaseMessages.getString( PKG, "DimensionLookup.Log.StepCanNotContinueForErrors", e.getMessage() ) );
logError( Const.getStackTracker( e ) );
setErrors( 1 );
stopAll();
setOutputDone(); // signal end to receiver(s)
return false;
}
return true;
}
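/**
 * Determines the date against which the dimension date range is evaluated: the value of the configured input
 * date field if there is one, otherwise the transformation's current date. Throws an exception when the
 * configured date field holds a null value (see PDI-4816).
 */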
private Date determineDimensionUpdatedDate( Object[] row ) throws KettleException {
if ( data.datefieldnr < 0 ) {
return getTrans().getCurrentDate(); // start of transformation...
} else {
Date rtn = data.inputRowMeta.getDate( row, data.datefieldnr ); // Date field in the input row
if ( rtn != null ) {
return rtn;
} else {
// Fix for PDI-4816
String inputRowMetaStringMeta = null;
try {
inputRowMetaStringMeta = data.inputRowMeta.toStringMeta();
} catch ( Exception ex ) {
inputRowMetaStringMeta = "No row input meta";
}
throw new KettleStepException( BaseMessages.getString(
PKG, "DimensionLookup.Exception.NullDimensionUpdatedDate", inputRowMetaStringMeta ) );
}
}
}
/**
* Pre-load the cache by reading the whole dimension table from the database...
*
* @throws KettleException
* in case there is a database or cache problem.
*/
private void preloadCache() throws KettleException {
try {
DatabaseMeta databaseMeta = meta.getDatabaseMeta();
// tk, version, from, to, natural keys, retrieval fields...
//
String sql = "SELECT " + databaseMeta.quoteField( meta.getKeyField() );
// sql+=", "+databaseMeta.quoteField(meta.getVersionField());
for ( int i = 0; i < meta.getKeyLookup().length; i++ ) {
sql += ", " + databaseMeta.quoteField( meta.getKeyLookup()[i] ); // the natural key field in the table
}
for ( int i = 0; i < meta.getFieldLookup().length; i++ ) {
sql += ", " + databaseMeta.quoteField( meta.getFieldLookup()[i] ); // the extra fields to retrieve...
}
sql += ", " + databaseMeta.quoteField( meta.getDateFrom() ); // extra info in cache
sql += ", " + databaseMeta.quoteField( meta.getDateTo() ); // extra info in cache
sql += " FROM " + data.schemaTable;
logDetailed( "Pre-loading cache by reading from database with: " + Const.CR + sql + Const.CR );
List<Object[]> rows = data.db.getRows( sql, -1 );
RowMetaInterface rowMeta = data.db.getReturnRowMeta();
data.preloadKeyIndexes = new int[meta.getKeyLookup().length];
for ( int i = 0; i < data.preloadKeyIndexes.length; i++ ) {
data.preloadKeyIndexes[i] = rowMeta.indexOfValue( meta.getKeyLookup()[i] ); // the field in the table
}
data.preloadFromDateIndex = rowMeta.indexOfValue( meta.getDateFrom() );
data.preloadToDateIndex = rowMeta.indexOfValue( meta.getDateTo() );
data.preloadCache =
new DimensionCache( rowMeta, data.preloadKeyIndexes, data.preloadFromDateIndex, data.preloadToDateIndex );
data.preloadCache.setRowCache( rows );
logDetailed( "Sorting the cache rows..." );
data.preloadCache.sortRows();
logDetailed( "Sorting of cached rows finished." );
// Also see what indexes to take to populate the lookup row...
// We only ever compare indexes and the lookup date in the cache, the rest is not needed...
//
data.preloadIndexes = new ArrayList<Integer>();
for ( int i = 0; i < meta.getKeyStream().length; i++ ) {
int index = data.inputRowMeta.indexOfValue( meta.getKeyStream()[i] );
if ( index < 0 ) {
// Just to be safe...
//
throw new KettleStepException( BaseMessages.getString(
PKG, "DimensionLookup.Exception.KeyFieldNotFound", meta.getKeyStream()[i] ) );
}
data.preloadIndexes.add( index );
}
// This is all for now...
} catch ( Exception e ) {
throw new KettleException( "Error encountered during cache pre-load", e );
}
}
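/**
 * Looks up the dimension entry for one input row. In lookup-only mode the technical key (and requested fields)
 * are returned. In update mode this also applies the slowly changing dimension rules: identical rows are
 * skipped, changed fields flagged for "insert" cause a new version to be added and the previous version's date
 * range to be closed, and changed "punch through" fields are overwritten in all versions.
 */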
private synchronized Object[] lookupValues( RowMetaInterface rowMeta, Object[] row ) throws KettleException {
Object[] outputRow = new Object[data.outputRowMeta.size()];
RowMetaInterface lookupRowMeta;
Object[] lookupRow;
Object[] returnRow = null;
Long technicalKey;
Long valueVersion;
Date valueDate = null;
Date valueDateFrom = null;
Date valueDateTo = null;
// Determine the lookup date ("now") if we have a field that carries said
// date.
// If not, the transformation's current date is taken.
//
valueDate = determineDimensionUpdatedDate( row );
if ( !meta.isUpdate() && meta.isPreloadingCache() ) {
// Obtain a result row from the pre-load cache...
//
// Create a row to compare with
//
RowMetaInterface preloadRowMeta = data.preloadCache.getRowMeta();
// In this case it's all the same. (simple)
//
data.returnRowMeta = data.preloadCache.getRowMeta();
lookupRowMeta = preloadRowMeta;
lookupRow = new Object[preloadRowMeta.size()];
// Assemble the lookup row, convert data if needed...
//
for ( int i = 0; i < data.preloadIndexes.size(); i++ ) {
int from = data.preloadIndexes.get( i ); // Input row index
int to = data.preloadCache.getKeyIndexes()[i]; // Lookup row index
// From data type...
//
ValueMetaInterface fromValueMeta = rowMeta.getValueMeta( from );
// To data type...
//
ValueMetaInterface toValueMeta = data.preloadCache.getRowMeta().getValueMeta( to );
// From value:
//
Object fromData = row[from];
// To value:
//
Object toData = toValueMeta.convertData( fromValueMeta, fromData );
// Set the key in the row...
//
lookupRow[to] = toData;
}
// Also set the lookup date on the "start of date range" (fromDate) position,
// where the cache lookup compares it against each version's date range.
//
lookupRow[data.preloadFromDateIndex] = valueDate;
// Look up the row in the pre-load cache...
//
int index = data.preloadCache.lookupRow( lookupRow );
if ( index >= 0 ) {
returnRow = data.preloadCache.getRow( index );
} else {
returnRow = null; // Nothing found!
}
} else {
lookupRow = new Object[data.lookupRowMeta.size()];
lookupRowMeta = data.lookupRowMeta;
// Construct the lookup row...
//
for ( int i = 0; i < meta.getKeyStream().length; i++ ) {
try {
lookupRow[i] = row[data.keynrs[i]];
} catch ( Exception e ) { // TODO : remove exception??
throw new KettleStepException(
BaseMessages
.getString(
PKG,
"DimensionLookup.Exception.ErrorDetectedInGettingKey", i + "", data.keynrs[i] + "/" + rowMeta.size(),
rowMeta.getString( row ) ) );
}
}
lookupRow[meta.getKeyStream().length] = valueDate; // ? >= date_from
lookupRow[meta.getKeyStream().length + 1] = valueDate; // ? < date_to
if ( isDebug() ) {
logDebug( BaseMessages.getString( PKG, "DimensionLookup.Log.LookupRow" )
+ data.lookupRowMeta.getString( lookupRow ) );
}
// Do the lookup and see if we can find anything in the database.
// But before that, let's see if we can find anything in the cache
//
if ( meta.getCacheSize() >= 0 ) {
returnRow = getFromCache( lookupRow, valueDate );
}
// Nothing found in the cache?
// Perform the lookup in the database...
//
if ( returnRow == null ) {
data.db.setValues( data.lookupRowMeta, lookupRow, data.prepStatementLookup );
returnRow = data.db.getLookup( data.prepStatementLookup );
data.returnRowMeta = data.db.getReturnRowMeta();
incrementLinesInput();
if ( returnRow != null && meta.getCacheSize() >= 0 ) {
addToCache( lookupRow, returnRow );
}
}
}
// This next block of code handles the dimension key LOOKUP ONLY.
// We handle this case where "update = false" first for performance reasons
//
if ( !meta.isUpdate() ) {
if ( returnRow == null ) {
returnRow = new Object[data.returnRowMeta.size()];
returnRow[0] = data.notFoundTk;
if ( meta.getCacheSize() >= 0 ) { // need -oo to +oo as well...
returnRow[returnRow.length - 2] = data.min_date;
returnRow[returnRow.length - 1] = data.max_date;
}
}
// else {
// We found the return values in row "add".
// Throw away the version nr...
// add.removeValue(1);
// Rename the key field if needed. Do it directly in the row...
// if (meta.getKeyRename()!=null && meta.getKeyRename().length()>0)
// add.getValue(0).setName(meta.getKeyRename());
// }
} else {
// This is the "update=true" case where we update the dimension table...
// It is an "Insert - update" algorithm for slowly changing dimensions
//
// The dimension entry was not found, we need to add it!
//
if ( returnRow == null ) {
if ( isRowLevel() ) {
logRowlevel( BaseMessages.getString( PKG, "DimensionLookup.Log.NoDimensionEntryFound" )
+ lookupRowMeta.getString( lookupRow ) + ")" );
}
// Date range: ]-oo,+oo[
//
if ( data.startDateChoice == DimensionLookupMeta.START_DATE_ALTERNATIVE_SYSDATE ) {
// use the time the step execution begins as the date from.
// before, the current system time was used. this caused an exclusion of the row in the
// lookup portion of the step that uses this 'valueDate' and not the current time.
// the result was multiple inserts for what should have been 1 [PDI-4317]
valueDateFrom = valueDate;
} else {
valueDateFrom = data.min_date;
}
valueDateTo = data.max_date;
valueVersion = Long.valueOf( 1L ); // Versions always start at 1.
// get a new value from the sequence generator chosen.
//
technicalKey = null;
switch ( getTechKeyCreation() ) {
case CREATION_METHOD_TABLEMAX:
// What's the next value for the technical key?
technicalKey =
data.db.getNextValue( getTrans().getCounters(), data.realSchemaName, data.realTableName, meta
.getKeyField() );
break;
case CREATION_METHOD_AUTOINC:
technicalKey = null; // Set to null to flag auto-increment usage
break;
case CREATION_METHOD_SEQUENCE:
technicalKey =
data.db.getNextSequenceValue( data.realSchemaName, meta.getSequenceName(), meta.getKeyField() );
if ( technicalKey != null && isRowLevel() ) {
logRowlevel( BaseMessages.getString( PKG, "DimensionLookup.Log.FoundNextSequence" )
+ technicalKey.toString() );
}
break;
default:
break;
}
/*
* INSERT INTO table(version, datefrom, dateto, fieldlookup) VALUES(valueVersion, valueDateFrom, valueDateTo,
* row.fieldnrs) ;
*/
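// Note: with auto-increment key generation, technicalKey is still null at this point; dimInsert() resolves it
// from the database-generated keys after the insert.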
technicalKey =
dimInsert( data.inputRowMeta, row, technicalKey, true, valueVersion, valueDateFrom, valueDateTo );
incrementLinesOutput();
returnRow = new Object[data.returnRowMeta.size()];
int returnIndex = 0;
returnRow[returnIndex] = technicalKey;
returnIndex++;
// See if we need to store this record in the cache as well...
/*
* TODO: we can't really assume like this that the cache layout of the incoming rows (below) is the same as the
* stored data. Storing this in the cache gives us data/metadata collision errors. (class cast problems etc)
* Perhaps we need to convert this data to the target data types. Alternatively, we can use a separate cache in
* the future. Reference: PDI-911
*
* if (meta.getCacheSize()>=0) { Object[] values = getCacheValues(rowMeta, row, technicalKey, valueVersion,
* valueDateFrom, valueDateTo);
*
* // put it in the cache... if (values!=null) { addToCache(lookupRow, values); } }
*/
if ( isRowLevel() ) {
logRowlevel( BaseMessages.getString( PKG, "DimensionLookup.Log.AddedDimensionEntry" )
+ data.returnRowMeta.getString( returnRow ) );
}
} else {
//
// The entry was found: do we need to insert, update or both?
//
if ( isRowLevel() ) {
logRowlevel( BaseMessages.getString( PKG, "DimensionLookup.Log.DimensionEntryFound" )
+ data.returnRowMeta.getString( returnRow ) );
}
// What's the key? The first value of the return row
technicalKey = data.returnRowMeta.getInteger( returnRow, 0 );
valueVersion = data.returnRowMeta.getInteger( returnRow, 1 );
// Date range: ]-oo,+oo[
valueDateFrom = meta.getMinDate();
valueDateTo = meta.getMaxDate();
// The other values, we compare with
int cmp;
// If everything is the same: don't do anything
// If one of the fields is different: insert or update
// If all changed fields have update = Y, update
// If one of the changed fields has update = N, insert
boolean insert = false;
boolean identical = true;
boolean punch = false;
// Column lookup array - initialize to all -1
if ( columnLookupArray == null ) {
columnLookupArray = new int[meta.getFieldStream().length];
for ( int i = 0; i < columnLookupArray.length; i++ ) {
columnLookupArray[i] = -1;
}
}
int returnRowColNum = -1;
String findColumn = null;
for ( int i = 0; i < meta.getFieldStream().length; i++ ) {
if ( data.fieldnrs[i] >= 0 ) {
// Only compare real fields, not last updated row, last version, etc
//
ValueMetaInterface v1 = data.outputRowMeta.getValueMeta( data.fieldnrs[i] );
Object valueData1 = row[data.fieldnrs[i]];
findColumn = meta.getFieldStream()[i];
// find the returnRowMeta based on the field in the fieldLookup list
ValueMetaInterface v2 = null;
Object valueData2 = null;
// Fix for PDI-8122
// See if it's already been computed.
returnRowColNum = columnLookupArray[i];
if ( returnRowColNum == -1 ) {
// It hasn't been found yet - search the list and make sure we're comparing
// the right column to the right column.
for ( int j = 2; j < data.returnRowMeta.size(); j++ ) { // starting at 2 because indexes 0 and 1 hold the
// technical key and the version.
v2 = data.returnRowMeta.getValueMeta( j );
if ( ( v2.getName() != null ) && ( v2.getName().equalsIgnoreCase( findColumn ) ) ) { // is this the
// right column?
columnLookupArray[i] = j; // yes - record the "j" into the columnLookupArray at [i] for the next time
// through the loop
valueData2 = returnRow[j]; // get the valueData2 for comparison
break; // get outta here.
} else {
// Reset to null because otherwise, we'll get a false finding at the end.
// This could be optimized to use a temporary variable to avoid the repeated set if necessary
// but it will never be as slow as the database lookup anyway
v2 = null;
}
}
} else {
// We have a value in the columnLookupArray - use the value stored there.
v2 = data.returnRowMeta.getValueMeta( returnRowColNum );
valueData2 = returnRow[returnRowColNum];
}
if ( v2 == null ) {
// If we made it here, then maybe someone tweaked the XML in the transformation
// and we're matching a stream column to a column that doesn't really exist. Throw an exception.
throw new KettleStepException( BaseMessages.getString(
PKG, "DimensionLookup.Exception.ErrorDetectedInComparingFields", meta.getFieldStream()[i] ) );
}
cmp = v1.compare( valueData1, v2, valueData2 );
// Any difference means the rows are not identical
if ( cmp != 0 ) {
identical = false;
}
// Field flagged for insert: insert
if ( cmp != 0 && meta.getFieldUpdate()[i] == DimensionLookupMeta.TYPE_UPDATE_DIM_INSERT ) {
insert = true;
}
// Field flagged for punchthrough
if ( cmp != 0 && meta.getFieldUpdate()[i] == DimensionLookupMeta.TYPE_UPDATE_DIM_PUNCHTHROUGH ) {
punch = true;
}
if ( isRowLevel() ) {
logRowlevel( BaseMessages
.getString(
PKG,
"DimensionLookup.Log.ComparingValues", "" + v1, "" + v2, String.valueOf( cmp ), String
.valueOf( identical ), String.valueOf( insert ), String.valueOf( punch ) ) );
}
}
}
// After comparing the record in the database and the data in the input
// and taking into account the rules of the slowly changing dimension,
// we found out whether or not to perform an insert or an update.
//
if ( !insert ) { // Just an update of row at key = valueKey
if ( !identical ) {
if ( isRowLevel() ) {
logRowlevel( BaseMessages.getString( PKG, "DimensionLookup.Log.UpdateRowWithValues" )
+ data.inputRowMeta.getString( row ) );
}
/*
* UPDATE d_customer SET fieldlookup[] = row.getValue(fieldnrs) WHERE returnkey = dimkey
*/
dimUpdate( rowMeta, row, technicalKey, valueDate );
incrementLinesUpdated();
// We need to capture this change in the cache as well...
if ( meta.getCacheSize() >= 0 ) {
Object[] values =
getCacheValues( rowMeta, row, technicalKey, valueVersion, valueDateFrom, valueDateTo );
addToCache( lookupRow, values );
}
} else {
if ( isRowLevel() ) {
logRowlevel( BaseMessages.getString( PKG, "DimensionLookup.Log.SkipLine" ) );
}
// Don't do anything, everything is fine in the dimension.
incrementLinesSkipped();
}
} else {
if ( isRowLevel() ) {
logRowlevel( BaseMessages.getString( PKG, "DimensionLookup.Log.InsertNewVersion" )
+ technicalKey.toString() );
}
Long valueNewVersion = valueVersion + 1;
// From date (valueDate) is calculated at the start of this method to
// be either the system date or the value in a column
//
valueDateFrom = valueDate;
valueDateTo = data.max_date;
// First try to use an AUTOINCREMENT field
if ( meta.getDatabaseMeta().supportsAutoinc() && isAutoIncrement() ) {
technicalKey = null; // value to accept new key...
} else if ( meta.getDatabaseMeta().supportsSequences()
// Try to get the value by looking at a SEQUENCE (oracle mostly)
&& meta.getSequenceName() != null && meta.getSequenceName().length() > 0 ) {
technicalKey =
data.db.getNextSequenceValue( data.realSchemaName, meta.getSequenceName(), meta.getKeyField() );
if ( technicalKey != null && isRowLevel() ) {
logRowlevel( BaseMessages.getString( PKG, "DimensionLookup.Log.FoundNextSequence2" )
+ technicalKey.toString() );
}
} else {
// Use our own sequence here...
// What's the next value for the technical key?
technicalKey =
data.db.getNextValue( getTrans().getCounters(), data.realSchemaName, data.realTableName, meta
.getKeyField() );
}
// update our technicalKey with the return of the insert
technicalKey =
dimInsert( rowMeta, row, technicalKey, false, valueNewVersion, valueDateFrom, valueDateTo );
incrementLinesOutput();
// We need to capture this change in the cache as well...
if ( meta.getCacheSize() >= 0 ) {
Object[] values =
getCacheValues( rowMeta, row, technicalKey, valueNewVersion, valueDateFrom, valueDateTo );
addToCache( lookupRow, values );
}
}
if ( punch ) { // One of the fields we have to punch through has changed!
/*
* This means we have to update all versions:
*
* UPDATE dim SET punchf1 = val1, punchf2 = val2, ... WHERE fieldlookup[] = ? ;
*
* --> update ALL versions in the dimension table.
*/
dimPunchThrough( rowMeta, row );
incrementLinesUpdated();
}
returnRow = new Object[data.returnRowMeta.size()];
returnRow[0] = technicalKey;
if ( isRowLevel() ) {
logRowlevel( BaseMessages.getString( PKG, "DimensionLookup.Log.TechnicalKey" ) + technicalKey );
}
}
}
if ( isRowLevel() ) {
logRowlevel( BaseMessages.getString( PKG, "DimensionLookup.Log.AddValuesToRow" )
+ data.returnRowMeta.getString( returnRow ) );
}
// Copy the results to the output row...
//
// First copy the input row values to the output..
//
for ( int i = 0; i < rowMeta.size(); i++ ) {
outputRow[i] = row[i];
}
int outputIndex = rowMeta.size();
int inputIndex = 0;
// Then the technical key...
//
if ( data.returnRowMeta.getValueMeta( 0 ).isBigNumber() && returnRow[0] instanceof Long ) {
if ( isDebug() ) {
log.logDebug( "Changing the type of the technical key from TYPE_BIGNUMBER to an TYPE_INTEGER" );
}
ValueMetaInterface tkValueMeta = data.returnRowMeta.getValueMeta( 0 );
data.returnRowMeta.setValueMeta( 0, ValueMetaFactory.cloneValueMeta(
tkValueMeta, ValueMetaInterface.TYPE_INTEGER ) );
}
outputRow[outputIndex++] = data.returnRowMeta.getInteger( returnRow, inputIndex++ );
// skip the version in the input
inputIndex++;
// Then get the "extra fields"...
// don't return date from-to fields, they can be returned when explicitly
// specified in lookup fields.
while ( inputIndex < returnRow.length && outputIndex < outputRow.length ) {
outputRow[outputIndex] = returnRow[inputIndex];
outputIndex++;
inputIndex++;
}
// Finally, check the date range!
/*
* TODO: The purpose of this disabled block is unclear. [Maybe it makes sense to keep the returned date from-to
* fields within the min/max range, but even then the code below is wrong]. Value date; if (data.datefieldnr>=0)
* date = row.getValue(data.datefieldnr); else date = new Value("date", new Date()); // system date
*
* if (data.min_date.compare(date)>0) data.min_date.setValue( date.getDate() ); if (data.max_date.compare(date)<0)
* data.max_date.setValue( date.getDate() );
*/
return outputRow;
}
/**
* Prepares the dimension lookup statement.<br>
* table: dimension table<br>
* keys[]: which dim-fields do we use to look up the key?<br>
* retval: name of the key to return<br>
* datefield: do we have a date field?<br>
* datefrom, dateto: date range, if any.
*/
private void setDimLookup( RowMetaInterface rowMeta ) throws KettleDatabaseException {
DatabaseMeta databaseMeta = meta.getDatabaseMeta();
data.lookupRowMeta = new RowMeta();
/*
* DEFAULT, SYSDATE, START_TRANS, COLUMN_VALUE :
*
* SELECT <tk>, <version>, ... , FROM <table> WHERE key1=keys[1] AND key2=keys[2] ... AND <datefrom> <= <datefield>
* AND <dateto> > <datefield> ;
*
* NULL :
*
* SELECT <tk>, <version>, ... , FROM <table> WHERE key1=keys[1] AND key2=keys[2] ... AND ( <datefrom> is null OR
* <datefrom> <= <datefield> ) AND <dateto> >= <datefield>
*/
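/*
* For illustration only, with a hypothetical "d_customer" dimension (technical key "customer_tk", natural key
* "customer_id", default start-date handling and caching enabled), the generated statement would be roughly:
*
* SELECT customer_tk, version, date_from, date_to FROM d_customer WHERE customer_id = ? AND ? >= date_from
* AND ? < date_to
*/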
String sql =
"SELECT "
+ databaseMeta.quoteField( meta.getKeyField() ) + ", "
+ databaseMeta.quoteField( meta.getVersionField() );
if ( !Utils.isEmpty( meta.getFieldLookup() ) ) {
for ( int i = 0; i < meta.getFieldLookup().length; i++ ) {
// Don't retrieve the fields without input
if ( !Utils.isEmpty( meta.getFieldLookup()[i] )
&& !DimensionLookupMeta.isUpdateTypeWithoutArgument( meta.isUpdate(), meta.getFieldUpdate()[i] ) ) {
sql += ", " + databaseMeta.quoteField( meta.getFieldLookup()[i] );
if ( !Utils.isEmpty( meta.getFieldStream()[i] )
&& !meta.getFieldLookup()[i].equals( meta.getFieldStream()[i] ) ) {
sql += " AS " + databaseMeta.quoteField( meta.getFieldStream()[i] );
}
}
}
}
if ( meta.getCacheSize() >= 0 ) {
sql +=
", " + databaseMeta.quoteField( meta.getDateFrom() ) + ", " + databaseMeta.quoteField( meta.getDateTo() );
}
sql += " FROM " + data.schemaTable + " WHERE ";
for ( int i = 0; i < meta.getKeyLookup().length; i++ ) {
if ( i != 0 ) {
sql += " AND ";
}
sql += databaseMeta.quoteField( meta.getKeyLookup()[i] ) + " = ? ";
data.lookupRowMeta.addValueMeta( rowMeta.getValueMeta( data.keynrs[i] ) );
}
String dateFromField = databaseMeta.quoteField( meta.getDateFrom() );
String dateToField = databaseMeta.quoteField( meta.getDateTo() );
if ( meta.isUsingStartDateAlternative()
&& ( ( meta.getStartDateAlternative() == DimensionLookupMeta.START_DATE_ALTERNATIVE_NULL )
|| ( meta.getStartDateAlternative() == DimensionLookupMeta.START_DATE_ALTERNATIVE_COLUMN_VALUE ) ) ) {
// Null as a start date is possible...
//
sql += " AND ( " + dateFromField + " IS NULL OR " + dateFromField + " <= ? )" + Const.CR;
sql += " AND " + dateToField + " > ?" + Const.CR;
data.lookupRowMeta.addValueMeta( new ValueMetaDate( meta.getDateFrom() ) );
data.lookupRowMeta.addValueMeta( new ValueMetaDate( meta.getDateTo() ) );
} else {
// Null as a start date is NOT possible
//
sql += " AND ? >= " + dateFromField + Const.CR;
sql += " AND ? < " + dateToField + Const.CR;
data.lookupRowMeta.addValueMeta( new ValueMetaDate( meta.getDateFrom() ) );
data.lookupRowMeta.addValueMeta( new ValueMetaDate( meta.getDateTo() ) );
}
try {
logDetailed( "Dimension Lookup setting preparedStatement to [" + sql + "]" );
data.prepStatementLookup = data.db.getConnection().prepareStatement( databaseMeta.stripCR( sql ) );
if ( databaseMeta.supportsSetMaxRows() ) {
data.prepStatementLookup.setMaxRows( 1 ); // always get only 1 row back!
}
if ( databaseMeta.getDatabaseInterface().isMySQLVariant() ) {
data.prepStatementLookup.setFetchSize( 0 ); // Make sure to DISABLE Streaming Result sets
}
logDetailed( "Finished preparing dimension lookup statement." );
} catch ( SQLException ex ) {
throw new KettleDatabaseException( "Unable to prepare dimension lookup", ex );
}
}
protected boolean isAutoIncrement() {
return techKeyCreation == CREATION_METHOD_AUTOINC;
}
/**
* Inserts a new record into the dimension table. Optionally, if the entry already exists, the date range of the
* previous version of the entry is updated.
*/
public Long dimInsert( RowMetaInterface inputRowMeta, Object[] row, Long technicalKey, boolean newEntry,
Long versionNr, Date dateFrom, Date dateTo ) throws KettleException {
DatabaseMeta databaseMeta = meta.getDatabaseMeta();
if ( data.prepStatementInsert == null && data.prepStatementUpdate == null ) { // first time: construct prepared statement
RowMetaInterface insertRowMeta = new RowMeta();
/*
* Construct the SQL statement...
*
* INSERT INTO d_customer(keyfield, versionfield, datefrom, dateto, key[], fieldlookup[], last_updated,
* last_inserted, last_version) VALUES (val_key ,val_version , val_datfrom, val_datto, keynrs[], fieldnrs[],
* last_updated, last_inserted, last_version) ;
*/
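// Note that two prepared statements are constructed here: the INSERT for the new dimension row and, further
// below, the UPDATE that closes the previous version's date range when a new version of an existing entry is
// inserted (newEntry == false).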
String sql = "INSERT INTO " + data.schemaTable + "( ";
if ( !isAutoIncrement() ) {
sql += databaseMeta.quoteField( meta.getKeyField() ) + ", "; // NO
// AUTOINCREMENT
insertRowMeta.addValueMeta( data.outputRowMeta.getValueMeta( inputRowMeta.size() ) ); // the first return value
// after the input
} else {
if ( databaseMeta.needsPlaceHolder() ) {
sql += "0, "; // placeholder on informix!
}
}
sql +=
databaseMeta.quoteField( meta.getVersionField() )
+ ", " + databaseMeta.quoteField( meta.getDateFrom() ) + ", "
+ databaseMeta.quoteField( meta.getDateTo() );
insertRowMeta.addValueMeta( new ValueMetaInteger( meta.getVersionField() ) );
insertRowMeta.addValueMeta( new ValueMetaDate( meta.getDateFrom() ) );
insertRowMeta.addValueMeta( new ValueMetaDate( meta.getDateTo() ) );
for ( int i = 0; i < meta.getKeyLookup().length; i++ ) {
sql += ", " + databaseMeta.quoteField( meta.getKeyLookup()[i] );
insertRowMeta.addValueMeta( inputRowMeta.getValueMeta( data.keynrs[i] ) );
}
for ( int i = 0; i < meta.getFieldLookup().length; i++ ) {
// Ignore last_version, last_updated etc, they are handled below (at the
// back of the row).
//
if ( !DimensionLookupMeta.isUpdateTypeWithoutArgument( meta.isUpdate(), meta.getFieldUpdate()[i] ) ) {
sql += ", " + databaseMeta.quoteField( meta.getFieldLookup()[i] );
insertRowMeta.addValueMeta( inputRowMeta.getValueMeta( data.fieldnrs[i] ) );
}
}
// Finally, the special update fields...
//
for ( int i = 0; i < meta.getFieldUpdate().length; i++ ) {
ValueMetaInterface valueMeta = null;
switch ( meta.getFieldUpdate()[i] ) {
case DimensionLookupMeta.TYPE_UPDATE_DATE_INSUP:
case DimensionLookupMeta.TYPE_UPDATE_DATE_INSERTED:
valueMeta = new ValueMetaDate( meta.getFieldLookup()[i] );
break;
case DimensionLookupMeta.TYPE_UPDATE_LAST_VERSION:
valueMeta = new ValueMetaBoolean( meta.getFieldLookup()[i] );
break;
default:
break;
}
if ( valueMeta != null ) {
sql += ", " + databaseMeta.quoteField( valueMeta.getName() );
insertRowMeta.addValueMeta( valueMeta );
}
}
sql += ") VALUES (";
if ( !isAutoIncrement() ) {
sql += "?, ";
}
sql += "?, ?, ?";
for ( int i = 0; i < data.keynrs.length; i++ ) {
sql += ", ?";
}
for ( int i = 0; i < meta.getFieldLookup().length; i++ ) {
// Ignore last_version, last_updated, etc. These are handled below...
//
if ( !DimensionLookupMeta.isUpdateTypeWithoutArgument( meta.isUpdate(), meta.getFieldUpdate()[i] ) ) {
sql += ", ?";
}
}
// The special update fields...
//
for ( int i = 0; i < meta.getFieldUpdate().length; i++ ) {
switch ( meta.getFieldUpdate()[i] ) {
case DimensionLookupMeta.TYPE_UPDATE_DATE_INSUP:
case DimensionLookupMeta.TYPE_UPDATE_DATE_INSERTED:
case DimensionLookupMeta.TYPE_UPDATE_LAST_VERSION:
sql += ", ?";
break;
default:
break;
}
}
sql += " )";
try {
if ( technicalKey == null && databaseMeta.supportsAutoGeneratedKeys() ) {
logDetailed( "SQL w/ return keys=[" + sql + "]" );
data.prepStatementInsert =
data.db.getConnection().prepareStatement(
databaseMeta.stripCR( sql ), Statement.RETURN_GENERATED_KEYS );
} else {
logDetailed( "SQL=[" + sql + "]" );
data.prepStatementInsert = data.db.getConnection().prepareStatement( databaseMeta.stripCR( sql ) );
}
// pstmt=con.prepareStatement(sql, new String[] { "klant_tk" } );
} catch ( SQLException ex ) {
throw new KettleDatabaseException( "Unable to prepare dimension insert :" + Const.CR + sql, ex );
}
/*
* UPDATE d_customer SET dateto = val_datnow, last_updated = <now> last_version = false WHERE keylookup[] =
* keynrs[] AND versionfield = val_version - 1 ;
*/
RowMetaInterface updateRowMeta = new RowMeta();
String sql_upd = "UPDATE " + data.schemaTable + Const.CR;
// The end of the date range
//
sql_upd += "SET " + databaseMeta.quoteField( meta.getDateTo() ) + " = ?" + Const.CR;
updateRowMeta.addValueMeta( new ValueMetaDate( meta.getDateTo() ) );
// The special update fields...
//
for ( int i = 0; i < meta.getFieldUpdate().length; i++ ) {
ValueMetaInterface valueMeta = null;
switch ( meta.getFieldUpdate()[i] ) {
case DimensionLookupMeta.TYPE_UPDATE_DATE_INSUP:
case DimensionLookupMeta.TYPE_UPDATE_DATE_UPDATED:
valueMeta = new ValueMetaDate( meta.getFieldLookup()[i] );
break;
case DimensionLookupMeta.TYPE_UPDATE_LAST_VERSION:
valueMeta = new ValueMetaBoolean( meta.getFieldLookup()[i] );
break;
default:
break;
}
if ( valueMeta != null ) {
sql_upd += ", " + databaseMeta.quoteField( valueMeta.getName() ) + " = ?" + Const.CR;
updateRowMeta.addValueMeta( valueMeta );
}
}
sql_upd += "WHERE ";
for ( int i = 0; i < meta.getKeyLookup().length; i++ ) {
if ( i > 0 ) {
sql_upd += "AND ";
}
sql_upd += databaseMeta.quoteField( meta.getKeyLookup()[i] ) + " = ?" + Const.CR;
updateRowMeta.addValueMeta( inputRowMeta.getValueMeta( data.keynrs[i] ) );
}
sql_upd += "AND " + databaseMeta.quoteField( meta.getVersionField() ) + " = ? ";
updateRowMeta.addValueMeta( new ValueMetaInteger( meta.getVersionField() ) );
try {
logDetailed( "Preparing update: " + Const.CR + sql_upd + Const.CR );
data.prepStatementUpdate = data.db.getConnection().prepareStatement( databaseMeta.stripCR( sql_upd ) );
} catch ( SQLException ex ) {
throw new KettleDatabaseException( "Unable to prepare dimension update :" + Const.CR + sql_upd, ex );
}
data.insertRowMeta = insertRowMeta;
data.updateRowMeta = updateRowMeta;
}
Object[] insertRow = new Object[data.insertRowMeta.size()];
int insertIndex = 0;
if ( !isAutoIncrement() ) {
insertRow[insertIndex++] = technicalKey;
}
// Caller is responsible for setting proper version number depending
// on if newEntry == true
insertRow[insertIndex++] = versionNr;
switch ( data.startDateChoice ) {
case DimensionLookupMeta.START_DATE_ALTERNATIVE_NONE:
insertRow[insertIndex++] = dateFrom;
break;
case DimensionLookupMeta.START_DATE_ALTERNATIVE_SYSDATE:
// use the time the step execution begins as the date from (passed in as dateFrom).
// before, the current system time was used. this caused an exclusion of the row in the
// lookup portion of the step that uses this 'valueDate' and not the current time.
// the result was multiple inserts for what should have been 1 [PDI-4317]
insertRow[insertIndex++] = dateFrom;
break;
case DimensionLookupMeta.START_DATE_ALTERNATIVE_START_OF_TRANS:
insertRow[insertIndex++] = getTrans().getStartDate();
break;
case DimensionLookupMeta.START_DATE_ALTERNATIVE_NULL:
insertRow[insertIndex++] = null;
break;
case DimensionLookupMeta.START_DATE_ALTERNATIVE_COLUMN_VALUE:
insertRow[insertIndex++] = inputRowMeta.getDate( row, data.startDateFieldIndex );
break;
default:
throw new KettleStepException( BaseMessages.getString(
PKG, "DimensionLookup.Exception.IllegalStartDateSelection", Integer.toString( data.startDateChoice ) ) );
}
insertRow[insertIndex++] = dateTo;
for ( int i = 0; i < data.keynrs.length; i++ ) {
insertRow[insertIndex++] = row[data.keynrs[i]];
}
for ( int i = 0; i < data.fieldnrs.length; i++ ) {
if ( data.fieldnrs[i] >= 0 ) {
// Ignore last_version, last_updated, etc. These are handled below...
//
insertRow[insertIndex++] = row[data.fieldnrs[i]];
}
}
// The special update fields...
//
for ( int i = 0; i < meta.getFieldUpdate().length; i++ ) {
switch ( meta.getFieldUpdate()[i] ) {
case DimensionLookupMeta.TYPE_UPDATE_DATE_INSUP:
case DimensionLookupMeta.TYPE_UPDATE_DATE_INSERTED:
insertRow[insertIndex++] = new Date();
break;
case DimensionLookupMeta.TYPE_UPDATE_LAST_VERSION:
insertRow[insertIndex++] = Boolean.TRUE;
break; // Always the last version on insert.
default:
break;
}
}
if ( isDebug() ) {
logDebug( "rins, size=" + data.insertRowMeta.size() + ", values=" + data.insertRowMeta.getString( insertRow ) );
}
// INSERT NEW VALUE!
data.db.setValues( data.insertRowMeta, insertRow, data.prepStatementInsert );
data.db.insertRow( data.prepStatementInsert );
if ( isDebug() ) {
logDebug( "Row inserted!" );
}
if ( technicalKey == null && databaseMeta.supportsAutoGeneratedKeys() ) {
try {
RowMetaAndData keys = data.db.getGeneratedKeys( data.prepStatementInsert );
if ( keys.getRowMeta().size() > 0 ) {
technicalKey = keys.getRowMeta().getInteger( keys.getData(), 0 );
} else {
throw new KettleDatabaseException(
"Unable to retrieve value of auto-generated technical key : no value found!" );
}
} catch ( Exception e ) {
throw new KettleDatabaseException(
"Unable to retrieve value of auto-generated technical key : unexpected error: ", e );
}
}
if ( !newEntry ) { // we have to update the previous version in the dimension!
/*
* UPDATE d_customer SET dateto = val_datfrom , last_updated = <now> , last_version = false WHERE keylookup[] =
* keynrs[] AND versionfield = val_version - 1 ;
*/
Object[] updateRow = new Object[data.updateRowMeta.size()];
int updateIndex = 0;
switch ( data.startDateChoice ) {
case DimensionLookupMeta.START_DATE_ALTERNATIVE_NONE:
updateRow[updateIndex++] = dateFrom;
break;
case DimensionLookupMeta.START_DATE_ALTERNATIVE_SYSDATE:
updateRow[updateIndex++] = new Date();
break;
case DimensionLookupMeta.START_DATE_ALTERNATIVE_START_OF_TRANS:
updateRow[updateIndex++] = getTrans().getCurrentDate();
break;
case DimensionLookupMeta.START_DATE_ALTERNATIVE_NULL:
updateRow[updateIndex++] = null;
break;
case DimensionLookupMeta.START_DATE_ALTERNATIVE_COLUMN_VALUE:
updateRow[updateIndex++] = inputRowMeta.getDate( row, data.startDateFieldIndex );
break;
default:
throw new KettleStepException( BaseMessages.getString(
"DimensionLookup.Exception.IllegalStartDateSelection", Integer.toString( data.startDateChoice ) ) );
}
// The special update fields...
//
for ( int i = 0; i < meta.getFieldUpdate().length; i++ ) {
switch ( meta.getFieldUpdate()[i] ) {
case DimensionLookupMeta.TYPE_UPDATE_DATE_INSUP:
updateRow[updateIndex++] = new Date();
break;
case DimensionLookupMeta.TYPE_UPDATE_LAST_VERSION:
updateRow[updateIndex++] = Boolean.FALSE;
break; // Never the last version on this update
case DimensionLookupMeta.TYPE_UPDATE_DATE_UPDATED:
updateRow[updateIndex++] = new Date();
break;
default:
break;
}
}
for ( int i = 0; i < data.keynrs.length; i++ ) {
updateRow[updateIndex++] = row[data.keynrs[i]];
}
updateRow[updateIndex++] = versionNr - 1;
if ( isRowLevel() ) {
logRowlevel( "UPDATE using rupd=" + data.updateRowMeta.getString( updateRow ) );
}
// UPDATE VALUES
// set values for update
//
data.db.setValues( data.updateRowMeta, updateRow, data.prepStatementUpdate );
if ( isDebug() ) {
logDebug( "Values set for update (" + data.updateRowMeta.size() + ")" );
}
data.db.insertRow( data.prepStatementUpdate ); // do the actual update
if ( isDebug() ) {
logDebug( "Row updated!" );
}
}
return technicalKey;
}
@Override
public boolean isRowLevel() {
return log.isRowLevel();
}
@Override
public boolean isDebug() {
return log.isDebug();
}
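/**
 * Updates the dimension record identified by the given technical key in place (no new version is created): the
 * configured lookup fields are overwritten and any "date of last update" style special fields are set.
 */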
public void dimUpdate( RowMetaInterface rowMeta, Object[] row, Long dimkey, Date valueDate ) throws KettleDatabaseException {
if ( data.prepStatementDimensionUpdate == null ) {
// first time: construct prepared statement
//
data.dimensionUpdateRowMeta = new RowMeta();
// Construct the SQL statement...
/*
* UPDATE d_customer SET fieldlookup[] = row.getValue(fieldnrs) , last_updated = <now> WHERE returnkey = dimkey ;
*/
String sql = "UPDATE " + data.schemaTable + Const.CR + "SET ";
boolean comma = false;
for ( int i = 0; i < meta.getFieldLookup().length; i++ ) {
if ( !DimensionLookupMeta.isUpdateTypeWithoutArgument( meta.isUpdate(), meta.getFieldUpdate()[i] ) ) {
if ( comma ) {
sql += ", ";
} else {
sql += " ";
}
comma = true;
sql += meta.getDatabaseMeta().quoteField( meta.getFieldLookup()[i] ) + " = ?" + Const.CR;
data.dimensionUpdateRowMeta.addValueMeta( rowMeta.getValueMeta( data.fieldnrs[i] ) );
}
}
// The special update fields...
//
for ( int i = 0; i < meta.getFieldUpdate().length; i++ ) {
ValueMetaInterface valueMeta = null;
switch ( meta.getFieldUpdate()[i] ) {
case DimensionLookupMeta.TYPE_UPDATE_DATE_INSUP:
case DimensionLookupMeta.TYPE_UPDATE_DATE_UPDATED:
valueMeta = new ValueMetaDate( meta.getFieldLookup()[i] );
break;
default:
break;
}
if ( valueMeta != null ) {
if ( comma ) {
sql += ", ";
} else {
sql += " ";
}
comma = true;
sql += meta.getDatabaseMeta().quoteField( valueMeta.getName() ) + " = ?" + Const.CR;
data.dimensionUpdateRowMeta.addValueMeta( valueMeta );
}
}
sql += "WHERE " + meta.getDatabaseMeta().quoteField( meta.getKeyField() ) + " = ?";
data.dimensionUpdateRowMeta
.addValueMeta( new ValueMetaInteger( meta.getKeyField() ) ); // The tk
try {
if ( isDebug() ) {
logDebug( "Preparing statement: [" + sql + "]" );
}
data.prepStatementDimensionUpdate =
data.db.getConnection().prepareStatement( meta.getDatabaseMeta().stripCR( sql ) );
} catch ( SQLException ex ) {
throw new KettleDatabaseException( "Couldn't prepare statement :" + Const.CR + sql, ex );
}
}
// Assemble information
// New
Object[] dimensionUpdateRow = new Object[data.dimensionUpdateRowMeta.size()];
int updateIndex = 0;
for ( int i = 0; i < data.fieldnrs.length; i++ ) {
// Ignore last_version, last_updated, etc. These are handled below...
//
if ( data.fieldnrs[i] >= 0 ) {
dimensionUpdateRow[updateIndex++] = row[data.fieldnrs[i]];
}
}
for ( int i = 0; i < meta.getFieldUpdate().length; i++ ) {
switch ( meta.getFieldUpdate()[i] ) {
case DimensionLookupMeta.TYPE_UPDATE_DATE_INSUP:
case DimensionLookupMeta.TYPE_UPDATE_DATE_UPDATED:
dimensionUpdateRow[updateIndex++] = valueDate;
break;
default:
break;
}
}
dimensionUpdateRow[updateIndex++] = dimkey;
data.db.setValues( data.dimensionUpdateRowMeta, dimensionUpdateRow, data.prepStatementDimensionUpdate );
data.db.insertRow( data.prepStatementDimensionUpdate );
}
// This updates all versions of a dimension entry.
//
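// A "punch through" field carries no history: when its value changes, the new value is written to every version
// of the dimension entry sharing the same natural key.
//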
public void dimPunchThrough( RowMetaInterface rowMeta, Object[] row ) throws KettleDatabaseException {
if ( data.prepStatementPunchThrough == null ) { // first time: construct prepared statement
DatabaseMeta databaseMeta = meta.getDatabaseMeta();
data.punchThroughRowMeta = new RowMeta();
/*
* UPDATE table SET punchv1 = fieldx, ... , last_updated = <now> WHERE keylookup[] = keynrs[] ;
*/
String sql_upd = "UPDATE " + data.schemaTable + Const.CR;
sql_upd += "SET ";
boolean first = true;
for ( int i = 0; i < meta.getFieldLookup().length; i++ ) {
if ( meta.getFieldUpdate()[i] == DimensionLookupMeta.TYPE_UPDATE_DIM_PUNCHTHROUGH ) {
if ( !first ) {
sql_upd += ", ";
} else {
sql_upd += " ";
}
first = false;
sql_upd += databaseMeta.quoteField( meta.getFieldLookup()[i] ) + " = ?" + Const.CR;
data.punchThroughRowMeta.addValueMeta( rowMeta.getValueMeta( data.fieldnrs[i] ) );
}
}
// The special update fields...
//
for ( int i = 0; i < meta.getFieldUpdate().length; i++ ) {
ValueMetaInterface valueMeta = null;
switch ( meta.getFieldUpdate()[i] ) {
case DimensionLookupMeta.TYPE_UPDATE_DATE_INSUP:
case DimensionLookupMeta.TYPE_UPDATE_DATE_UPDATED:
valueMeta = new ValueMetaDate( meta.getFieldLookup()[i] );
break;
default:
break;
}
if ( valueMeta != null ) {
sql_upd += ", " + databaseMeta.quoteField( valueMeta.getName() ) + " = ?" + Const.CR;
data.punchThroughRowMeta.addValueMeta( valueMeta );
}
}
sql_upd += "WHERE ";
for ( int i = 0; i < meta.getKeyLookup().length; i++ ) {
if ( i > 0 ) {
sql_upd += "AND ";
}
sql_upd += databaseMeta.quoteField( meta.getKeyLookup()[i] ) + " = ?" + Const.CR;
data.punchThroughRowMeta.addValueMeta( rowMeta.getValueMeta( data.keynrs[i] ) );
}
try {
data.prepStatementPunchThrough =
data.db.getConnection().prepareStatement( meta.getDatabaseMeta().stripCR( sql_upd ) );
} catch ( SQLException ex ) {
throw new KettleDatabaseException( "Unable to prepare dimension punchThrough update statement : "
+ Const.CR + sql_upd, ex );
}
}
Object[] punchThroughRow = new Object[data.punchThroughRowMeta.size()];
int punchIndex = 0;
for ( int i = 0; i < meta.getFieldLookup().length; i++ ) {
if ( meta.getFieldUpdate()[i] == DimensionLookupMeta.TYPE_UPDATE_DIM_PUNCHTHROUGH ) {
punchThroughRow[punchIndex++] = row[data.fieldnrs[i]];
}
}
for ( int i = 0; i < meta.getFieldUpdate().length; i++ ) {
switch ( meta.getFieldUpdate()[i] ) {
case DimensionLookupMeta.TYPE_UPDATE_DATE_INSUP:
case DimensionLookupMeta.TYPE_UPDATE_DATE_UPDATED:
punchThroughRow[punchIndex++] = new Date();
break;
default:
break;
}
}
for ( int i = 0; i < data.keynrs.length; i++ ) {
punchThroughRow[punchIndex++] = row[data.keynrs[i]];
}
// UPDATE VALUES
data.db.setValues( data.punchThroughRowMeta, punchThroughRow, data.prepStatementPunchThrough ); // set values for
// update
data.db.insertRow( data.prepStatementPunchThrough ); // do the actual punch through update
}
/**
* Keys: the natural key fields.<br>
* Values: the technical key, the lookup fields / extra fields (allows us to compare or retrieve), date_from and
* date_to.
*
* @param row
* The input row
* @param technicalKey
* the technical key value
* @param valueDateFrom
the start of the valid date range
* @param valueDateTo
* the end of the valid date range
* @return the values to store in the cache as a row.
*/
private Object[] getCacheValues( RowMetaInterface rowMeta, Object[] row, Long technicalKey, Long valueVersion,
Date valueDateFrom, Date valueDateTo ) {
if ( data.cacheValueRowMeta == null ) {
return null; // nothing is in the cache.
}
Object[] cacheValues = new Object[data.cacheValueRowMeta.size()];
int cacheIndex = 0;
cacheValues[cacheIndex++] = technicalKey;
cacheValues[cacheIndex++] = valueVersion;
for ( int i = 0; i < data.fieldnrs.length; i++ ) {
// Ignore last_version, last_updated, etc. These are handled below...
//
if ( data.fieldnrs[i] >= 0 ) {
cacheValues[cacheIndex++] = row[data.fieldnrs[i]];
}
}
cacheValues[cacheIndex++] = valueDateFrom;
cacheValues[cacheIndex++] = valueDateTo;
return cacheValues;
}
/**
* Adds a row to the cache. In case we are doing updates, we need to store the complete rows from the database.
* These are the values we need to store.
*
* Key: the natural key fields.<br>
* Value: the technical key, the lookup fields / extra fields (allows us to compare or retrieve), date_from and
* date_to.
*
* @param keyValues
* @param returnValues
* @throws KettleValueException
*/
private void addToCache( Object[] keyValues, Object[] returnValues ) throws KettleValueException {
if ( data.cacheValueRowMeta == null ) {
data.cacheValueRowMeta = assembleCacheValueRowMeta();
}
// store it in the cache if needed.
byte[] keyPart = RowMeta.extractData( data.cacheKeyRowMeta, keyValues );
byte[] valuePart = RowMeta.extractData( data.cacheValueRowMeta, returnValues );
data.cache.put( keyPart, valuePart );
// check if the size is not too big...
// Allow for a buffer overrun of 10% and then remove the surplus in one go.
// Just to keep performance on track.
//
int tenPercent = meta.getCacheSize() / 10;
if ( meta.getCacheSize() > 0 && data.cache.size() > meta.getCacheSize() + tenPercent ) {
// Which cache entries do we delete here?
// We delete those with the lowest technical key...
// Those would arguably be the "oldest" dimension entries.
// Oh well... Nothing is going to be perfect here...
//
// Getting the lowest 20% requires some kind of sorting algorithm and I'm not sure we want to do that.
// Sorting is slow and even in the best case situation we need to do 2 passes over the cache entries...
//
// Perhaps we should get 20% random values and delete everything below the lowest but one TK.
//
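// Worked example (assuming a cache size of 5000): eviction triggers once the cache holds more than 5500
// entries. About 5 technical keys are sampled below, and every entry with a TK at or below the second-smallest
// sample is then removed in a single pass.
//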
List<byte[]> keys = data.cache.getKeys();
int sizeBefore = keys.size();
List<Long> samples = new ArrayList<Long>();
// Take about 5 sample technical keys....
int stepsize = keys.size() / 5;
if ( stepsize < 1 ) {
stepsize = 1; // make sure we have no endless loop
}
for ( int i = 0; i < keys.size(); i += stepsize ) {
byte[] key = keys.get( i );
byte[] value = data.cache.get( key );
if ( value != null ) {
Object[] values = RowMeta.getRow( data.cacheValueRowMeta, value );
Long tk = data.cacheValueRowMeta.getInteger( values, 0 );
samples.add( tk );
}
}
// Sort these 5 elements...
Collections.sort( samples );
// What is the smallest?
// Take the second, not the first in the list; otherwise we would be removing a single entry = not good.
if ( samples.size() > 1 ) {
data.smallestCacheKey = samples.get( 1 );
} else if ( !samples.isEmpty() ) { // except when there is only one sample
data.smallestCacheKey = samples.get( 0 );
} else {
// If no samples found nothing to remove, we're done
return;
}
// Remove anything in the cache <= smallest.
// This makes it almost single pass...
// This algorithm is not 100% correct, but I guess it beats sorting the whole cache all the time.
//
for ( int i = 0; i < keys.size(); i++ ) {
byte[] key = keys.get( i );
byte[] value = data.cache.get( key );
if ( value != null ) {
Object[] values = RowMeta.getRow( data.cacheValueRowMeta, value );
long tk = data.cacheValueRowMeta.getInteger( values, 0 ).longValue();
if ( tk <= data.smallestCacheKey ) {
data.cache.remove( key ); // this one has to go.
}
}
}
int sizeAfter = data.cache.size();
logDetailed( "Reduced the lookup cache from " + sizeBefore + " to " + sizeAfter + " rows." );
}
if ( isRowLevel() ) {
logRowlevel( "Cache store: key=" + keyValues + " values=" + returnValues );
}
}
/**
* @return the cache value row metadata. The items that are cached are basically the return row metadata:<br>
* - Technical key (Integer) - Version (Integer) - the remaining return fields and date range
*/
private RowMetaInterface assembleCacheValueRowMeta() {
RowMetaInterface cacheRowMeta = data.returnRowMeta.clone();
// The technical key and version are always an Integer...
//
/*
* cacheRowMeta.getValueMeta(0).setType(ValueMetaInterface.TYPE_INTEGER);
* cacheRowMeta.getValueMeta(1).setType(ValueMetaInterface.TYPE_INTEGER);
*/
return cacheRowMeta;
}
private Object[] getFromCache( Object[] keyValues, Date dateValue ) throws KettleValueException {
if ( data.cacheValueRowMeta == null ) {
// nothing in the cache yet, no lookup was ever performed
if ( data.returnRowMeta == null ) {
return null;
}
data.cacheValueRowMeta = assembleCacheValueRowMeta();
}
byte[] key = RowMeta.extractData( data.cacheKeyRowMeta, keyValues );
byte[] value = data.cache.get( key );
if ( value != null ) {
Object[] row = RowMeta.getRow( data.cacheValueRowMeta, value );
// See if the dateValue is between the from and to date ranges...
// The last 2 values are from and to
long time = dateValue.getTime();
long from = ( (Date) row[row.length - 2] ).getTime();
long to = ( (Date) row[row.length - 1] ).getTime();
if ( time >= from && time < to ) { // sanity check to see if we have the right version
if ( isRowLevel() ) {
logRowlevel( "Cache hit: key="
+ data.cacheKeyRowMeta.getString( keyValues ) + " values=" + data.cacheValueRowMeta.getString( row ) );
}
return row;
}
}
return null;
}
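/**
 * In update mode, makes sure the "unknown" dimension row (the row carrying the technical key that is returned
 * for failed lookups) exists in the dimension table, inserting it when it is missing.
 */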
public void checkDimZero() throws KettleException {
// Don't insert anything when running in lookup mode.
//
if ( !meta.isUpdate() ) {
return;
}
DatabaseMeta databaseMeta = meta.getDatabaseMeta();
int start_tk = databaseMeta.getNotFoundTK( isAutoIncrement() );
if ( meta.isAutoIncrement() ) {
// See if the unknown row is already present in the table.
// If so, we can't insert it again...
//
String sql =
"SELECT count(*) FROM "
+ data.schemaTable + " WHERE " + databaseMeta.quoteField( meta.getKeyField() ) + " = " + start_tk;
RowMetaAndData r = data.db.getOneRow( sql );
Long count = r.getRowMeta().getInteger( r.getData(), 0 );
if ( count.longValue() != 0 ) {
return; // Can't insert below the rows already in there...
}
}
String sql =
"SELECT count(*) FROM "
+ data.schemaTable + " WHERE " + databaseMeta.quoteField( meta.getKeyField() ) + " = " + start_tk;
RowMetaAndData r = data.db.getOneRow( sql );
Long count = r.getRowMeta().getInteger( r.getData(), 0 );
if ( count.longValue() == 0 ) {
String isql = null;
try {
if ( !databaseMeta.supportsAutoinc() || !isAutoIncrement() ) {
isql =
"insert into "
+ data.schemaTable + "(" + databaseMeta.quoteField( meta.getKeyField() ) + ", "
+ databaseMeta.quoteField( meta.getVersionField() ) + ") values (0, 1)";
} else {
isql =
databaseMeta.getSQLInsertAutoIncUnknownDimensionRow( data.schemaTable, databaseMeta.quoteField( meta
.getKeyField() ), databaseMeta.quoteField( meta.getVersionField() ) );
}
data.db.execStatement( databaseMeta.stripCR( isql ) );
} catch ( KettleException e ) {
throw new KettleDatabaseException( "Error inserting 'unknown' row in dimension ["
+ data.schemaTable + "] : " + isql, e );
}
}
}
@Override
public boolean init( StepMetaInterface smi, StepDataInterface sdi ) {
meta = (DimensionLookupMeta) smi;
data = (DimensionLookupData) sdi;
if ( super.init( smi, sdi ) ) {
meta.actualizeWithInjectedValues();
data.min_date = meta.getMinDate();
data.max_date = meta.getMaxDate();
data.realSchemaName = environmentSubstitute( meta.getSchemaName() );
data.realTableName = environmentSubstitute( meta.getTableName() );
data.startDateChoice = DimensionLookupMeta.START_DATE_ALTERNATIVE_NONE;
if ( meta.isUsingStartDateAlternative() ) {
data.startDateChoice = meta.getStartDateAlternative();
}
if ( meta.getDatabaseMeta() == null ) {
logError( BaseMessages.getString( PKG, "DimensionLookup.Init.ConnectionMissing", getStepname() ) );
return false;
}
data.db = new Database( this, meta.getDatabaseMeta() );
data.db.shareVariablesWith( this );
try {
if ( getTransMeta().isUsingUniqueConnections() ) {
synchronized ( getTrans() ) {
data.db.connect( getTrans().getTransactionId(), getPartitionID() );
}
} else {
data.db.connect( getPartitionID() );
}
if ( log.isDetailed() ) {
logDetailed( BaseMessages.getString( PKG, "DimensionLookup.Log.ConnectedToDB" ) );
}
data.db.setCommit( meta.getCommitSize() );
return true;
} catch ( KettleException ke ) {
logError( BaseMessages.getString( PKG, "DimensionLookup.Log.ErrorOccurredInProcessing" ) + ke.getMessage() );
}
}
return false;
}
@Override
public void dispose( StepMetaInterface smi, StepDataInterface sdi ) {
meta = (DimensionLookupMeta) smi;
data = (DimensionLookupData) sdi;
if ( data.db != null ) {
try {
if ( !data.db.isAutoCommit() ) {
if ( getErrors() == 0 ) {
data.db.commit();
} else {
data.db.rollback();
}
}
} catch ( KettleDatabaseException e ) {
logError( BaseMessages.getString( PKG, "DimensionLookup.Log.ErrorOccurredInProcessing" ) + e.getMessage() );
} finally {
data.db.disconnect();
}
}
super.dispose( smi, sdi );
}
}