/*! ******************************************************************************
*
* Pentaho Data Integration
*
* Copyright (C) 2002-2016 by Pentaho : http://www.pentaho.com
*
*******************************************************************************
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
******************************************************************************/
package org.pentaho.di.trans.steps.tablecompare;
import java.sql.ResultSet;
import org.pentaho.di.core.Const;
import org.pentaho.di.core.util.Utils;
import org.pentaho.di.core.database.Database;
import org.pentaho.di.core.exception.KettleException;
import org.pentaho.di.core.row.RowDataUtil;
import org.pentaho.di.core.row.RowMeta;
import org.pentaho.di.core.row.RowMetaInterface;
import org.pentaho.di.core.row.ValueMetaInterface;
import org.pentaho.di.i18n.BaseMessages;
import org.pentaho.di.trans.Trans;
import org.pentaho.di.trans.TransMeta;
import org.pentaho.di.trans.step.BaseStep;
import org.pentaho.di.trans.step.StepDataInterface;
import org.pentaho.di.trans.step.StepInterface;
import org.pentaho.di.trans.step.StepMeta;
import org.pentaho.di.trans.step.StepMetaInterface;
/**
* @author Matt
* @since 19-11-2009
*/
public class TableCompare extends BaseStep implements StepInterface {
private static Class<?> PKG = TableCompare.class; // for i18n purposes, needed by Translator2!!
private TableCompareMeta meta;
private TableCompareData data;
public TableCompare( StepMeta stepMeta, StepDataInterface stepDataInterface, int copyNr, TransMeta transMeta,
Trans trans ) {
super( stepMeta, stepDataInterface, copyNr, transMeta, trans );
meta = (TableCompareMeta) getStepMeta().getStepMetaInterface();
data = (TableCompareData) stepDataInterface;
}
public boolean processRow( StepMetaInterface smi, StepDataInterface sdi ) throws KettleException {
meta = (TableCompareMeta) smi;
data = (TableCompareData) sdi;
Object[] r = getRow(); // get row, set busy!
if ( r == null ) { // no more input to be expected...
setOutputDone();
return false;
}
if ( first ) {
first = false;
// What's the format of the output row?
//
data.outputRowMeta = getInputRowMeta().clone();
meta.getFields( data.outputRowMeta, getStepname(), null, null, this, repository, metaStore );
// Reference schema
//
if ( Utils.isEmpty( meta.getReferenceSchemaField() ) ) {
throw new KettleException( BaseMessages.getString(
PKG, "TableCompare.Exception.ReferenceSchemaNotSpecified" ) );
}
data.refSchemaIndex = getInputRowMeta().indexOfValue( meta.getReferenceSchemaField() );
if ( data.refSchemaIndex < 0 ) {
throw new KettleException( BaseMessages.getString( PKG, "TableCompare.Exception.CanNotFindField", meta
.getReferenceSchemaField() ) );
}
// Reference table
//
if ( Utils.isEmpty( meta.getReferenceTableField() ) ) {
throw new KettleException( BaseMessages.getString(
PKG, "TableCompare.Exception.ReferenceTableNotSpecified" ) );
}
data.refTableIndex = getInputRowMeta().indexOfValue( meta.getReferenceTableField() );
if ( data.refTableIndex < 0 ) {
throw new KettleException( BaseMessages.getString( PKG, "TableCompare.Exception.CanNotFindField", meta
.getReferenceTableField() ) );
}
// Compare schema
//
if ( Utils.isEmpty( meta.getCompareSchemaField() ) ) {
throw new KettleException( BaseMessages
.getString( PKG, "TableCompare.Exception.CompareSchemaNotSpecified" ) );
}
data.cmpSchemaIndex = getInputRowMeta().indexOfValue( meta.getCompareSchemaField() );
if ( data.cmpSchemaIndex < 0 ) {
throw new KettleException( BaseMessages.getString( PKG, "TableCompare.Exception.CanNotFindField", meta
.getCompareSchemaField() ) );
}
// Compare table
//
if ( Utils.isEmpty( meta.getCompareTableField() ) ) {
throw new KettleException( BaseMessages.getString( PKG, "TableCompare.Exception.CompareTableNotSpecified" ) );
}
data.cmpTableIndex = getInputRowMeta().indexOfValue( meta.getCompareTableField() );
if ( data.cmpTableIndex < 0 ) {
throw new KettleException( BaseMessages.getString( PKG, "TableCompare.Exception.CanNotFindField", meta
.getCompareTableField() ) );
}
// Key fields
//
if ( Utils.isEmpty( meta.getKeyFieldsField() ) ) {
throw new KettleException( BaseMessages.getString( PKG, "TableCompare.Exception.KeyFieldsNotSpecified" ) );
}
data.keyFieldsIndex = getInputRowMeta().indexOfValue( meta.getKeyFieldsField() );
if ( data.keyFieldsIndex < 0 ) {
throw new KettleException( BaseMessages.getString( PKG, "TableCompare.Exception.CanNotFindField", meta
.getKeyFieldsField() ) );
}
// Exclude fields
//
if ( Utils.isEmpty( meta.getExcludeFieldsField() ) ) {
throw new KettleException( BaseMessages
.getString( PKG, "TableCompare.Exception.ExcludeFieldsNotSpecified" ) );
}
data.excludeFieldsIndex = getInputRowMeta().indexOfValue( meta.getExcludeFieldsField() );
if ( data.excludeFieldsIndex < 0 ) {
throw new KettleException( BaseMessages.getString( PKG, "TableCompare.Exception.CanNotFindField", meta
.getExcludeFieldsField() ) );
}
// error handling: Key description
//
if ( Utils.isEmpty( meta.getKeyDescriptionField() ) ) {
throw new KettleException( BaseMessages.getString(
PKG, "TableCompare.Exception.KeyDescriptionFieldNotSpecified" ) );
}
data.keyDescIndex = getInputRowMeta().indexOfValue( meta.getKeyDescriptionField() );
if ( data.keyDescIndex < 0 ) {
throw new KettleException( BaseMessages.getString( PKG, "TableCompare.Exception.CanNotFindField", meta
.getKeyDescriptionField() ) );
}
// error handling: reference value
//
if ( Utils.isEmpty( meta.getValueReferenceField() ) ) {
throw new KettleException( BaseMessages.getString(
PKG, "TableCompare.Exception.ValueReferenceFieldNotSpecified" ) );
}
data.valueReferenceIndex = getInputRowMeta().indexOfValue( meta.getValueReferenceField() );
if ( data.valueReferenceIndex < 0 ) {
throw new KettleException( BaseMessages.getString( PKG, "TableCompare.Exception.CanNotFindField", meta
.getValueReferenceField() ) );
}
// error handling: compare value
//
if ( Utils.isEmpty( meta.getValueCompareField() ) ) {
throw new KettleException( BaseMessages.getString(
PKG, "TableCompare.Exception.ValueCompareFieldNotSpecified" ) );
}
data.valueCompareIndex = getInputRowMeta().indexOfValue( meta.getValueCompareField() );
if ( data.valueCompareIndex < 0 ) {
throw new KettleException( BaseMessages.getString( PKG, "TableCompare.Exception.CanNotFindField", meta
.getValueCompareField() ) );
}
} // end if first
Object[] fields = compareTables( getInputRowMeta(), r );
Object[] outputRowData = RowDataUtil.addRowData( r, getInputRowMeta().size(), fields );
putRow( data.outputRowMeta, outputRowData ); // copy row to output rowset(s);
return true;
}
private Object[] compareTables( RowMetaInterface rowMeta, Object[] r ) throws KettleException {
try {
String referenceSchema = getInputRowMeta().getString( r, data.refSchemaIndex );
String referenceTable = getInputRowMeta().getString( r, data.refTableIndex );
String compareSchema = getInputRowMeta().getString( r, data.cmpSchemaIndex );
String compareTable = getInputRowMeta().getString( r, data.cmpTableIndex );
String keyFields = getInputRowMeta().getString( r, data.keyFieldsIndex );
String excludeFields = getInputRowMeta().getString( r, data.excludeFieldsIndex );
return compareTables(
rowMeta, r, referenceSchema, referenceTable, compareSchema, compareTable, keyFields, excludeFields );
} catch ( Exception e ) {
throw new KettleException( BaseMessages.getString(
PKG, "TableCompare.Exception.UnexpectedErrorComparingTables" ), e );
}
}
private Object[] compareTables( RowMetaInterface rowMeta, Object[] r, String referenceSchema,
String referenceTable, String compareSchema, String compareTable, String keyFields, String excludeFields ) throws KettleException {
long nrErrors = 0L;
long nrLeftErrors = 0L;
long nrRightErrors = 0L;
long nrInnerErrors = 0L;
long nrRecordsReference = 0L;
long nrRecordsCompare = 0L;
Object[] result = new Object[6];
if ( Utils.isEmpty( referenceTable ) ) {
Object[] errorRowData = constructErrorRow( rowMeta, r, null, null, null );
putError( data.errorRowMeta, errorRowData, 1, BaseMessages.getString(
PKG, "TableCompare.Exception.NoReferenceTableDefined" ), null, "TAC008" );
nrErrors++;
}
if ( Utils.isEmpty( compareTable ) ) {
Object[] errorRowData = constructErrorRow( rowMeta, r, null, null, null );
putError( data.errorRowMeta, errorRowData, 1, BaseMessages.getString(
PKG, "TableCompare.Exception.NoCompareTableDefined" ), null, "TAC008" );
nrErrors++;
}
String refSchemaTable =
meta.getReferenceConnection().getQuotedSchemaTableCombination( referenceSchema, referenceTable );
String cmpSchemaTable =
meta.getCompareConnection().getQuotedSchemaTableCombination( compareSchema, compareTable );
if ( Utils.isEmpty( keyFields ) ) {
Object[] errorRowData = constructErrorRow( rowMeta, r, null, null, null );
putError( data.errorRowMeta, errorRowData, 1, BaseMessages.getString(
PKG, "TableCompare.Exception.NoKeyFieldsDefined", refSchemaTable, cmpSchemaTable ), null, "TAC007" );
nrErrors++;
}
// If something is wrong here, we can't continue...
//
if ( nrErrors > 0 ) {
result[0] = Long.valueOf( nrErrors );
return result;
}
String[] keys = keyFields.split( "," );
for ( int i = 0; i < keys.length; i++ ) {
keys[i] = Kjube.trim( keys[i] );
}
String[] excluded = Utils.isEmpty( excludeFields ) ? new String[0] : excludeFields.split( "," );
for ( int i = 0; i < excluded.length; i++ ) {
excluded[i] = Kjube.trim( excluded[i] );
}
try {
RowMetaInterface refFields = data.referenceDb.getTableFields( refSchemaTable );
RowMetaInterface cmpFields = data.compareDb.getTableFields( cmpSchemaTable );
// Remove the excluded fields from these fields...
//
for ( String field : excluded ) {
if ( refFields.indexOfValue( field ) >= 0 ) {
refFields.removeValueMeta( field );
}
if ( cmpFields.indexOfValue( field ) >= 0 ) {
cmpFields.removeValueMeta( field );
}
}
// See if the 2 tables have the same nr of fields in it...
//
if ( refFields.size() != cmpFields.size() ) {
Object[] errorRowData = constructErrorRow( rowMeta, r, null, null, null );
putError( data.errorRowMeta, errorRowData, 1, BaseMessages.getString(
PKG, "TableCompare.Error.NumberOfFieldsIsDifferent", refSchemaTable, Integer.toString( refFields
.size() ), cmpSchemaTable, Integer.toString( cmpFields.size() ) ), null, "TAC001" );
nrErrors++;
} else {
// See if all the key fields exist in the reference & compare tables...
//
for ( String key : keys ) {
if ( refFields.indexOfValue( key ) < 0 ) {
if ( getStepMeta().isDoingErrorHandling() ) {
Object[] errorRowData = constructErrorRow( rowMeta, r, null, null, null );
putError(
data.errorRowMeta, errorRowData, 1, BaseMessages.getString(
PKG, "TableCompare.Error.KeyFieldWasNotFoundInReferenceTable", key, refSchemaTable ), null,
"TAC002" );
}
nrErrors++;
}
}
for ( String key : keys ) {
if ( cmpFields.indexOfValue( key ) < 0 ) {
if ( getStepMeta().isDoingErrorHandling() ) {
Object[] errorRowData = constructErrorRow( rowMeta, r, null, null, null );
putError(
data.errorRowMeta, errorRowData, 1, BaseMessages.getString(
PKG, "TableCompare.Error.KeyFieldWasNotFoundInCompareTable", key, refSchemaTable ), null,
"TAC003" );
}
nrErrors++;
}
}
// If we can't find all key fields, stop here...
//
if ( nrErrors > 0 ) {
result[0] = Long.valueOf( nrErrors );
return result;
}
// Now we read the data from both tables and compare keys and values...
// First we construct the SQL
//
RowMetaInterface keyRowMeta = new RowMeta();
RowMetaInterface valueRowMeta = new RowMeta();
int[] keyNrs = new int[keys.length];
String refSql = "SELECT ";
String cmpSql = "SELECT ";
for ( int i = 0; i < keys.length; i++ ) {
if ( i > 0 ) {
refSql += ", ";
cmpSql += ", ";
}
keyNrs[i] = i;
refSql += meta.getReferenceConnection().quoteField( keys[i] );
cmpSql += meta.getReferenceConnection().quoteField( keys[i] );
}
int[] valueNrs = new int[refFields.size() - keys.length];
int valueNr = keys.length;
int valueIndex = 0;
for ( int i = 0; i < refFields.getFieldNames().length; i++ ) {
String field = refFields.getFieldNames()[i];
if ( Const.indexOfString( field, keys ) < 0 ) {
refSql += ", " + meta.getReferenceConnection().quoteField( field );
valueRowMeta.addValueMeta( refFields.searchValueMeta( field ) );
valueNrs[valueIndex++] = valueNr++;
}
}
for ( String field : cmpFields.getFieldNames() ) {
if ( Const.indexOfString( field, keys ) < 0 ) {
cmpSql += ", " + meta.getCompareConnection().quoteField( field );
}
}
refSql += " FROM " + refSchemaTable + " ORDER BY ";
cmpSql += " FROM " + cmpSchemaTable + " ORDER BY ";
for ( int i = 0; i < keys.length; i++ ) {
if ( i > 0 ) {
refSql += ", ";
cmpSql += ", ";
}
refSql += meta.getReferenceConnection().quoteField( keys[i] );
cmpSql += meta.getReferenceConnection().quoteField( keys[i] );
}
// Now we execute the SQL...
//
ResultSet refSet = data.referenceDb.openQuery( refSql );
ResultSet cmpSet = data.compareDb.openQuery( cmpSql );
// Now grab rows of data and start comparing the individual rows ...
//
RowMetaInterface oneMeta = null, twoMeta = null;
Object[] one = data.referenceDb.getRow( refSet );
if ( one != null ) {
incrementLinesInput();
if ( oneMeta == null ) {
oneMeta = data.referenceDb.getReturnRowMeta();
for ( int i = 0; i < keys.length; i++ ) {
keyRowMeta.addValueMeta( oneMeta.searchValueMeta( keys[i] ) );
}
}
}
Object[] two = data.compareDb.getRow( cmpSet );
if ( two != null ) {
incrementLinesInput();
if ( twoMeta == null ) {
twoMeta = data.compareDb.getReturnRowMeta();
if ( keyRowMeta.isEmpty() ) {
for ( int i = 0; i < keys.length; i++ ) {
keyRowMeta.addValueMeta( twoMeta.searchValueMeta( keys[i] ) );
}
}
}
}
if ( one != null ) {
nrRecordsReference++;
}
if ( two != null ) {
nrRecordsCompare++;
}
do {
if ( one == null && two != null ) {
// A new record found in the compare table...
//
if ( getStepMeta().isDoingErrorHandling() ) {
String keyDesc = getKeyDesc( keyRowMeta, keyNrs, two );
Object[] errorRowData = constructErrorRow( rowMeta, r, keyDesc, null, null );
putError( data.errorRowMeta, errorRowData, 1, BaseMessages.getString(
PKG, "TableCompare.Error.RecordNotInReferenceFoundInCompareTable", cmpSchemaTable, keyRowMeta
.getString( two ) ), null, "TAC004" );
}
nrErrors++;
nrRightErrors++;
two = data.compareDb.getRow( cmpSet );
if ( two != null ) {
nrRecordsCompare++;
}
} else if ( one != null && two == null ) {
// A new record found in the reference table...
//
if ( getStepMeta().isDoingErrorHandling() ) {
String keyDesc = getKeyDesc( keyRowMeta, keyNrs, one );
Object[] errorRowData = constructErrorRow( rowMeta, r, keyDesc, null, null );
putError( data.errorRowMeta, errorRowData, 1, BaseMessages.getString(
PKG, "TableCompare.Error.RecordInReferenceNotFoundInCompareTable", refSchemaTable, keyRowMeta
.getString( one ) ), null, "TAC005" );
}
nrErrors++;
nrLeftErrors++;
one = data.referenceDb.getRow( refSet );
if ( one != null ) {
nrRecordsReference++;
}
} else {
if ( one != null && two != null ) {
// both records are populated, compare the records...
//
int compare = oneMeta.compare( one, two, keyNrs );
if ( compare == 0 ) { // The Key matches, we CAN compare the two rows...
int compareValues = oneMeta.compare( one, two, valueNrs );
if ( compareValues != 0 ) {
// Return the compare (most recent) row
//
if ( getStepMeta().isDoingErrorHandling() ) {
// Give some details on what is wrong... (fields, values, etc)
//
for ( int idx : valueNrs ) {
ValueMetaInterface valueMeta = oneMeta.getValueMeta( idx );
Object oneData = one[idx];
Object twoData = two[idx];
int cmp = valueMeta.compare( oneData, twoData );
if ( cmp != 0 ) {
String keyDesc = getKeyDesc( keyRowMeta, keyNrs, one );
String quote = valueMeta.isString() ? "'" : "";
String referenceData = quote + valueMeta.getString( oneData ) + quote;
String compareData = quote + valueMeta.getString( twoData ) + quote;
Object[] errorRowData =
constructErrorRow( rowMeta, r, keyDesc, referenceData, compareData );
putError(
data.errorRowMeta, errorRowData, 1, BaseMessages.getString(
PKG, "TableCompare.Error.CompareRowIsDifferentFromReference" ), valueMeta
.getName(), "TAC006" );
}
}
}
nrErrors++;
nrInnerErrors++;
}
// Get a new row from both streams...
one = data.referenceDb.getRow( refSet );
if ( one != null ) {
nrRecordsReference++;
}
two = data.compareDb.getRow( cmpSet );
if ( two != null ) {
nrRecordsCompare++;
}
} else {
if ( compare < 0 ) {
if ( getStepMeta().isDoingErrorHandling() ) {
String keyDesc = getKeyDesc( keyRowMeta, keyNrs, one );
Object[] errorRowData = constructErrorRow( rowMeta, r, keyDesc, null, null );
putError( data.errorRowMeta, errorRowData, 1, BaseMessages.getString(
PKG, "TableCompare.Error.RecordNotInReferenceFoundInCompareTable", cmpSchemaTable,
keyRowMeta.getString( one ) ), null, "TAC004" );
}
nrErrors++;
nrRightErrors++;
one = data.referenceDb.getRow( refSet );
if ( one != null ) {
nrRecordsReference++;
}
} else {
if ( getStepMeta().isDoingErrorHandling() ) {
String keyDesc = getKeyDesc( keyRowMeta, keyNrs, two );
Object[] errorRowData = constructErrorRow( rowMeta, r, keyDesc, null, null );
putError( data.errorRowMeta, errorRowData, 1, BaseMessages.getString(
PKG, "TableCompare.Error.RecordInReferenceNotFoundInCompareTable", refSchemaTable,
keyRowMeta.getString( two ) ), null, "TAC005" );
}
nrErrors++;
nrLeftErrors++;
two = data.compareDb.getRow( cmpSet );
if ( two != null ) {
nrRecordsCompare++;
}
}
}
}
}
} while ( ( one != null || two != null ) && !isStopped() );
refSet.close();
cmpSet.close();
}
} catch ( Exception e ) {
throw new KettleException( BaseMessages.getString(
PKG, "TableCompare.Exception.UnexpectedErrorComparingTables", refSchemaTable, cmpSchemaTable ), e );
}
int index = 0;
result[index++] = Long.valueOf( nrErrors );
result[index++] = Long.valueOf( nrRecordsReference );
result[index++] = Long.valueOf( nrRecordsCompare );
result[index++] = Long.valueOf( nrLeftErrors );
result[index++] = Long.valueOf( nrInnerErrors );
result[index++] = Long.valueOf( nrRightErrors );
r[data.keyDescIndex] = null;
r[data.valueReferenceIndex] = null;
r[data.valueCompareIndex] = null;
return result;
}
private String getKeyDesc( RowMetaInterface keyRowMeta, int[] keyNrs, Object[] one ) throws KettleException {
StringBuilder keyDesc = new StringBuilder();
for ( int x = 0; x < keyNrs.length; x++ ) {
ValueMetaInterface keyValueMeta = keyRowMeta.getValueMeta( x );
Object keyValueData = one[keyNrs[x]];
if ( keyDesc.length() > 0 ) {
keyDesc.append( " and " );
}
keyDesc.append( keyValueMeta.getName() ).append( " = '" );
keyDesc.append( keyValueMeta.getString( keyValueData ) );
keyDesc.append( "'" );
}
return keyDesc.toString();
}
private Object[] constructErrorRow( RowMetaInterface rowMeta, Object[] r, String keyField,
String referenceValue, String compareValue ) throws KettleException {
if ( data.errorRowMeta == null ) {
data.errorRowMeta = rowMeta.clone();
}
r[data.keyDescIndex] = keyField;
r[data.valueReferenceIndex] = referenceValue;
r[data.valueCompareIndex] = compareValue;
return r;
}
@Override
public boolean init( StepMetaInterface smi, StepDataInterface sdi ) {
meta = (TableCompareMeta) smi;
data = (TableCompareData) sdi;
if ( super.init( smi, sdi ) ) {
try {
data.referenceDb = new Database( this, meta.getReferenceConnection() );
data.referenceDb.connect();
} catch ( Exception e ) {
logError( BaseMessages.getString(
PKG, "TableCompare.Exception.UnexpectedErrorConnectingToReferenceDatabase", meta
.getReferenceConnection().getName() ), e );
return false;
}
try {
data.compareDb = new Database( this, meta.getCompareConnection() );
data.compareDb.connect();
} catch ( Exception e ) {
logError( BaseMessages.getString(
PKG, "TableCompare.Exception.UnexpectedErrorConnectingToCompareDatabase", meta
.getCompareConnection().getName() ), e );
return false;
}
return true;
}
return false;
}
@Override
public void dispose( StepMetaInterface smi, StepDataInterface sdi ) {
meta = (TableCompareMeta) smi;
data = (TableCompareData) sdi;
if ( data.referenceDb != null ) {
data.referenceDb.disconnect();
}
if ( data.compareDb != null ) {
data.compareDb.disconnect();
}
super.dispose( smi, sdi );
}
}