/*! ******************************************************************************
*
* Pentaho Data Integration
*
* Copyright (C) 2002-2016 by Pentaho : http://www.pentaho.com
*
*******************************************************************************
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
******************************************************************************/
package org.pentaho.di.trans.steps.uniquerows;
import org.pentaho.di.core.exception.KettleException;
import org.pentaho.di.core.row.RowDataUtil;
import org.pentaho.di.core.row.RowMetaInterface;
import org.pentaho.di.core.util.Utils;
import org.pentaho.di.i18n.BaseMessages;
import org.pentaho.di.trans.Trans;
import org.pentaho.di.trans.TransMeta;
import org.pentaho.di.trans.step.BaseStep;
import org.pentaho.di.trans.step.StepDataInterface;
import org.pentaho.di.trans.step.StepInterface;
import org.pentaho.di.trans.step.StepMeta;
import org.pentaho.di.trans.step.StepMetaInterface;
/**
* Removes the same consequetive rows from the input stream(s).
*
* @author Matt
* @since 2-jun-2003
*/
public class UniqueRows extends BaseStep implements StepInterface {
private static Class<?> PKG = UniqueRowsMeta.class; // for i18n purposes, needed by Translator2!!
private UniqueRowsMeta meta;
private UniqueRowsData data;
public UniqueRows( StepMeta stepMeta, StepDataInterface stepDataInterface, int copyNr, TransMeta transMeta,
Trans trans ) {
super( stepMeta, stepDataInterface, copyNr, transMeta, trans );
meta = (UniqueRowsMeta) getStepMeta().getStepMetaInterface();
data = (UniqueRowsData) stepDataInterface; // create new data object.
}
public boolean processRow( StepMetaInterface smi, StepDataInterface sdi ) throws KettleException {
meta = (UniqueRowsMeta) smi;
data = (UniqueRowsData) sdi;
Object[] r = getRow(); // get row!
if ( r == null ) { // no more input to be expected...
// Don't forget the last set of rows...
if ( data.previous != null ) {
Object[] outputRow = addCounter( data.outputRowMeta, data.previous, data.counter );
putRow( data.outputRowMeta, outputRow );
}
setOutputDone();
return false;
}
if ( first ) {
// Don't set first to false here like we normally do, because it is being checked outside the
// if(first) block to determine whether to send the row as a duplicate.
data.inputRowMeta = getInputRowMeta().clone();
data.compareRowMeta = getInputRowMeta().clone();
data.outputRowMeta = getInputRowMeta().clone();
meta.getFields( data.outputRowMeta, getStepname(), null, null, this, repository, metaStore );
data.previous = data.inputRowMeta.cloneRow( r ); // copy the row
// Cache lookup of fields
data.fieldnrs = new int[meta.getCompareFields().length];
for ( int i = 0; i < meta.getCompareFields().length; i++ ) {
data.fieldnrs[i] = getInputRowMeta().indexOfValue( meta.getCompareFields()[i] );
if ( data.fieldnrs[i] < 0 ) {
logError( BaseMessages.getString(
PKG, "UniqueRows.Log.CouldNotFindFieldInRow", meta.getCompareFields()[i] ) );
setErrors( 1 );
stopAll();
return false;
}
// Change the case insensitive flag...
//
data.compareRowMeta.getValueMeta( data.fieldnrs[i] ).setCaseInsensitive( meta.getCaseInsensitive()[i] );
if ( data.sendDuplicateRows ) {
data.compareFields =
data.compareFields == null ? meta.getCompareFields()[i] : data.compareFields
+ "," + meta.getCompareFields()[i];
}
}
if ( data.sendDuplicateRows && !Utils.isEmpty( meta.getErrorDescription() ) ) {
data.realErrorDescription = environmentSubstitute( meta.getErrorDescription() );
}
}
// Emptied in a previous batch in single threading mode.
//
if ( data.previous == null ) {
data.previous = data.inputRowMeta.cloneRow( r );
}
boolean isEqual = false;
if ( meta.getCompareFields() == null || meta.getCompareFields().length == 0 ) {
// Compare the complete row...
isEqual = data.outputRowMeta.compare( r, data.previous ) == 0;
} else {
isEqual = data.outputRowMeta.compare( r, data.previous, data.fieldnrs ) == 0;
}
if ( !isEqual ) {
Object[] outputRow = addCounter( data.outputRowMeta, data.previous, data.counter );
putRow( data.outputRowMeta, outputRow ); // copy row to possible alternate
// rowset(s).
data.previous = data.inputRowMeta.cloneRow( r );
data.counter = 1;
} else {
data.counter++;
if ( data.sendDuplicateRows && !first ) {
putError( getInputRowMeta(), r, 1, data.realErrorDescription, Utils.isEmpty( data.compareFields )
? null : data.compareFields, "UNR001" );
}
}
if ( checkFeedback( getLinesRead() ) ) {
if ( log.isBasic() ) {
logBasic( BaseMessages.getString( PKG, "UniqueRows.Log.LineNumber" ) + getLinesRead() );
}
}
first = false;
return true;
}
private Object[] addCounter( RowMetaInterface outputRowMeta, Object[] r, long count ) {
if ( meta.isCountRows() ) {
Object[] outputRow = RowDataUtil.addValueData( r, outputRowMeta.size() - 1, new Long( count ) );
return outputRow;
} else {
return r; // nothing to do
}
}
public boolean init( StepMetaInterface smi, StepDataInterface sdi ) {
meta = (UniqueRowsMeta) smi;
data = (UniqueRowsData) sdi;
if ( super.init( smi, sdi ) ) {
// Add init code here.
data.sendDuplicateRows = getStepMeta().getStepErrorMeta() != null && meta.supportsErrorHandling();
return true;
}
return false;
}
@Override
public void batchComplete() throws KettleException {
// If there's a previous row, output it at the end of the batch...
//
if ( data.previous != null ) {
Object[] outputRow = addCounter( data.outputRowMeta, data.previous, data.counter );
putRow( data.outputRowMeta, outputRow );
data.previous = null;
}
}
}