/*! ******************************************************************************
*
* Pentaho Data Integration
*
* Copyright (C) 2002-2016 by Pentaho : http://www.pentaho.com
*
*******************************************************************************
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
******************************************************************************/
package org.pentaho.di.trans.steps.samplerows;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

import org.pentaho.di.core.exception.KettleException;
import org.pentaho.di.core.row.RowDataUtil;
import org.pentaho.di.core.util.Utils;
import org.pentaho.di.i18n.BaseMessages;
import org.pentaho.di.trans.Trans;
import org.pentaho.di.trans.TransMeta;
import org.pentaho.di.trans.step.BaseStep;
import org.pentaho.di.trans.step.StepDataInterface;
import org.pentaho.di.trans.step.StepInterface;
import org.pentaho.di.trans.step.StepMeta;
import org.pentaho.di.trans.step.StepMetaInterface;

import com.google.common.collect.ImmutableRangeSet;
import com.google.common.collect.Range;
/**
* Sample rows. Filter rows based on line number
*
* @author Samatar
* @since 2-jun-2003
*/
public class SampleRows extends BaseStep implements StepInterface {
  private static Class<?> PKG = SampleRowsMeta.class; // for i18n purposes, needed by Translator2!!

  private SampleRowsMeta meta;
  private SampleRowsData data;

  public SampleRows( StepMeta stepMeta, StepDataInterface stepDataInterface, int copyNr, TransMeta transMeta,
    Trans trans ) {
    super( stepMeta, stepDataInterface, copyNr, transMeta, trans );
  }

  /**
   * Reads one input row and forwards it when its line number, as reported by {@code getLinesRead()}, falls inside
   * the configured line ranges. When a line number field name is configured, that number is appended to the
   * forwarded row as an extra field.
   * <p>
   * On the first row the range specification and the output row metadata are resolved and cached in the step data.
   *
   * @param smi the step metadata, cast to {@link SampleRowsMeta}
   * @param sdi the step data, cast to {@link SampleRowsData}
   * @return {@code false} once there are no more input rows, {@code true} otherwise
   * @throws KettleException if the range specification contains a number that does not fit in an {@code int} or a
   *           range whose start is greater than its end, or if a called step operation fails
   */
  public boolean processRow( StepMetaInterface smi, StepDataInterface sdi ) throws KettleException {
    meta = (SampleRowsMeta) smi;
    data = (SampleRowsData) sdi;

    Object[] r = getRow(); // get row, set busy!
    if ( r == null ) { // no more input to be expected...
      setOutputDone();
      return false;
    }

    if ( first ) {
      first = false;

      String realRange = environmentSubstitute( meta.getLinesRange() );
      data.addlineField = ( !Utils.isEmpty( environmentSubstitute( meta.getLineNumberField() ) ) );

      // get the RowMeta
      data.previousRowMeta = getInputRowMeta().clone();
      // Remember the input width before the optional line number field is appended below.
      data.NrPrevFields = data.previousRowMeta.size();
      data.outputRowMeta = data.previousRowMeta;
      if ( data.addlineField ) {
        meta.getFields( data.outputRowMeta, getStepname(), null, null, this, repository, metaStore );
      }

      data.rangeSet = parseRangeSet( realRange );
    } // end if first

    // Keep the counter as a long: narrowing it to int would let very large inputs wrap around and match the
    // configured ranges a second time.
    long linesRead = getLinesRead();
    boolean selected = linesRead <= Integer.MAX_VALUE && data.rangeSet.contains( (int) linesRead );

    if ( selected ) {
      if ( data.addlineField ) {
        // Only selected rows need a widened copy; discarded rows are never written out.
        data.outputRow = RowDataUtil.allocateRowData( data.outputRowMeta.size() );
        System.arraycopy( r, 0, data.outputRow, 0, data.NrPrevFields );
        data.outputRow[data.NrPrevFields] = linesRead;
      } else {
        data.outputRow = r;
      }

      // copy row to possible alternate rowset(s).
      //
      putRow( data.outputRowMeta, data.outputRow );

      if ( log.isRowLevel() ) {
        logRowlevel( BaseMessages.getString( PKG, "SampleRows.Log.LineNumber", linesRead
          + " : " + getInputRowMeta().getString( r ) ) );
      }
    }

    // Check if maximum value has been exceeded.
    // isEmpty() is tested first so span() is never asked for the span of an empty set.
    if ( data.rangeSet.isEmpty() || linesRead >= data.rangeSet.span().upperEndpoint() ) {
      setOutputDone();
    }

    // Allowed to continue to read in data
    return true;
  }

  /**
   * Parses a comma separated range specification into a range set. Each entry is either a single line number
   * ({@code "7"}) or an inclusive range ({@code "3..9"}). Entries matching neither form are skipped. Overlapping or
   * repeated entries are merged, because {@link ImmutableRangeSet.Builder} rejects overlapping ranges.
   *
   * @param rangeSpec the specification after variable substitution; {@code null} or empty yields an empty set
   * @return the set of selected line numbers
   * @throws KettleException if a number does not fit in an {@code int} or a range starts after it ends
   */
  private ImmutableRangeSet<Integer> parseRangeSet( String rangeSpec ) throws KettleException {
    List<Range<Integer>> ranges = new ArrayList<Range<Integer>>();

    if ( !Utils.isEmpty( rangeSpec ) ) {
      for ( String part : rangeSpec.split( "," ) ) {
        if ( part.matches( "\\d+" ) ) {
          if ( log.isDebug() ) {
            logDebug( BaseMessages.getString( PKG, "SampleRows.Log.RangeValue", part ) );
          }
          Integer lineNumber = parseLineNumber( part, rangeSpec );
          addCoalescing( ranges, Range.singleton( lineNumber ) );
        } else if ( part.matches( "\\d+\\.\\.\\d+" ) ) {
          String[] bounds = part.split( "\\.\\." );
          Integer start = parseLineNumber( bounds[0], rangeSpec );
          Integer end = parseLineNumber( bounds[1], rangeSpec );
          if ( start > end ) {
            // Range.closed would otherwise fail with an unchecked IllegalArgumentException.
            throw new KettleException( "Invalid line range '" + part + "' in '" + rangeSpec
              + "': the start must not be greater than the end" );
          }
          Range<Integer> range = Range.closed( start, end );
          if ( log.isDebug() ) {
            logDebug( BaseMessages.getString( PKG, "SampleRows.Log.RangeValue", range ) );
          }
          addCoalescing( ranges, range );
        }
      }
    }

    ImmutableRangeSet.Builder<Integer> setBuilder = ImmutableRangeSet.builder();
    for ( Range<Integer> range : ranges ) {
      setBuilder.add( range );
    }
    return setBuilder.build();
  }

  /**
   * Converts a string of decimal digits into a line number.
   *
   * @param digits the digits to convert
   * @param rangeSpec the full range specification, used only for the error message
   * @return the parsed line number
   * @throws KettleException if the value does not fit in an {@code int}
   */
  private static Integer parseLineNumber( String digits, String rangeSpec ) throws KettleException {
    try {
      return Integer.valueOf( digits );
    } catch ( NumberFormatException e ) {
      throw new KettleException( "Invalid line number '" + digits + "' in '" + rangeSpec
        + "': the value is too large", e );
    }
  }

  /**
   * Adds a closed range to a list of pairwise disjoint closed ranges, absorbing every existing range it shares a
   * line number with so that the list stays pairwise disjoint.
   *
   * @param ranges the disjoint ranges collected so far; modified in place
   * @param candidate the closed range to add
   */
  private static void addCoalescing( List<Range<Integer>> ranges, Range<Integer> candidate ) {
    Range<Integer> merged = candidate;
    Iterator<Range<Integer>> iterator = ranges.iterator();
    while ( iterator.hasNext() ) {
      Range<Integer> existing = iterator.next();
      // Two closed ranges are connected exactly when they share at least one value.
      if ( existing.isConnected( merged ) ) {
        merged = merged.span( existing );
        iterator.remove();
      }
    }
    ranges.add( merged );
  }

  /**
   * Stores the step metadata and data, then delegates initialisation to the superclass.
   *
   * @param smi the step metadata, cast to {@link SampleRowsMeta}
   * @param sdi the step data, cast to {@link SampleRowsData}
   * @return the result of the superclass initialisation
   */
  public boolean init( StepMetaInterface smi, StepDataInterface sdi ) {
    meta = (SampleRowsMeta) smi;
    data = (SampleRowsData) sdi;

    return super.init( smi, sdi );
  }
}