/*! ******************************************************************************
*
* Pentaho Data Integration
*
* Copyright (C) 2002-2013 by Pentaho : http://www.pentaho.com
*
*******************************************************************************
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
******************************************************************************/
package org.pentaho.di.trans;
import java.util.ArrayList;
import java.util.List;
import org.pentaho.di.core.Result;
import org.pentaho.di.core.RowSet;
import org.pentaho.di.core.exception.KettleException;
import org.pentaho.di.core.logging.LogChannelInterface;
import org.pentaho.di.trans.TransMeta.TransformationType;
import org.pentaho.di.trans.step.StepMetaDataCombi;
import org.pentaho.di.trans.step.errorhandling.StreamInterface;
public class SingleThreadedTransExecutor {
// The transformation being executed; supplies the steps, metadata, stop flag, errors and result.
private Trans trans;
// The list obtained from trans.getSteps() in the constructor; sortSteps() reorders it in place.
private List<StepMetaDataCombi> steps;
// done[i] is set by oneIteration() once processRow() of steps.get(i) has returned false.
private boolean[] done;
// Number of steps counted as finished by oneIteration().
private int nrDone;
// For each step (same index as steps): its info streams as reported by the step's StepIOMeta.
private List<List<StreamInterface>> stepInfoStreams;
// For each step (same index as steps): the row sets found for its info streams;
// oneIteration() drives these before the regular input row sets.
private List<List<RowSet>> stepInfoRowSets;
// Log channel of the transformation, used by sortSteps() for detailed-level output.
private LogChannelInterface log;
/**
 * Creates an executor for the given transformation.
 *
 * Thread priority management is switched off on every step, the step list is sorted with
 * {@link #sortSteps()}, and for each step the row sets that feed its info streams are looked up
 * so that {@link #oneIteration()} can process them ahead of the regular input.
 *
 * @param trans
 *          the transformation whose steps will be executed
 */
public SingleThreadedTransExecutor( final Trans trans ) {
  this.trans = trans;
  this.log = trans.getLogChannel();
  this.steps = trans.getSteps();

  // Thread priority management only slows things down here, so turn it off everywhere.
  //
  for ( StepMetaDataCombi stepCombi : this.steps ) {
    stepCombi.step.setUsingThreadPriorityManagment( false );
  }

  sortSteps();

  // Per-step bookkeeping, indexed in the (now sorted) step order.
  //
  this.nrDone = 0;
  this.done = new boolean[this.steps.size()];
  this.stepInfoStreams = new ArrayList<List<StreamInterface>>();
  this.stepInfoRowSets = new ArrayList<List<RowSet>>();

  for ( StepMetaDataCombi target : this.steps ) {
    final List<StreamInterface> streams =
      target.stepMeta.getStepMetaInterface().getStepIOMeta().getInfoStreams();
    final List<RowSet> resolvedRowSets = new ArrayList<RowSet>();

    for ( StreamInterface stream : streams ) {
      final RowSet candidate = trans.findRowSet( stream.getStepname(), 0, target.stepname, 0 );
      if ( candidate == null ) {
        // No row set was found for this info stream: nothing to register.
        continue;
      }
      resolvedRowSets.add( candidate );
    }

    this.stepInfoStreams.add( streams );
    this.stepInfoRowSets.add( resolvedRowSets );
  }
}
/**
 * Sort the steps from start to finish.
 *
 * The step list is reordered in place with a cocktail sort (bi-directional bubble sort) that uses
 * {@code TransMeta.findPrevious()} on neighbouring steps as the ordering test. After a threshold
 * number of outer iterations the inspected window is narrowed from either end whenever a pass in
 * that direction produced no swap. Sorting stops when a full forward/backward round makes no
 * change, when the window collapses, or after {@code 2 * steps.size()} rounds.
 *
 * A report of the sort (timing, iteration count, resulting order) is written to the log channel,
 * and is only assembled when detailed logging is enabled.
 */
private void sortSteps() {
  // The bubble sort algorithm in contrast to the QuickSort or MergeSort
  // algorithms does indeed cover all possibilities.
  // Sorting larger transformations with hundreds of steps might be too slow
  // though.
  // We should consider caching TransMeta.findPrevious() results in that case.
  //
  trans.getTransMeta().clearCaches();

  //
  // Cocktail sort (bi-directional bubble sort)
  //
  // Original sort was taking 3ms for 30 steps
  // cocktail sort takes about 8ms for the same 30, but it works :)
  //
  // Only collect sort details when they will actually be written out.
  //
  final boolean testing = log.isDetailed();

  int stepsMinSize = 0;
  int stepsSize = steps.size();

  // Noticed a problem with an immediate shrinking iteration window
  // trapping rows that need to be sorted.
  // This threshold buys us some time to get the sorting close before
  // starting to decrease the window size.
  //
  // TODO: this could become much smarter by tracking row movement
  // and reacting to that each outer iteration versus
  // using a threshold.
  //
  // After this many iterations enable trimming inner iteration
  // window on no change being detected.
  //
  int windowShrinkThreshold = (int) Math.round( stepsSize * 0.75 );

  // give ourselves some room to sort big lists. the window threshold should
  // stop us before reaching this anyway.
  //
  int totalIterations = stepsSize * 2;
  int actualIterations = 0;

  boolean isBefore = false;
  boolean forwardChange = false;
  boolean backwardChange = false;
  boolean lastForwardChange = true;
  boolean keepSortingForward = true;

  StepMetaDataCombi one = null;
  StepMetaDataCombi two = null;

  // Collecting everything in one buffer groups our output so other threads
  // don't get logs in our output.
  //
  StringBuilder tLogString = new StringBuilder();
  if ( testing ) {
    tLogString.append( "-------------------------------------------------------" ).append( "\n" );
    tLogString.append( "--SingleThreadedTransExecutor.sortSteps(cocktail)" ).append( "\n" );
    tLogString.append( "--Trans: " ).append( trans.getName() ).append( "\n" );
    tLogString.append( "-" ).append( "\n" );
  }

  long startTime = System.currentTimeMillis();

  for ( int x = 0; x < totalIterations; x++ ) {

    // Go forward through the list
    //
    if ( keepSortingForward ) {
      for ( int y = stepsMinSize; y < stepsSize - 1; y++ ) {
        one = steps.get( y );
        two = steps.get( y + 1 );
        isBefore = trans.getTransMeta().findPrevious( one.stepMeta, two.stepMeta );
        if ( isBefore ) {
          // two was found to be positioned BEFORE one so we need to
          // switch them...
          //
          steps.set( y, two );
          steps.set( y + 1, one );
          forwardChange = true;
        }
      }
    }

    // Go backward through the list
    //
    for ( int z = stepsSize - 1; z > stepsMinSize; z-- ) {
      one = steps.get( z );
      two = steps.get( z - 1 );
      isBefore = trans.getTransMeta().findPrevious( one.stepMeta, two.stepMeta );
      if ( !isBefore ) {
        // two was found NOT to be positioned BEFORE one so we need to
        // switch them...
        //
        steps.set( z, two );
        steps.set( z - 1, one );
        backwardChange = true;
      }
    }

    // Shrink stepsSize(max) if there was no forward change
    //
    if ( x > windowShrinkThreshold && !forwardChange ) {
      // should we keep going? check the window size
      //
      stepsSize--;
      if ( stepsSize <= stepsMinSize ) {
        if ( testing ) {
          tLogString.append( String.format( "stepsMinSize:%s stepsSize:%s", stepsMinSize, stepsSize ) )
            .append( "\n" );
          tLogString
            .append( "stepsSize is <= stepsMinSize.. exiting outer sort loop. index:" + x ).append( "\n" );
        }
        break;
      }
    }

    // shrink stepsMinSize(min) if there was no backward change
    //
    if ( x > windowShrinkThreshold && !backwardChange ) {
      // should we keep going? check the window size
      //
      stepsMinSize++;
      if ( stepsMinSize >= stepsSize ) {
        if ( testing ) {
          tLogString.append( String.format( "stepsMinSize:%s stepsSize:%s", stepsMinSize, stepsSize ) )
            .append( "\n" );
          tLogString
            .append( "stepsMinSize is >= stepsSize.. exiting outer sort loop. index:" + x ).append( "\n" );
        }
        break;
      }
    }

    // End of both forward and backward traversal.
    // Time to see if we should keep going.
    //
    actualIterations++;

    if ( !forwardChange && !backwardChange ) {
      if ( testing ) {
        tLogString.append( String.format( "exiting outer loop because no "
          + "change was detected going forward or backward. index:%s min:%s max:%s",
          x, stepsMinSize, stepsSize ) ).append( "\n" );
      }
      break;
    }

    //
    // if we are past the first iteration and there has been no change twice,
    // quit doing it!
    //
    if ( keepSortingForward && x > 0 && !lastForwardChange && !forwardChange ) {
      keepSortingForward = false;
    }
    lastForwardChange = forwardChange;
    forwardChange = false;
    backwardChange = false;

  } // finished sorting

  if ( testing ) {
    long totalTime = System.currentTimeMillis() - startTime;

    tLogString.append( "-------------------------------------------------------" ).append( "\n" );
    tLogString.append( "Steps sort time: " + totalTime + "ms" ).append( "\n" );
    tLogString.append( "Total iterations: " + actualIterations ).append( "\n" );
    tLogString.append( "Step count: " + steps.size() ).append( "\n" );
    tLogString.append( "Steps after sort: " ).append( "\n" );
    for ( StepMetaDataCombi combi : steps ) {
      tLogString.append( combi.step.getStepname() ).append( "\n" );
    }
    tLogString.append( "-------------------------------------------------------" ).append( "\n" );

    log.logDetailed( tLogString.toString() );
  }
}
/**
 * Verifies that every step can run in a single threaded transformation and then initializes
 * the steps one by one, in their current order.
 *
 * @return true when all steps initialized successfully; false as soon as one step's
 *         {@code init()} returns false (the remaining steps are then not initialized)
 * @throws KettleException
 *           when a step does not list {@code TransformationType.SingleThreaded} among its
 *           supported transformation types
 */
public boolean init() throws KettleException {

  // First pass: reject any step that doesn't declare single threaded support.
  //
  for ( StepMetaDataCombi candidate : steps ) {
    final TransformationType[] supportedTypes =
      candidate.stepMeta.getStepMetaInterface().getSupportedTransformationTypes();

    boolean singleThreadedCapable = false;
    for ( int t = 0; t < supportedTypes.length && !singleThreadedCapable; t++ ) {
      singleThreadedCapable = ( supportedTypes[t] == TransformationType.SingleThreaded );
    }

    if ( singleThreadedCapable ) {
      continue;
    }
    throw new KettleException( "Step '"
      + candidate.stepname + "' of type '" + candidate.stepMeta.getStepID()
      + "' is not yet supported in a Single Threaded transformation engine." );
  }

  // Second pass: initialize the steps, giving up at the first failure.
  //
  for ( StepMetaDataCombi candidate : steps ) {
    if ( !candidate.step.init( candidate.meta, candidate.data ) ) {
      return false;
    }
  }

  return true;
}
/**
 * Give all steps in the transformation the chance to process all rows on input...
 *
 * Steps are visited in sorted order; steps already marked as done are skipped. For each
 * remaining step the info row sets are handled first, then the regular input, and finally
 * {@code batchComplete()} is signalled.
 *
 * @return true if more iterations can be performed. False if this is not the case: all steps
 *         are done, the transformation was stopped, or a step reported errors.
 * @throws KettleException
 *           propagated from the steps' {@code processRow()} calls
 */
public boolean oneIteration() throws KettleException {

  for ( int index = 0; index < steps.size() && !trans.isStopped(); index++ ) {
    if ( done[index] ) {
      continue;
    }

    final StepMetaDataCombi current = steps.get( index );

    // If this step is waiting for data (text, db, and so on), we simply read all the data
    // This means that it is impractical to use this transformation type to load large files.
    //
    boolean finished = false;

    // Handle the info row sets FIRST to make sure we support the "Stream Lookup" step and so on.
    // processRow() is called at least once per info row set, then repeated while that
    // row set still holds rows and the step hasn't signalled completion.
    //
    for ( RowSet infoRowSet : stepInfoRowSets.get( index ) ) {
      do {
        finished = !current.step.processRow( current.meta, current.data );
        if ( current.step.getErrors() > 0 ) {
          return false;
        }
      } while ( infoRowSet.size() > 0 && !finished );
    }

    // Do normal processing of input rows...
    //
    final List<RowSet> inputRowSets = current.step.getInputRowSets();

    if ( !inputRowSets.isEmpty() ) {
      // Since we can't be sure that the step actually reads from the row sets where we measure rows,
      // we simply count the total nr of rows on input. The steps will find the rows in either row set.
      //
      int pendingRows = 0;
      for ( RowSet inputRowSet : inputRowSets ) {
        pendingRows += inputRowSet.size();
      }

      // Now do that number of processRow() calls.
      //
      for ( int remaining = pendingRows; remaining > 0; remaining-- ) {
        finished = !current.step.processRow( current.meta, current.data );
        if ( current.step.getErrors() > 0 ) {
          return false;
        }
      }
    } else {
      // If there are no input row sets, we read all rows until finish.
      // This applies to steps like "Table Input", "Text File Input" and so on.
      // If they do have an input row set, to get filenames or other parameters,
      // we need to handle this in the batchComplete() methods.
      //
      while ( !finished && !trans.isStopped() ) {
        finished = !current.step.processRow( current.meta, current.data );
        if ( current.step.getErrors() > 0 ) {
          return false;
        }
      }
    }

    // Signal the step that a batch of rows has passed for this iteration (sort rows and all)
    //
    current.step.batchComplete();

    done[index] = finished;
    if ( finished ) {
      nrDone++;
    }
  }

  return !( nrDone >= steps.size() || trans.isStopped() );
}
/**
 * Adds up the current sizes of the given row sets.
 *
 * @param rowSets
 *          the row sets to measure
 * @return the sum of {@code size()} over all row sets, 0 for an empty list
 */
protected int getTotalRows( List<RowSet> rowSets ) {
  int rowCount = 0;
  final int rowSetCount = rowSets.size();
  for ( int i = 0; i < rowSetCount; i++ ) {
    rowCount += rowSets.get( i ).size();
  }
  return rowCount;
}
/**
 * Reports the error count of the transformation being executed.
 *
 * @return the value of {@code getErrors()} on the wrapped transformation
 */
public long getErrors() {
return trans.getErrors();
}
/**
 * Obtains the result of the transformation being executed.
 *
 * @return the value of {@code getResult()} on the wrapped transformation
 */
public Result getResult() {
return trans.getResult();
}
/**
 * Tells whether the transformation being executed has been stopped.
 *
 * @return the value of {@code isStopped()} on the wrapped transformation
 */
public boolean isStopped() {
return trans.isStopped();
}
/**
 * Finishes the execution: first flags output as done on every step of the transformation,
 * then disposes each step and marks it as stopped.
 *
 * @throws KettleException
 *           declared for callers; see the step implementations for when it is raised
 */
public void dispose() throws KettleException {

  // Call output done.
  //
  final List<StepMetaDataCombi> transSteps = trans.getSteps();
  for ( StepMetaDataCombi finishing : transSteps ) {
    finishing.step.setOutputDone();
  }

  // Finalize all the steps...
  //
  for ( StepMetaDataCombi finishing : steps ) {
    finishing.step.dispose( finishing.meta, finishing.data );
    finishing.step.markStop();
  }
}
/**
 * Gives access to the transformation this executor was created for.
 *
 * @return the transformation passed to the constructor
 */
public Trans getTrans() {
return trans;
}
/**
 * Clear the error in the transformation by delegating to {@code Trans.clearError()}.
 *
 * NOTE(review): the original description also states that all rows are cleared from all the
 * row sets; that would happen inside {@code Trans.clearError()} and is not visible here - confirm.
 * This method does not reset the executor's own bookkeeping of finished steps.
 */
public void clearError() {
trans.clearError();
}
}