/*! ******************************************************************************
 *
 * Pentaho Data Integration
 *
 * Copyright (C) 2002-2017 by Pentaho : http://www.pentaho.com
 *
 *******************************************************************************
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 ******************************************************************************/

package org.pentaho.di.trans.steps.transexecutor;

import java.util.ArrayList;

import com.google.common.annotations.VisibleForTesting;
import org.pentaho.di.core.Const;
import org.pentaho.di.core.util.Utils;
import org.pentaho.di.core.Result;
import org.pentaho.di.core.ResultFile;
import org.pentaho.di.core.RowMetaAndData;
import org.pentaho.di.core.RowSet;
import org.pentaho.di.core.exception.KettleException;
import org.pentaho.di.core.logging.KettleLogStore;
import org.pentaho.di.core.logging.LoggingRegistry;
import org.pentaho.di.core.row.RowDataUtil;
import org.pentaho.di.core.row.RowMeta;
import org.pentaho.di.i18n.BaseMessages;
import org.pentaho.di.job.DelegationListener;
import org.pentaho.di.trans.Trans;
import org.pentaho.di.trans.TransExecutionConfiguration;
import org.pentaho.di.trans.TransMeta;
import org.pentaho.di.trans.step.BaseStep;
import org.pentaho.di.trans.step.StepDataInterface;
import org.pentaho.di.trans.step.StepInterface;
import org.pentaho.di.trans.step.StepMeta;
import org.pentaho.di.trans.step.StepMetaInterface;
import org.pentaho.di.trans.steps.TransStepUtil;

/**
 * Execute a transformation for every input row, set parameters.
 * <p>
 * <b>Note:</b><br/>
 * Be aware that the logic of this class's methods is very similar to the corresponding methods of
 * {@link org.pentaho.di.trans.steps.jobexecutor.JobExecutor JobExecutor}.
 * If you change something in this class, consider copying your changes to JobExecutor as well.
 * </p>
 *
 * @author Matt
 * @since 18-mar-2013
 */
public class TransExecutor extends BaseStep implements StepInterface {

  private static final Class<?> PKG = TransExecutorMeta.class; // for i18n purposes, needed by Translator2!!

  private TransExecutorMeta meta;
  private TransExecutorData data;

  public TransExecutor( StepMeta stepMeta, StepDataInterface stepDataInterface, int copyNr, TransMeta transMeta,
    Trans trans ) {
    super( stepMeta, stepDataInterface, copyNr, transMeta, trans );
  }

  /**
   * Process a single row. In our case, we send one row of data to a piece of transformation. In the transformation, we
   * look up the MappingInput step to send our rows to it. As a consequence, for the time being, there can only be one
   * MappingInput and one MappingOutput step in the TransExecutor.
   */
  public boolean processRow( StepMetaInterface smi, StepDataInterface sdi ) throws KettleException {
    try {
      meta = (TransExecutorMeta) smi;
      setData( (TransExecutorData) sdi );
      TransExecutorData transExecutorData = getData();

      // Wait for a row...
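      // getRow() blocks until the previous step delivers a row and returns null once all input is exhausted;
      // a null row therefore triggers a final flush of whatever is still buffered in the current group.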
      Object[] row = getRow();
      if ( row == null ) {
        executeTransformation();
        setOutputDone();
        return false;
      }

      if ( first ) {
        first = false;
        initOnFirstProcessingIteration();
      }

      RowSet executorStepOutputRowSet = transExecutorData.getExecutorStepOutputRowSet();
      if ( transExecutorData.getExecutorStepOutputRowMeta() != null && executorStepOutputRowSet != null ) {
        putRowTo( transExecutorData.getExecutorStepOutputRowMeta(), row, executorStepOutputRowSet );
      }

      // Grouping by field and execution time works ONLY if grouping by size is disabled.
      if ( transExecutorData.groupSize < 0 ) {
        if ( transExecutorData.groupFieldIndex >= 0 ) { // grouping by field
          Object groupFieldData = row[ transExecutorData.groupFieldIndex ];
          if ( transExecutorData.prevGroupFieldData != null ) {
            if ( transExecutorData.groupFieldMeta.compare( transExecutorData.prevGroupFieldData, groupFieldData )
              != 0 ) {
              executeTransformation();
            }
          }
          transExecutorData.prevGroupFieldData = groupFieldData;
        } else if ( transExecutorData.groupTime > 0 ) { // grouping by execution time
          long now = System.currentTimeMillis();
          if ( now - transExecutorData.groupTimeStart >= transExecutorData.groupTime ) {
            executeTransformation();
          }
        }
      }

      // Add next value AFTER transformation execution, in case we are grouping by field (see PDI-14958),
      // and BEFORE checking size of a group, in case we are grouping by size (see PDI-14121).
      transExecutorData.groupBuffer.add( new RowMetaAndData( getInputRowMeta(), row ) ); // should we clone for safety?

      // Grouping by size.
      // If group buffer size exceeds specified limit, then execute transformation and flush group buffer.
      if ( transExecutorData.groupSize > 0 ) {
        if ( transExecutorData.groupBuffer.size() >= transExecutorData.groupSize ) {
          executeTransformation();
        }
      }

      return true;
    } catch ( Exception e ) {
      throw new KettleException( BaseMessages.getString( PKG, "TransExecutor.UnexpectedError" ), e );
    }
  }
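  /**
   * Lazily wires up row metadata and output row sets on the first processed row: one row set per configured
   * target step (execution results, result files, result rows, pass-through output), plus the index and value
   * meta of the grouping field, if one is set.
   */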
  private void initOnFirstProcessingIteration() throws KettleException {
    TransExecutorData transExecutorData = getData();

    // internal transformation's first step has exactly the same input
    transExecutorData.setInputRowMeta( getInputRowMeta() );

    // internal transformation's execution results
    transExecutorData.setExecutionResultsOutputRowMeta( new RowMeta() );
    if ( meta.getExecutionResultTargetStepMeta() != null ) {
      meta.prepareExecutionResultsFields( transExecutorData.getExecutionResultsOutputRowMeta(),
        meta.getExecutionResultTargetStepMeta() );
      transExecutorData
        .setExecutionResultRowSet( findOutputRowSet( meta.getExecutionResultTargetStepMeta().getName() ) );
    }

    // internal transformation's execution result's file
    transExecutorData.setResultFilesOutputRowMeta( new RowMeta() );
    if ( meta.getResultFilesTargetStepMeta() != null ) {
      meta.prepareExecutionResultsFileFields( transExecutorData.getResultFilesOutputRowMeta(),
        meta.getResultFilesTargetStepMeta() );
      transExecutorData.setResultFilesRowSet( findOutputRowSet( meta.getResultFilesTargetStepMeta().getName() ) );
    }

    // internal transformation's execution output
    transExecutorData.setResultRowsOutputRowMeta( new RowMeta() );
    if ( meta.getOutputRowsSourceStepMeta() != null ) {
      meta.prepareResultsRowsFields( transExecutorData.getResultRowsOutputRowMeta() );
      transExecutorData.setResultRowsRowSet( findOutputRowSet( meta.getOutputRowsSourceStepMeta().getName() ) );
    }

    // executor's self output is exactly its input
    if ( meta.getExecutorsOutputStepMeta() != null ) {
      transExecutorData.setExecutorStepOutputRowMeta( getInputRowMeta().clone() );
      transExecutorData.setExecutorStepOutputRowSet(
        findOutputRowSet( meta.getExecutorsOutputStepMeta().getName() ) );
    }

    // Remember which column to group on, if any...
    transExecutorData.groupFieldIndex = -1;
    if ( !Utils.isEmpty( transExecutorData.groupField ) ) {
      transExecutorData.groupFieldIndex = getInputRowMeta().indexOfValue( transExecutorData.groupField );
      if ( transExecutorData.groupFieldIndex < 0 ) {
        throw new KettleException( BaseMessages.getString( PKG,
          "TransExecutor.Exception.GroupFieldNotFound", transExecutorData.groupField ) );
      }
      transExecutorData.groupFieldMeta = getInputRowMeta().getValueMeta( transExecutorData.groupFieldIndex );
    }
  }

  private void executeTransformation() throws KettleException {
    TransExecutorData transExecutorData = getData();

    // If we got 0 rows on input we don't really want to execute the transformation
    if ( transExecutorData.groupBuffer.isEmpty() ) {
      return;
    }

    transExecutorData.groupTimeStart = System.currentTimeMillis();

    if ( first ) {
      discardLogLines( transExecutorData );
    }

    Trans executorTrans = createInternalTrans();
    transExecutorData.setExecutorTrans( executorTrans );

    // Pass parameter values
    passParametersToTrans();

    // keep track for drill down in Spoon...
    getTrans().addActiveSubTransformation( getStepname(), executorTrans );

    Result result = new Result();
    result.setRows( transExecutorData.groupBuffer );
    executorTrans.setPreviousResult( result );

    try {
      executorTrans.prepareExecution( getTrans().getArguments() );

      // run transformation
      executorTrans.startThreads();

      // Inform the parent transformation we started something here...
      for ( DelegationListener delegationListener : getTrans().getDelegationListeners() ) {
        // TODO: copy some settings in the transformation execution configuration, not strictly needed
        // but the execution configuration information is useful in case of a transformation re-start on Carte
        delegationListener.transformationDelegationStarted( executorTrans, new TransExecutionConfiguration() );
      }

      // Wait a while until we're done with the transformation
      executorTrans.waitUntilFinished();
      result = executorTrans.getResult();
    } catch ( KettleException e ) {
      log.logError( "An error occurred executing the transformation: ", e );
      result.setResult( false );
      result.setNrErrors( 1 );
    }

    collectTransResults( result );
    collectExecutionResults( result );
    collectExecutionResultFiles( result );

    transExecutorData.groupBuffer.clear();
  }
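  /**
   * Drops the buffered log lines of the previous internal transformation and removes its entries from the
   * logging registry, so repeated executions don't accumulate log buffers in memory.
   */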
  @VisibleForTesting
  void discardLogLines( TransExecutorData transExecutorData ) {
    // Keep the strain on the logging back-end conservative.
    // TODO: make this optional/user-defined later
    Trans executorTrans = transExecutorData.getExecutorTrans();
    if ( executorTrans != null ) {
      KettleLogStore.discardLines( executorTrans.getLogChannelId(), false );
      LoggingRegistry.getInstance().removeIncludingChildren( executorTrans.getLogChannelId() );
    }
  }

  @VisibleForTesting
  Trans createInternalTrans() throws KettleException {
    Trans executorTrans = new Trans( getData().getExecutorTransMeta(), this );

    executorTrans.setParentTrans( getTrans() );
    executorTrans.setRepository( getTrans().getRepository() );
    executorTrans.setLogLevel( getLogLevel() );
    executorTrans.setArguments( getTrans().getArguments() );

    if ( meta.getParameters().isInheritingAllVariables() ) {
      executorTrans.shareVariablesWith( this );
    }

    executorTrans.setInternalKettleVariables( this );
    executorTrans.copyParametersFrom( getData().getExecutorTransMeta() );
    executorTrans.setPreview( getTrans().isPreview() );

    TransStepUtil.initServletConfig( getTrans(), executorTrans );

    return executorTrans;
  }
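  /**
   * Pushes the configured parameter values down to the internal transformation. A value is taken from the first
   * row of the current group when a field name is configured, otherwise from the static input value after
   * variable substitution. Names the internal transformation does not declare as parameters are set as plain
   * variables instead.
   */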
  private void passParametersToTrans() throws KettleException {
    // Set parameters, when fields are used take the first row in the set.
    TransExecutorParameters parameters = meta.getParameters();

    Trans internalTrans = getData().getExecutorTrans();
    internalTrans.clearParameters();

    String[] parameterNames = internalTrans.listParameters();
    for ( int i = 0; i < parameters.getVariable().length; i++ ) {
      String variable = parameters.getVariable()[ i ];
      String fieldName = parameters.getField()[ i ];
      String inputValue = parameters.getInput()[ i ];

      String value;
      // Take the value from an input row or from a static value?
      if ( !Utils.isEmpty( fieldName ) ) {
        int idx = getInputRowMeta().indexOfValue( fieldName );
        if ( idx < 0 ) {
          throw new KettleException( BaseMessages.getString( PKG,
            "TransExecutor.Exception.UnableToFindField", fieldName ) );
        }
        value = getData().groupBuffer.get( 0 ).getString( idx, "" );
      } else {
        value = environmentSubstitute( inputValue );
      }

      // See if this is a parameter or just a variable...
      if ( Const.indexOfString( variable, parameterNames ) < 0 ) {
        internalTrans.setVariable( variable, Const.NVL( value, "" ) );
      } else {
        internalTrans.setParameterValue( variable, Const.NVL( value, "" ) );
      }
    }
    internalTrans.activateParameters();
  }

  @VisibleForTesting
  void collectTransResults( Result result ) throws KettleException {
    RowSet transResultsRowSet = getData().getResultRowsRowSet();
    if ( meta.getOutputRowsSourceStepMeta() != null && transResultsRowSet != null ) {
      for ( RowMetaAndData metaAndData : result.getRows() ) {
        putRowTo( metaAndData.getRowMeta(), metaAndData.getData(), transResultsRowSet );
      }
    }
  }

  @VisibleForTesting
  void collectExecutionResults( Result result ) throws KettleException {
    RowSet executionResultsRowSet = getData().getExecutionResultRowSet();
    if ( meta.getExecutionResultTargetStepMeta() != null && executionResultsRowSet != null ) {
      Object[] outputRow = RowDataUtil.allocateRowData( getData().getExecutionResultsOutputRowMeta().size() );
      int idx = 0;

      if ( !Utils.isEmpty( meta.getExecutionTimeField() ) ) {
        outputRow[ idx++ ] = Long.valueOf( System.currentTimeMillis() - getData().groupTimeStart );
      }
      if ( !Utils.isEmpty( meta.getExecutionResultField() ) ) {
        outputRow[ idx++ ] = Boolean.valueOf( result.getResult() );
      }
      if ( !Utils.isEmpty( meta.getExecutionNrErrorsField() ) ) {
        outputRow[ idx++ ] = Long.valueOf( result.getNrErrors() );
      }
      if ( !Utils.isEmpty( meta.getExecutionLinesReadField() ) ) {
        outputRow[ idx++ ] = Long.valueOf( result.getNrLinesRead() );
      }
      if ( !Utils.isEmpty( meta.getExecutionLinesWrittenField() ) ) {
        outputRow[ idx++ ] = Long.valueOf( result.getNrLinesWritten() );
      }
      if ( !Utils.isEmpty( meta.getExecutionLinesInputField() ) ) {
        outputRow[ idx++ ] = Long.valueOf( result.getNrLinesInput() );
      }
      if ( !Utils.isEmpty( meta.getExecutionLinesOutputField() ) ) {
        outputRow[ idx++ ] = Long.valueOf( result.getNrLinesOutput() );
      }
      if ( !Utils.isEmpty( meta.getExecutionLinesRejectedField() ) ) {
        outputRow[ idx++ ] = Long.valueOf( result.getNrLinesRejected() );
      }
      if ( !Utils.isEmpty( meta.getExecutionLinesUpdatedField() ) ) {
        outputRow[ idx++ ] = Long.valueOf( result.getNrLinesUpdated() );
      }
      if ( !Utils.isEmpty( meta.getExecutionLinesDeletedField() ) ) {
        outputRow[ idx++ ] = Long.valueOf( result.getNrLinesDeleted() );
      }
      if ( !Utils.isEmpty( meta.getExecutionFilesRetrievedField() ) ) {
        outputRow[ idx++ ] = Long.valueOf( result.getNrFilesRetrieved() );
      }
      if ( !Utils.isEmpty( meta.getExecutionExitStatusField() ) ) {
        outputRow[ idx++ ] = Long.valueOf( result.getExitStatus() );
      }
      if ( !Utils.isEmpty( meta.getExecutionLogTextField() ) ) {
        String channelId = getData().getExecutorTrans().getLogChannelId();
        String logText = KettleLogStore.getAppender().getBuffer( channelId, false ).toString();
        outputRow[ idx++ ] = logText;
      }
      if ( !Utils.isEmpty( meta.getExecutionLogChannelIdField() ) ) {
        outputRow[ idx++ ] = getData().getExecutorTrans().getLogChannelId();
      }

      putRowTo( getData().getExecutionResultsOutputRowMeta(), outputRow, executionResultsRowSet );
    }
  }
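  /**
   * Writes one row per result file produced by the internal transformation to the "result files" target step,
   * when such a target step is configured.
   */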
  @VisibleForTesting
  void collectExecutionResultFiles( Result result ) throws KettleException {
    RowSet resultFilesRowSet = getData().getResultFilesRowSet();
    if ( meta.getResultFilesTargetStepMeta() != null && result.getResultFilesList() != null
      && resultFilesRowSet != null ) {
      for ( ResultFile resultFile : result.getResultFilesList() ) {
        Object[] targetRow = RowDataUtil.allocateRowData( getData().getResultFilesOutputRowMeta().size() );
        int idx = 0;
        targetRow[ idx++ ] = resultFile.getFile().getName().toString();

        // TODO: time, origin, ...

        putRowTo( getData().getResultFilesOutputRowMeta(), targetRow, resultFilesRowSet );
      }
    }
  }

  public boolean init( StepMetaInterface smi, StepDataInterface sdi ) {
    meta = (TransExecutorMeta) smi;
    setData( (TransExecutorData) sdi );
    TransExecutorData transExecutorData = getData();
    if ( super.init( smi, sdi ) ) {
      // First we need to load the mapping (transformation)
      try {
        // Pass the repository down to the metadata object...
        meta.setRepository( getTransMeta().getRepository() );
        transExecutorData.setExecutorTransMeta( loadExecutorTransMeta() );

        // Do we have a transformation at all?
        if ( transExecutorData.getExecutorTransMeta() != null ) {
          transExecutorData.groupBuffer = new ArrayList<RowMetaAndData>();

          // How many rows do we group together for the transformation?
          if ( !Utils.isEmpty( meta.getGroupSize() ) ) {
            transExecutorData.groupSize = Const.toInt( environmentSubstitute( meta.getGroupSize() ), -1 );
          } else {
            transExecutorData.groupSize = -1;
          }

          // Is there a grouping time set?
          if ( !Utils.isEmpty( meta.getGroupTime() ) ) {
            transExecutorData.groupTime = Const.toInt( environmentSubstitute( meta.getGroupTime() ), -1 );
          } else {
            transExecutorData.groupTime = -1;
          }
          transExecutorData.groupTimeStart = System.currentTimeMillis();

          // Is there a grouping field set?
          if ( !Utils.isEmpty( meta.getGroupField() ) ) {
            transExecutorData.groupField = environmentSubstitute( meta.getGroupField() );
          }

          // That's all for now...
          return true;
        } else {
          logError( "No valid transformation was specified or loaded!" );
          return false;
        }
      } catch ( Exception e ) {
        logError( "Unable to load the transformation executor because of an error: ", e );
      }
    }

    return false;
  }

  @VisibleForTesting
  TransMeta loadExecutorTransMeta() throws KettleException {
    return TransExecutorMeta.loadMappingMeta( meta, meta.getRepository(), meta.getMetaStore(), this );
  }

  public void dispose( StepMetaInterface smi, StepDataInterface sdi ) {
    TransExecutorData transExecutorData = getData();
    transExecutorData.groupBuffer = null;

    super.dispose( smi, sdi );
  }

  public void stopRunning( StepMetaInterface stepMetaInterface, StepDataInterface stepDataInterface )
    throws KettleException {
    if ( getData().getExecutorTrans() != null ) {
      getData().getExecutorTrans().stopAll();
    }
  }

  public void stopAll() {
    // Stop the transformation execution.
    if ( getData().getExecutorTrans() != null ) {
      getData().getExecutorTrans().stopAll();
    }

    // Also stop this step
    super.stopAll();
  }

  public Trans getExecutorTrans() {
    return getData().getExecutorTrans();
  }

  @VisibleForTesting
  TransExecutorData getData() {
    return data;
  }

  private void setData( TransExecutorData data ) {
    this.data = data;
  }
}