/*! ******************************************************************************
 *
 * Pentaho Data Integration
 *
 * Copyright (C) 2002-2016 by Pentaho : http://www.pentaho.com
 *
 *******************************************************************************
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 ******************************************************************************/

package org.pentaho.di.trans.steps.jobexecutor;

import java.util.ArrayList;

import com.google.common.annotations.VisibleForTesting;
import org.pentaho.di.core.Const;
import org.pentaho.di.core.util.Utils;
import org.pentaho.di.core.Result;
import org.pentaho.di.core.ResultFile;
import org.pentaho.di.core.RowMetaAndData;
import org.pentaho.di.core.exception.KettleException;
import org.pentaho.di.core.extension.ExtensionPointHandler;
import org.pentaho.di.core.extension.KettleExtensionPoint;
import org.pentaho.di.core.logging.KettleLogStore;
import org.pentaho.di.core.logging.LoggingObjectInterface;
import org.pentaho.di.core.logging.LoggingRegistry;
import org.pentaho.di.core.row.RowDataUtil;
import org.pentaho.di.core.row.ValueMetaInterface;
import org.pentaho.di.core.row.value.ValueMetaFactory;
import org.pentaho.di.i18n.BaseMessages;
import org.pentaho.di.job.DelegationListener;
import org.pentaho.di.job.Job;
import org.pentaho.di.job.JobExecutionConfiguration;
import org.pentaho.di.job.JobMeta;
import org.pentaho.di.repository.Repository;
import org.pentaho.di.trans.Trans;
import org.pentaho.di.trans.TransMeta;
import org.pentaho.di.trans.step.BaseStep;
import org.pentaho.di.trans.step.StepDataInterface;
import org.pentaho.di.trans.step.StepInterface;
import org.pentaho.di.trans.step.StepMeta;
import org.pentaho.di.trans.step.StepMetaInterface;

/**
 * Execute a job for every input row.
 * <p>
 * <b>Note:</b><br/>
 * Be aware that the logic of this class's methods is very similar to the corresponding methods of
 * {@link org.pentaho.di.trans.steps.transexecutor.TransExecutor TransExecutor}.
 * If you change something in this class, consider copying your changes to TransExecutor as well.
 * </p>
 *
 * @author Matt
 * @since 22-nov-2005
 */
public class JobExecutor extends BaseStep implements StepInterface {
  private static Class<?> PKG = JobExecutorMeta.class; // for i18n purposes, needed by Translator2!!

  private JobExecutorMeta meta;
  private JobExecutorData data;

  public JobExecutor( StepMeta stepMeta, StepDataInterface stepDataInterface, int copyNr, TransMeta transMeta,
    Trans trans ) {
    super( stepMeta, stepDataInterface, copyNr, transMeta, trans );
  }

  /**
   * Process a single row. Incoming rows are collected in a group buffer; whenever a group is complete (by size,
   * by a change of the grouping field value, or after the grouping time has elapsed), the configured job is
   * executed once for that group of rows.
   */
  public boolean processRow( StepMetaInterface smi, StepDataInterface sdi ) throws KettleException {
    try {
      meta = (JobExecutorMeta) smi;
      data = (JobExecutorData) sdi;

      // Wait for a row...
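      // Note on the flow below: getRow() waits for the next input row and returns null once the input is
      // exhausted. Rows are buffered in data.groupBuffer; the job runs once per group, and any rows still
      // buffered at end of input are flushed with one final execution before the step finishes.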
      //
      Object[] row = getRow();
      if ( row == null ) {
        if ( !data.groupBuffer.isEmpty() ) {
          executeJob();
        }
        setOutputDone();
        return false;
      }

      if ( first ) {
        first = false;

        // calculate the various output row layouts first...
        //
        data.inputRowMeta = getInputRowMeta();
        data.executionResultsOutputRowMeta = data.inputRowMeta.clone();
        data.resultRowsOutputRowMeta = data.inputRowMeta.clone();
        data.resultFilesOutputRowMeta = data.inputRowMeta.clone();

        if ( meta.getExecutionResultTargetStepMeta() != null ) {
          meta.getFields( data.executionResultsOutputRowMeta, getStepname(), null, meta
            .getExecutionResultTargetStepMeta(), this, repository, metaStore );
          data.executionResultRowSet = findOutputRowSet( meta.getExecutionResultTargetStepMeta().getName() );
        }

        if ( meta.getResultRowsTargetStepMeta() != null ) {
          meta.getFields( data.resultRowsOutputRowMeta, getStepname(), null, meta.getResultRowsTargetStepMeta(),
            this, repository, metaStore );
          data.resultRowsRowSet = findOutputRowSet( meta.getResultRowsTargetStepMeta().getName() );
        }

        if ( meta.getResultFilesTargetStepMeta() != null ) {
          meta.getFields( data.resultFilesOutputRowMeta, getStepname(), null, meta.getResultFilesTargetStepMeta(),
            this, repository, metaStore );
          data.resultFilesRowSet = findOutputRowSet( meta.getResultFilesTargetStepMeta().getName() );
        }

        // Remember which column to group on, if any...
        //
        data.groupFieldIndex = -1;
        if ( !Utils.isEmpty( data.groupField ) ) {
          data.groupFieldIndex = getInputRowMeta().indexOfValue( data.groupField );
          if ( data.groupFieldIndex < 0 ) {
            throw new KettleException( BaseMessages.getString(
              PKG, "JobExecutor.Exception.GroupFieldNotFound", data.groupField ) );
          }
          data.groupFieldMeta = getInputRowMeta().getValueMeta( data.groupFieldIndex );
        }
      }

      // Grouping by field and execution time works ONLY if grouping by size is disabled.
      if ( data.groupSize < 0 ) {
        if ( data.groupFieldIndex >= 0 ) { // grouping by field
          Object groupFieldData = row[data.groupFieldIndex];
          if ( data.prevGroupFieldData != null ) {
            if ( data.groupFieldMeta.compare( data.prevGroupFieldData, groupFieldData ) != 0 ) {
              executeJob();
            }
          }
          data.prevGroupFieldData = groupFieldData;
        } else if ( data.groupTime > 0 ) { // grouping by execution time
          long now = System.currentTimeMillis();
          if ( now - data.groupTimeStart >= data.groupTime ) {
            executeJob();
          }
        }
      }

      // Add next value AFTER job execution, in case we are grouping by field (see PDI-14958),
      // and BEFORE checking size of a group, in case we are grouping by size (see PDI-14121).
      data.groupBuffer.add( new RowMetaAndData( getInputRowMeta(), row ) ); // should we clone for safety?

      // Grouping by size.
      // If group buffer size exceeds specified limit, then execute job and flush group buffer.
      if ( data.groupSize > 0 ) {
        // Pass all input rows...
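        // Once the buffer has collected groupSize rows the job is executed and the buffer is cleared inside
        // executeJob(); with a group size of 1 the job is effectively executed once per incoming row.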
        if ( data.groupBuffer.size() >= data.groupSize ) {
          executeJob();
        }
      }

      return true;
    } catch ( Exception e ) {
      throw new KettleException( BaseMessages.getString( PKG, "JobExecutor.UnexpectedError" ), e );
    }
  }

  private void executeJob() throws KettleException {

    // If we got 0 rows on input we don't really want to execute the job
    //
    if ( data.groupBuffer.isEmpty() ) {
      return;
    }

    data.groupTimeStart = System.currentTimeMillis();

    if ( first ) {
      discardLogLines( data );
    }

    data.executorJob = createJob( meta.getRepository(), data.executorJobMeta, this );

    data.executorJob.setParentTrans( getTrans() );
    data.executorJob.setLogLevel( getLogLevel() );

    if ( meta.getParameters().isInheritingAllVariables() ) {
      data.executorJob.shareVariablesWith( this );
    }
    data.executorJob.setInternalKettleVariables( this );
    data.executorJob.copyParametersFrom( data.executorJobMeta );
    data.executorJob.setArguments( getTrans().getArguments() );

    // data.executorJob.setInteractive(); TODO: pass interactivity through the transformation too for drill-down.

    // TODO
    /*
     * if (data.executorJob.isInteractive()) {
     * data.executorJob.getJobEntryListeners().addAll(parentJob.getJobEntryListeners()); }
     */

    // Pass the accumulated rows
    //
    data.executorJob.setSourceRows( data.groupBuffer );

    // Pass parameter values
    //
    passParametersToJob();

    // keep track for drill down in Spoon...
    //
    getTrans().getActiveSubjobs().put( getStepname(), data.executorJob );

    ExtensionPointHandler.callExtensionPoint( log, KettleExtensionPoint.JobStart.id, data.executorJob );

    data.executorJob.beginProcessing();

    Result result = new Result();

    // Inform the parent transformation we delegated work here...
    //
    for ( DelegationListener delegationListener : getTrans().getDelegationListeners() ) {
      // TODO: copy some settings in the job execution configuration, not strictly needed
      // but the execution configuration information is useful in case of a job re-start on Carte
      //
      delegationListener.jobDelegationStarted( data.executorJob, new JobExecutionConfiguration() );
    }

    // Now go execute this job
    //
    try {
      result = data.executorJob.execute( 0, result );
    } catch ( KettleException e ) {
      log.logError( "An error occurred executing the job: ", e );
      result.setResult( false );
      result.setNrErrors( 1 );
    } finally {
      try {
        ExtensionPointHandler.callExtensionPoint( log, KettleExtensionPoint.JobFinish.id, data.executorJob );
        data.executorJob.fireJobFinishListeners();
      } catch ( KettleException e ) {
        result.setNrErrors( 1 );
        result.setResult( false );
        log.logError( BaseMessages.getString( PKG, "JobExecutor.Log.ErrorExecJob", e.getMessage() ), e );
      }
    }

    // First the natural output...
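    // The block below builds one "execution results" row per job run: each configured field (execution time,
    // result flag, error and line counters, log text, log channel id) is filled in, in the fixed order shown,
    // and the row is sent to the execution-results target step.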
    //
    if ( meta.getExecutionResultTargetStepMeta() != null ) {
      Object[] outputRow = RowDataUtil.allocateRowData( data.executionResultsOutputRowMeta.size() );
      int idx = 0;

      if ( !Utils.isEmpty( meta.getExecutionTimeField() ) ) {
        outputRow[idx++] = Long.valueOf( System.currentTimeMillis() - data.groupTimeStart );
      }
      if ( !Utils.isEmpty( meta.getExecutionResultField() ) ) {
        outputRow[idx++] = Boolean.valueOf( result.getResult() );
      }
      if ( !Utils.isEmpty( meta.getExecutionNrErrorsField() ) ) {
        outputRow[idx++] = Long.valueOf( result.getNrErrors() );
      }
      if ( !Utils.isEmpty( meta.getExecutionLinesReadField() ) ) {
        outputRow[idx++] = Long.valueOf( result.getNrLinesRead() );
      }
      if ( !Utils.isEmpty( meta.getExecutionLinesWrittenField() ) ) {
        outputRow[idx++] = Long.valueOf( result.getNrLinesWritten() );
      }
      if ( !Utils.isEmpty( meta.getExecutionLinesInputField() ) ) {
        outputRow[idx++] = Long.valueOf( result.getNrLinesInput() );
      }
      if ( !Utils.isEmpty( meta.getExecutionLinesOutputField() ) ) {
        outputRow[idx++] = Long.valueOf( result.getNrLinesOutput() );
      }
      if ( !Utils.isEmpty( meta.getExecutionLinesRejectedField() ) ) {
        outputRow[idx++] = Long.valueOf( result.getNrLinesRejected() );
      }
      if ( !Utils.isEmpty( meta.getExecutionLinesUpdatedField() ) ) {
        outputRow[idx++] = Long.valueOf( result.getNrLinesUpdated() );
      }
      if ( !Utils.isEmpty( meta.getExecutionLinesDeletedField() ) ) {
        outputRow[idx++] = Long.valueOf( result.getNrLinesDeleted() );
      }
      if ( !Utils.isEmpty( meta.getExecutionFilesRetrievedField() ) ) {
        outputRow[idx++] = Long.valueOf( result.getNrFilesRetrieved() );
      }
      if ( !Utils.isEmpty( meta.getExecutionExitStatusField() ) ) {
        outputRow[idx++] = Long.valueOf( result.getExitStatus() );
      }
      if ( !Utils.isEmpty( meta.getExecutionLogTextField() ) ) {
        String channelId = data.executorJob.getLogChannelId();
        String logText = KettleLogStore.getAppender().getBuffer( channelId, false ).toString();
        outputRow[idx++] = logText;
      }
      if ( !Utils.isEmpty( meta.getExecutionLogChannelIdField() ) ) {
        outputRow[idx++] = data.executorJob.getLogChannelId();
      }

      putRowTo( data.executionResultsOutputRowMeta, outputRow, data.executionResultRowSet );
    }

    // Optionally also send the result rows to a specified target step...
    //
    if ( meta.getResultRowsTargetStepMeta() != null && result.getRows() != null ) {
      for ( RowMetaAndData row : result.getRows() ) {

        Object[] targetRow = RowDataUtil.allocateRowData( data.resultRowsOutputRowMeta.size() );

        for ( int i = 0; i < meta.getResultRowsField().length; i++ ) {
          ValueMetaInterface valueMeta = row.getRowMeta().getValueMeta( i );
          if ( valueMeta.getType() != meta.getResultRowsType()[i] ) {
            throw new KettleException( BaseMessages.getString(
              PKG, "JobExecutor.IncorrectDataTypePassed", valueMeta.getTypeDesc(),
              ValueMetaFactory.getValueMetaName( meta.getResultRowsType()[i] ) ) );
          }

          targetRow[i] = row.getData()[i];
        }
        putRowTo( data.resultRowsOutputRowMeta, targetRow, data.resultRowsRowSet );
      }
    }

    if ( meta.getResultFilesTargetStepMeta() != null && result.getResultFilesList() != null ) {
      for ( ResultFile resultFile : result.getResultFilesList() ) {
        Object[] targetRow = RowDataUtil.allocateRowData( data.resultFilesOutputRowMeta.size() );
        int idx = 0;
        targetRow[idx++] = resultFile.getFile().getName().toString();

        // TODO: time, origin, ...
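        // Each ResultFile produced by the job becomes one row on the result-files target step; only the file
        // name is filled in for now (see the TODO above).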
        putRowTo( data.resultFilesOutputRowMeta, targetRow, data.resultFilesRowSet );
      }
    }

    data.groupBuffer.clear();
  }

  @VisibleForTesting
  Job createJob( Repository repository, JobMeta jobMeta, LoggingObjectInterface parentLogging ) {
    return new Job( repository, jobMeta, parentLogging );
  }

  @VisibleForTesting
  void discardLogLines( JobExecutorData data ) {
    // Keep the strain on the logging back-end conservative.
    // TODO: make this optional/user-defined later
    if ( data.executorJob != null ) {
      KettleLogStore.discardLines( data.executorJob.getLogChannelId(), false );
      LoggingRegistry.getInstance().removeIncludingChildren( data.executorJob.getLogChannelId() );
    }
  }

  private void passParametersToJob() throws KettleException {
    // Set parameters, when fields are used take the first row in the set.
    //
    JobExecutorParameters parameters = meta.getParameters();
    data.executorJob.clearParameters();
    String[] parameterNames = data.executorJob.listParameters();
    for ( int i = 0; i < parameters.getVariable().length; i++ ) {
      String variable = parameters.getVariable()[i];
      String fieldName = parameters.getField()[i];
      String inputValue = parameters.getInput()[i];
      String value;

      // Take the value from an input row or from a static value?
      //
      if ( !Utils.isEmpty( fieldName ) ) {
        int idx = getInputRowMeta().indexOfValue( fieldName );
        if ( idx < 0 ) {
          throw new KettleException( BaseMessages.getString(
            PKG, "JobExecutor.Exception.UnableToFindField", fieldName ) );
        }

        value = data.groupBuffer.get( 0 ).getString( idx, "" );
      } else {
        value = environmentSubstitute( inputValue );
      }

      // See if this is a parameter or just a variable...
      //
      if ( Const.indexOfString( variable, parameterNames ) < 0 ) {
        data.executorJob.setVariable( variable, Const.NVL( value, "" ) );
      } else {
        data.executorJob.setParameterValue( variable, Const.NVL( value, "" ) );
      }
    }
    data.executorJob.activateParameters();
  }

  public boolean init( StepMetaInterface smi, StepDataInterface sdi ) {
    meta = (JobExecutorMeta) smi;
    data = (JobExecutorData) sdi;

    if ( super.init( smi, sdi ) ) {
      // First we need to load the metadata of the job to execute
      try {
        // Pass the repository down to the metadata object...
        //
        meta.setRepository( getTransMeta().getRepository() );

        data.executorJobMeta = JobExecutorMeta.loadJobMeta( meta, meta.getRepository(), this );

        // Do we have a job at all?
        //
        if ( data.executorJobMeta != null ) {
          data.groupBuffer = new ArrayList<RowMetaAndData>();

          // How many rows do we group together for the job?
          //
          data.groupSize = -1;
          if ( !Utils.isEmpty( meta.getGroupSize() ) ) {
            data.groupSize = Const.toInt( environmentSubstitute( meta.getGroupSize() ), -1 );
          }

          // Is there a grouping time set?
          //
          data.groupTime = -1;
          if ( !Utils.isEmpty( meta.getGroupTime() ) ) {
            data.groupTime = Const.toInt( environmentSubstitute( meta.getGroupTime() ), -1 );
          }
          data.groupTimeStart = System.currentTimeMillis();

          // Is there a grouping field set?
          //
          data.groupField = null;
          if ( !Utils.isEmpty( meta.getGroupField() ) ) {
            data.groupField = environmentSubstitute( meta.getGroupField() );
          }

          // That's all for now...
          return true;
        } else {
          logError( "No valid job was specified nor loaded!" );
          return false;
        }
      } catch ( Exception e ) {
        logError( "Unable to load the executor job because of an error : ", e );
      }
    }
    return false;
  }

  public void dispose( StepMetaInterface smi, StepDataInterface sdi ) {
    data.groupBuffer = null;

    super.dispose( smi, sdi );
  }

  public void stopRunning( StepMetaInterface stepMetaInterface, StepDataInterface stepDataInterface )
    throws KettleException {
    if ( data.executorJob != null ) {
      data.executorJob.stopAll();
    }
  }

  public void stopAll() {
    // Stop the job execution.
    if ( data.executorJob != null ) {
      data.executorJob.stopAll();
    }

    // Also stop this step
    super.stopAll();
  }

  /*
   *
   * @Override public long getLinesInput() { if (data!=null && data.executorJob != null &&
   * data.executorJob.getResult()!=null) return data.executorJob.getResult().getNrLinesInput(); else return 0; }
   *
   * @Override public long getLinesOutput() { if (data!=null && data.executorJob != null &&
   * data.executorJob.getResult()!=null) return data.executorJob.getResult().getNrLinesOutput(); else return 0; }
   *
   * @Override public long getLinesRead() { if (data!=null && data.executorJob != null &&
   * data.executorJob.getResult()!=null) return data.executorJob.getResult().getNrLinesRead(); else return 0; }
   *
   * @Override public long getLinesRejected() { if (data!=null && data.executorJob != null &&
   * data.executorJob.getResult()!=null) return data.executorJob.getResult().getNrLinesRejected(); else return 0; }
   *
   * @Override public long getLinesUpdated() { if (data!=null && data.executorJob != null &&
   * data.executorJob.getResult()!=null) return data.executorJob.getResult().getNrLinesUpdated(); else return 0; }
   *
   * @Override public long getLinesWritten() { if (data!=null && data.executorJob != null &&
   * data.executorJob.getResult()!=null) return data.executorJob.getResult().getNrLinesWritten(); else return 0; }
   */

  public Job getExecutorJob() {
    return data.executorJob;
  }
}