/*! ******************************************************************************
 *
 * Pentaho Data Integration
 *
 * Copyright (C) 2002-2016 by Pentaho : http://www.pentaho.com
 *
 *******************************************************************************
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 ******************************************************************************/

package org.pentaho.di.trans.steps.analyticquery;

import java.util.concurrent.ConcurrentLinkedQueue;

import org.pentaho.di.core.exception.KettleException;
import org.pentaho.di.core.exception.KettleStepException;
import org.pentaho.di.core.exception.KettleValueException;
import org.pentaho.di.core.row.RowDataUtil;
import org.pentaho.di.i18n.BaseMessages;
import org.pentaho.di.trans.Trans;
import org.pentaho.di.trans.TransMeta;
import org.pentaho.di.trans.step.BaseStep;
import org.pentaho.di.trans.step.StepDataInterface;
import org.pentaho.di.trans.step.StepInterface;
import org.pentaho.di.trans.step.StepMeta;
import org.pentaho.di.trans.step.StepMetaInterface;

/**
 * Performs analytic queries (LEAD/LAG, etc) based on a group.
 *
 * <p>
 * Incoming rows are buffered in a queue that is flushed whenever the group key changes or the input ends. Every
 * buffered row is written out exactly as received, extended with one extra value per configured analytic field.
 * </p>
 *
 * @author ngoodman
 * @since 27-jan-2009
 */
public class AnalyticQuery extends BaseStep implements StepInterface {

  private static Class<?> PKG = AnalyticQuery.class; // for i18n purposes, needed by Translator2!!

  private AnalyticQueryMeta meta;
  private AnalyticQueryData data;

  public AnalyticQuery( StepMeta stepMeta, StepDataInterface stepDataInterface, int copyNr, TransMeta transMeta,
    Trans trans ) {
    super( stepMeta, stepDataInterface, copyNr, transMeta, trans );

    meta = (AnalyticQueryMeta) getStepMeta().getStepMetaInterface();
    data = (AnalyticQueryData) stepDataInterface;
  }

  /**
   * Reads one input row, buffers it and emits every buffered row that can already be completed.
   *
   * @param smi
   *          the step metadata, expected to be an {@link AnalyticQueryMeta}
   * @param sdi
   *          the step data, expected to be an {@link AnalyticQueryData}
   * @return {@code true} when a row was consumed; {@code false} when the input is exhausted or a configured group
   *         field is missing from the input layout
   * @throws KettleException
   *           when the row layout cannot be determined, rows cannot be compared or an output row cannot be written
   */
  @Override
  public boolean processRow( StepMetaInterface smi, StepDataInterface sdi ) throws KettleException {

    meta = (AnalyticQueryMeta) smi;
    data = (AnalyticQueryData) sdi;

    Object[] r = getRow(); // get row!

    if ( first ) {
      // What is the output looking like?
      //
      data.inputRowMeta = getInputRowMeta();

      // With 0 input rows no layout arrives over the row set from the previous step,
      // so it has to be calculated from the transformation metadata instead.
      //
      if ( data.inputRowMeta == null ) {
        data.inputRowMeta = getTransMeta().getPrevStepFields( getStepMeta() );
      }

      data.outputRowMeta = data.inputRowMeta.clone();
      meta.getFields( data.outputRowMeta, getStepname(), null, null, this, repository, metaStore );

      // Resolve the position of every group field; a missing one stops the transformation.
      String[] groupFields = meta.getGroupField();
      data.groupnrs = new int[groupFields.length];
      for ( int i = 0; i < groupFields.length; i++ ) {
        data.groupnrs[i] = data.inputRowMeta.indexOfValue( groupFields[i] );
        if ( data.groupnrs[i] < 0 ) {
          logError( BaseMessages.getString( PKG, "AnalyticQuery.Log.GroupFieldCouldNotFound", groupFields[i] ) );
          setErrors( 1 );
          stopAll();
          return false;
        }
      }

      // Setup of "window size" and "queue_size": the window is the largest configured offset,
      // the queue holds that many rows on either side of the row being completed.
      int maxOffset = 0;
      for ( int i = 0; i < meta.getNumberOfFields(); i++ ) {
        if ( meta.getValueField()[i] > maxOffset ) {
          maxOffset = meta.getValueField()[i];
        }
      }
      data.window_size = maxOffset;
      data.queue_size = ( maxOffset * 2 ) + 1;

      // After we've processed the metadata we're all set
      first = false;
    }

    // If our row is null we're done: flush whatever is still queued and end.
    if ( r == null ) {
      clearQueue();
      setOutputDone();
      return false;
    }

    // On every group change AND on the very first row: flush the old group and start a fresh queue.
    if ( !sameGroup( this.data.previous, r ) ) {
      clearQueue();
      resetGroup();
    }

    // Add this row to the end of the queue
    data.data.add( r );

    // Push the extra records off the front of the queue
    while ( data.data.size() > data.queue_size ) {
      data.data.poll();
    }

    data.previous = r.clone();

    processQueue();

    if ( log.isBasic() && checkFeedback( getLinesRead() ) ) {
      logBasic( BaseMessages.getString( PKG, "LineNr", getLinesRead() ) );
    }

    return true;
  }

  /**
   * Emits rows from the queue once it holds exactly {@code queue_size} rows. The first time this happens the cursor
   * is advanced up to and including the middle slot; on later calls only the middle slot is emitted.
   *
   * @throws KettleStepException
   *           when an output row cannot be built or written
   */
  public void processQueue() throws KettleStepException {

    // Nothing to do until the queue is filled up for processing
    if ( data.data.size() != data.queue_size ) {
      return;
    }

    if ( data.queue_cursor <= data.window_size ) {
      // Bring current cursor "up to current"
      while ( data.queue_cursor <= data.window_size ) {
        processQueueObjectAt( data.queue_cursor + 1 );
        data.queue_cursor++;
      }
    } else {
      processQueueObjectAt( data.window_size + 1 );
    }
  }

  /**
   * Emits every queued row from the current cursor position to the end of the queue. Does nothing when no queue has
   * been created yet. The cursor itself is left untouched.
   *
   * @throws KettleStepException
   *           when an output row cannot be built or written
   */
  public void clearQueue() throws KettleStepException {

    if ( data.data == null ) {
      return;
    }

    int numberOfRows = data.data.size();

    for ( int i = data.queue_cursor; i < numberOfRows; i++ ) {
      processQueueObjectAt( i + 1 );
    }
  }

  /**
   * Builds and writes the output row for one queued row: the queued row followed by one value per analytic field.
   * For LAG the value is read from the row {@code offset} positions earlier in the queue, for LEAD from the row
   * {@code offset} positions later. The value is {@code null} when that position falls outside the queue.
   *
   * @param i
   *          1-based position of the row in the queue
   * @throws KettleStepException
   *           when a subject field that has to be read does not exist in the input row layout, or when the output
   *           row cannot be written
   */
  public void processQueueObjectAt( int i ) throws KettleStepException {
    int index = i - 1;
    Object[] rows = data.data.toArray();

    int fieldCount = meta.getNumberOfFields();
    // Entries stay null unless a value is found below.
    Object[] fields = new Object[fieldCount];
    for ( int j = 0; j < fieldCount; j++ ) {
      // fieldIndex is the location inside a row of the subject of this field,
      // ie, ORDERTOTAL might be the subject of this field lag or lead,
      // so we determine ORDERTOTAL's index in the row
      String subjectField = meta.getSubjectField()[j];
      int fieldIndex = data.inputRowMeta.indexOfValue( subjectField );

      // Any other aggregate type keeps reading from the first queued row.
      int rowIndex = 0;
      switch ( meta.getAggregateType()[j] ) {
        case AnalyticQueryMeta.TYPE_FUNCT_LAG:
          rowIndex = index - meta.getValueField()[j];
          break;
        case AnalyticQueryMeta.TYPE_FUNCT_LEAD:
          rowIndex = index + meta.getValueField()[j];
          break;
        default:
          break;
      }

      if ( rowIndex < 0 || rowIndex >= rows.length ) {
        continue;
      }
      Object[] singleRow = (Object[]) rows[rowIndex];
      if ( singleRow == null ) {
        continue;
      }
      if ( fieldIndex < 0 ) {
        // Previously this surfaced as an ArrayIndexOutOfBoundsException for index -1.
        throw new KettleStepException( "Subject field '" + subjectField + "' could not be found in the input row" );
      }
      fields[j] = singleRow[fieldIndex];
    }

    Object[] newRow = RowDataUtil.addRowData( (Object[]) rows[index], data.inputRowMeta.size(), fields );

    putRow( data.outputRowMeta, newRow );
  }

  /**
   * Starts a new group: replaces the row queue with an empty one and moves the cursor back to the start.
   */
  public void resetGroup() {
    data.data = new ConcurrentLinkedQueue<Object[]>();
    data.queue_cursor = 0;
  }

  /**
   * Tells whether row {@code r} belongs to the same group as {@code previous}.
   *
   * @return {@code false} when exactly one of the rows is {@code null}; otherwise whether the rows compare equal on
   *         the group fields
   */
  private boolean sameGroup( Object[] previous, Object[] r ) throws KettleValueException {
    boolean onlyOneIsNull = ( previous == null ) != ( r == null );
    if ( onlyOneIsNull ) {
      return false;
    }
    return data.inputRowMeta.compare( previous, r, data.groupnrs ) == 0;
  }

  /**
   * Stores the supplied metadata and data objects and delegates to {@link BaseStep#init}.
   *
   * @return the result of the superclass initialization
   */
  @Override
  public boolean init( StepMetaInterface smi, StepDataInterface sdi ) {
    meta = (AnalyticQueryMeta) smi;
    data = (AnalyticQueryData) sdi;

    return super.init( smi, sdi );
  }

}