/*! ******************************************************************************
 *
 * Pentaho Data Integration
 *
 * Copyright (C) 2002-2013 by Pentaho : http://www.pentaho.com
 *
 *******************************************************************************
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 ******************************************************************************/

package org.pentaho.di.trans.steps.sortedmerge;

import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;

import org.pentaho.di.core.RowSet;
import org.pentaho.di.core.exception.KettleException;
import org.pentaho.di.core.exception.KettleStepException;
import org.pentaho.di.core.exception.KettleValueException;
import org.pentaho.di.i18n.BaseMessages;
import org.pentaho.di.trans.Trans;
import org.pentaho.di.trans.TransMeta;
import org.pentaho.di.trans.step.BaseStep;
import org.pentaho.di.trans.step.StepDataInterface;
import org.pentaho.di.trans.step.StepInterface;
import org.pentaho.di.trans.step.StepMeta;
import org.pentaho.di.trans.step.StepMetaInterface;

/**
 * Merge rows from multiple, individually sorted input streams into a single sorted output stream.
 *
 * @author Matt
 * @since 2-jun-2003
 */
public class SortedMerge extends BaseStep implements StepInterface {
  private static Class<?> PKG = SortedMergeMeta.class; // for i18n purposes, needed by Translator2!!

  private SortedMergeMeta meta;
  private SortedMergeData data;

  public SortedMerge( StepMeta stepMeta, StepDataInterface stepDataInterface, int copyNr, TransMeta transMeta,
    Trans trans ) {
    super( stepMeta, stepDataInterface, copyNr, transMeta, trans );
  }

  /**
   * We read from all streams in the partition merge mode. For that we need at least one row on all input rowsets...
   * If we don't have a row, we wait for one.
   *
   * TODO: keep the inputRowSets() list sorted and go from there. That should dramatically improve speed as you only
   * need half as many comparisons.
   *
   * @return the next row
   */
  private synchronized Object[] getRowSorted() throws KettleException {
    if ( first ) {
      first = false;

      // Verify that socket connections to all the remote input steps are opened
      // before we start to read/write ...
      //
      openRemoteInputStepSocketsOnce();

      // Read one row from all rowsets...
      //
      data.sortedBuffer = new ArrayList<RowSetRow>();

      data.rowMeta = null;

      // PDI-1212:
      // If one of the inputRowSets holds a null row (the input yields 0 rows), then the null rowSet is removed
      // from the InputRowSet buffer (BaseStep.getRowFrom()), which throws this loop off by one (the next set
      // never gets processed). Instead of modifying BaseStep, I figure reversing the loop here would effect
      // change in fewer areas. If the reverse loop causes a problem, please re-open
      // http://jira.pentaho.com/browse/PDI-1212.
      for ( int i = getInputRowSets().size() - 1; i >= 0 && !isStopped(); i-- ) {
        RowSet rowSet = getInputRowSets().get( i );
        Object[] row = getRowFrom( rowSet );
        if ( row != null ) {
          // Add this row to the sortedBuffer...
          // Which is not yet sorted, we'll get to that later.
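          // (The buffer holds at most one pending row per input row set; it is sorted once below and then
          // kept sorted by binary-search insertion further down.)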
          //
          data.sortedBuffer.add( new RowSetRow( rowSet, rowSet.getRowMeta(), row ) );
          if ( data.rowMeta == null ) {
            data.rowMeta = rowSet.getRowMeta().clone();
          }

          // What fields do we compare on and in what order?
          // Better cache the locations of the sort fields.
          // First time operation only.
          //
          if ( data.fieldIndices == null ) {
            // Get the indexes of the specified sort fields...
            data.fieldIndices = new int[meta.getFieldName().length];
            for ( int f = 0; f < data.fieldIndices.length; f++ ) {
              data.fieldIndices[f] = data.rowMeta.indexOfValue( meta.getFieldName()[f] );
              if ( data.fieldIndices[f] < 0 ) {
                throw new KettleStepException( "Unable to find fieldname ["
                  + meta.getFieldName()[f] + "] in row : " + data.rowMeta );
              }

              data.rowMeta.getValueMeta( data.fieldIndices[f] ).setSortedDescending( !meta.getAscending()[f] );
            }
          }
        }

        data.comparator = new Comparator<RowSetRow>() {
          public int compare( RowSetRow o1, RowSetRow o2 ) {
            try {
              return o1.getRowMeta().compare( o1.getRowData(), o2.getRowData(), data.fieldIndices );
            } catch ( KettleValueException e ) {
              return 0; // TODO see if we should fire off alarms over here... Perhaps throw a RuntimeException.
            }
          }
        };

        // Now sort the sortedBuffer for the first time.
        //
        Collections.sort( data.sortedBuffer, data.comparator );
      }
    }

    // If our sorted buffer is empty, it means we're done...
    //
    if ( data.sortedBuffer.isEmpty() ) {
      return null;
    }

    // Now that we have all rows sorted, all we need to do is find out what the smallest row is.
    // The smallest row is the first in our case...
    //
    RowSetRow smallestRow = data.sortedBuffer.get( 0 );
    data.sortedBuffer.remove( 0 );
    Object[] outputRowData = smallestRow.getRowData();

    // We read another row from the row set where the smallest row came from.
    // That way we exhaust all row sets.
    //
    Object[] extraRow = getRowFrom( smallestRow.getRowSet() );

    // Add it to the sorted buffer in the right position...
    //
    if ( extraRow != null ) {
      // Add this one to the sortedBuffer
      //
      RowSetRow add = new RowSetRow( smallestRow.getRowSet(), smallestRow.getRowSet().getRowMeta(), extraRow );
      int index = Collections.binarySearch( data.sortedBuffer, add, data.comparator );
      if ( index < 0 ) {
        // Not found: binarySearch returns -(insertion point) - 1, so insert at -index - 1.
        data.sortedBuffer.add( -index - 1, add );
      } else {
        data.sortedBuffer.add( index, add );
      }
    }

    // This concludes the regular program...
    //

    // Optionally perform safe mode checking to prevent problems.
    //
    if ( getTrans().isSafeModeEnabled() ) {
      // For checking we need to get data and meta.
      //
      safeModeChecking( smallestRow.getRowMeta() );
    }

    return outputRowData;
  }

  public boolean processRow( StepMetaInterface smi, StepDataInterface sdi ) throws KettleException {
    meta = (SortedMergeMeta) smi;
    data = (SortedMergeData) sdi;

    Object[] row = getRowSorted(); // get row, sorted
    if ( row == null ) { // no more input to be expected...
      setOutputDone();
      return false;
    }

    putRow( data.rowMeta, row ); // copy row to possible alternate rowset(s).

    if ( checkFeedback( getLinesRead() ) ) {
      logBasic( BaseMessages.getString( PKG, "SortedMerge.Log.LineNumber" ) + getLinesRead() );
    }

    return true;
  }

  public boolean init( StepMetaInterface smi, StepDataInterface sdi ) {
    meta = (SortedMergeMeta) smi;
    data = (SortedMergeData) sdi;

    if ( super.init( smi, sdi ) ) {
      // data.rowComparator = new RowComparator();

      // Add init code here.
      return true;
    }
    return false;
  }
}