/*! ****************************************************************************** * * Pentaho Data Integration * * Copyright (C) 2002-2016 by Pentaho : http://www.pentaho.com * ******************************************************************************* * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * ******************************************************************************/ package org.pentaho.di.trans.steps.streamlookup; import java.math.BigDecimal; import java.text.DateFormat; import java.util.Collections; import org.pentaho.di.core.RowMetaAndData; import org.pentaho.di.core.RowSet; import org.pentaho.di.core.exception.KettleException; import org.pentaho.di.core.exception.KettleStepException; import org.pentaho.di.core.exception.KettleValueException; import org.pentaho.di.core.hash.ByteArrayHashIndex; import org.pentaho.di.core.row.RowDataUtil; import org.pentaho.di.core.row.RowMeta; import org.pentaho.di.core.row.RowMetaInterface; import org.pentaho.di.core.row.ValueMetaInterface; import org.pentaho.di.core.row.value.ValueMetaFactory; import org.pentaho.di.core.util.Utils; import org.pentaho.di.i18n.BaseMessages; import org.pentaho.di.trans.Trans; import org.pentaho.di.trans.TransMeta; import org.pentaho.di.trans.step.BaseStep; import org.pentaho.di.trans.step.StepDataInterface; import org.pentaho.di.trans.step.StepInterface; import org.pentaho.di.trans.step.StepMeta; import org.pentaho.di.trans.step.StepMetaInterface; /** * Looks up information by first reading data into a hash table (in memory) * * TODO: add warning with conflicting types OR modify the lookup values to the input row type. (this is harder to do as * currently we don't know the types) * * @author Matt * @since 26-apr-2003 */ public class StreamLookup extends BaseStep implements StepInterface { private static Class<?> PKG = StreamLookupMeta.class; // for i18n purposes, needed by Translator2!! private StreamLookupMeta meta; private StreamLookupData data; public StreamLookup( StepMeta stepMeta, StepDataInterface stepDataInterface, int copyNr, TransMeta transMeta, Trans trans ) { super( stepMeta, stepDataInterface, copyNr, transMeta, trans ); } private void handleNullIf() { data.nullIf = new Object[meta.getValue().length]; for ( int i = 0; i < meta.getValue().length; i++ ) { if ( meta.getValueDefaultType()[i] < 0 ) { //CHECKSTYLE:Indentation:OFF meta.getValueDefaultType()[i] = ValueMetaInterface.TYPE_STRING; } data.nullIf[i] = null; switch ( meta.getValueDefaultType()[i] ) { case ValueMetaInterface.TYPE_STRING: if ( Utils.isEmpty( meta.getValueDefault()[i] ) ) { data.nullIf[i] = null; } else { data.nullIf[i] = meta.getValueDefault()[i]; } break; case ValueMetaInterface.TYPE_DATE: try { data.nullIf[i] = DateFormat.getInstance().parse( meta.getValueDefault()[i] ); } catch ( Exception e ) { // Ignore errors } break; case ValueMetaInterface.TYPE_NUMBER: try { data.nullIf[i] = Double.parseDouble( meta.getValueDefault()[i] ); } catch ( Exception e ) { // Ignore errors } break; case ValueMetaInterface.TYPE_INTEGER: try { data.nullIf[i] = Long.parseLong( meta.getValueDefault()[i] ); } catch ( Exception e ) { // Ignore errors } break; case ValueMetaInterface.TYPE_BOOLEAN: if ( "TRUE".equalsIgnoreCase( meta.getValueDefault()[i] ) || "Y".equalsIgnoreCase( meta.getValueDefault()[i] ) ) { data.nullIf[i] = Boolean.TRUE; } else { data.nullIf[i] = Boolean.FALSE; } break; case ValueMetaInterface.TYPE_BIGNUMBER: try { data.nullIf[i] = new BigDecimal( meta.getValueDefault()[i] ); } catch ( Exception e ) { // Ignore errors } break; default: // if a default value is given and no conversion is implemented throw an error if ( meta.getValueDefault()[i] != null && meta.getValueDefault()[i].trim().length() > 0 ) { throw new RuntimeException( BaseMessages.getString( PKG, "StreamLookup.Exception.ConversionNotImplemented" ) + " " + ValueMetaFactory.getValueMetaName( meta.getValueDefaultType()[i] ) ); } else { // no default value given: just set it to null data.nullIf[i] = null; break; } } } } private boolean readLookupValues() throws KettleException { data.infoStream = meta.getStepIOMeta().getInfoStreams().get( 0 ); if ( data.infoStream.getStepMeta() == null ) { logError( BaseMessages.getString( PKG, "StreamLookup.Log.NoLookupStepSpecified" ) ); return false; } if ( log.isDetailed() ) { logDetailed( BaseMessages.getString( PKG, "StreamLookup.Log.ReadingFromStream" ) + data.infoStream.getStepname() + "]" ); } int[] keyNrs = new int[meta.getKeylookup().length]; int[] valueNrs = new int[meta.getValue().length]; boolean firstRun = true; // Which row set do we read from? // RowSet rowSet = findInputRowSet( data.infoStream.getStepname() ); Object[] rowData = getRowFrom( rowSet ); // rows are originating from "lookup_from" while ( rowData != null ) { if ( log.isRowLevel() ) { logRowlevel( BaseMessages.getString( PKG, "StreamLookup.Log.ReadLookupRow" ) + rowSet.getRowMeta().getString( rowData ) ); } if ( firstRun ) { firstRun = false; data.hasLookupRows = true; data.infoMeta = rowSet.getRowMeta().clone(); RowMetaInterface cacheKeyMeta = new RowMeta(); RowMetaInterface cacheValueMeta = new RowMeta(); // Look up the keys in the source rows for ( int i = 0; i < meta.getKeylookup().length; i++ ) { keyNrs[i] = rowSet.getRowMeta().indexOfValue( meta.getKeylookup()[i] ); if ( keyNrs[i] < 0 ) { throw new KettleStepException( BaseMessages.getString( PKG, "StreamLookup.Exception.UnableToFindField", meta.getKeylookup()[i] ) ); } cacheKeyMeta.addValueMeta( rowSet.getRowMeta().getValueMeta( keyNrs[i] ) ); } // Save the data types of the keys to optionally convert input rows later on... if ( data.keyTypes == null ) { data.keyTypes = cacheKeyMeta.clone(); } // Cache keys are stored as normal types, not binary for ( int i = 0; i < keyNrs.length; i++ ) { cacheKeyMeta.getValueMeta( i ).setStorageType( ValueMetaInterface.STORAGE_TYPE_NORMAL ); } for ( int v = 0; v < meta.getValue().length; v++ ) { valueNrs[v] = rowSet.getRowMeta().indexOfValue( meta.getValue()[v] ); if ( valueNrs[v] < 0 ) { throw new KettleStepException( BaseMessages.getString( PKG, "StreamLookup.Exception.UnableToFindField", meta.getValue()[v] ) ); } cacheValueMeta.addValueMeta( rowSet.getRowMeta().getValueMeta( valueNrs[v] ) ); } data.cacheKeyMeta = cacheKeyMeta; data.cacheValueMeta = cacheValueMeta; } Object[] keyData = new Object[keyNrs.length]; for ( int i = 0; i < keyNrs.length; i++ ) { ValueMetaInterface keyMeta = data.keyTypes.getValueMeta( i ); // Convert keys to normal storage type keyData[i] = keyMeta.convertToNormalStorageType( rowData[keyNrs[i]] ); } Object[] valueData = new Object[valueNrs.length]; for ( int i = 0; i < valueNrs.length; i++ ) { // Store value as is, avoid preliminary binary->normal storage type conversion valueData[i] = rowData[valueNrs[i]]; } addToCache( data.cacheKeyMeta, keyData, data.cacheValueMeta, valueData ); rowData = getRowFrom( rowSet ); } return true; } private Object[] lookupValues( RowMetaInterface rowMeta, Object[] row ) throws KettleException { // See if we need to stop. if ( isStopped() ) { return null; } if ( data.lookupColumnIndex == null ) { String[] names = data.lookupMeta.getFieldNames(); data.lookupColumnIndex = new int[names.length]; for ( int i = 0; i < names.length; i++ ) { data.lookupColumnIndex[i] = rowMeta.indexOfValue( names[i] ); if ( data.lookupColumnIndex[i] < 0 ) { // we should not get here throw new KettleStepException( "The lookup column '" + names[i] + "' could not be found" ); } } } // Copy value references to lookup table. // Object[] lu = new Object[data.keynrs.length]; for ( int i = 0; i < data.keynrs.length; i++ ) { // If the input is binary storage data, we convert it to normal storage. // if ( data.convertKeysToNative[i] ) { lu[i] = data.lookupMeta.getValueMeta( i ).convertBinaryStringToNativeType( (byte[]) row[data.keynrs[i]] ); } else { lu[i] = row[data.keynrs[i]]; } } // Handle conflicting types (Number-Integer-String conversion to lookup type in hashtable) if ( data.keyTypes != null ) { for ( int i = 0; i < data.lookupMeta.size(); i++ ) { ValueMetaInterface inputValue = data.lookupMeta.getValueMeta( i ); ValueMetaInterface lookupValue = data.keyTypes.getValueMeta( i ); if ( inputValue.getType() != lookupValue.getType() ) { try { // Change the input value to match the lookup value // lu[i] = lookupValue.convertDataCompatible( inputValue, lu[i] ); } catch ( KettleValueException e ) { throw new KettleStepException( "Error converting data while looking up value", e ); } } } } Object[] add = null; if ( data.hasLookupRows ) { try { if ( meta.getKeystream().length > 0 ) { add = getFromCache( data.cacheKeyMeta, lu ); } else { // Just take the first element in the hashtable... throw new KettleStepException( BaseMessages.getString( PKG, "StreamLookup.Log.GotRowWithoutKeys" ) ); } } catch ( Exception e ) { throw new KettleStepException( e ); } } if ( add == null ) { // nothing was found, unknown code: add the specified default value... add = data.nullIf; } return RowDataUtil.addRowData( row, rowMeta.size(), add ); } private void addToCache( RowMetaInterface keyMeta, Object[] keyData, RowMetaInterface valueMeta, Object[] valueData ) throws KettleValueException { if ( meta.isMemoryPreservationActive() ) { if ( meta.isUsingSortedList() ) { KeyValue keyValue = new KeyValue( keyData, valueData ); int idx = Collections.binarySearch( data.list, keyValue, data.comparator ); if ( idx < 0 ) { int index = -idx - 1; // this is the insertion point data.list.add( index, keyValue ); // insert to keep sorted. } else { data.list.set( idx, keyValue ); // Overwrite to simulate Hashtable behaviour } } else { if ( meta.isUsingIntegerPair() ) { if ( !data.metadataVerifiedIntegerPair ) { data.metadataVerifiedIntegerPair = true; if ( keyMeta.size() != 1 || valueMeta.size() != 1 || !keyMeta.getValueMeta( 0 ).isInteger() || !valueMeta.getValueMeta( 0 ).isInteger() ) { throw new KettleValueException( BaseMessages.getString( PKG, "StreamLookup.Exception.CanNotUseIntegerPairAlgorithm" ) ); } } Long key = keyMeta.getInteger( keyData, 0 ); Long value = valueMeta.getInteger( valueData, 0 ); data.longIndex.put( key, value ); } else { if ( data.hashIndex == null ) { data.hashIndex = new ByteArrayHashIndex( keyMeta ); } data.hashIndex .put( RowMeta.extractData( keyMeta, keyData ), RowMeta.extractData( valueMeta, valueData ) ); } } } else { // We can't just put Object[] in the map The compare function is not in it. // We need to wrap in and use that. Let's use RowMetaAndData for this one. data.look.put( new RowMetaAndData( keyMeta, keyData ), valueData ); } } private Object[] getFromCache( RowMetaInterface keyMeta, Object[] keyData ) throws KettleValueException { if ( meta.isMemoryPreservationActive() ) { if ( meta.isUsingSortedList() ) { KeyValue keyValue = new KeyValue( keyData, null ); int idx = Collections.binarySearch( data.list, keyValue, data.comparator ); if ( idx < 0 ) { return null; // nothing found } keyValue = data.list.get( idx ); return keyValue.getValue(); } else { if ( meta.isUsingIntegerPair() ) { Long value = data.longIndex.get( keyMeta.getInteger( keyData, 0 ) ); if ( value == null ) { return null; } return new Object[] { value, }; } else { try { byte[] value = data.hashIndex.get( RowMeta.extractData( keyMeta, keyData ) ); if ( value == null ) { return null; } return RowMeta.getRow( data.cacheValueMeta, value ); } catch ( Exception e ) { logError( "Oops", e ); throw new RuntimeException( e ); } } } } else { return data.look.get( new RowMetaAndData( keyMeta, keyData ) ); } } @Override public boolean processRow( StepMetaInterface smi, StepDataInterface sdi ) throws KettleException { meta = (StreamLookupMeta) smi; data = (StreamLookupData) sdi; if ( data.readLookupValues ) { data.readLookupValues = false; if ( !readLookupValues() ) { // Read values in lookup table (look) logError( BaseMessages.getString( PKG, "StreamLookup.Log.UnableToReadDataFromLookupStream" ) ); setErrors( 1 ); stopAll(); return false; } return true; } Object[] r = getRow(); // Get row from input rowset & set row busy! if ( r == null ) { // no more input to be expected... if ( log.isDetailed() ) { logDetailed( BaseMessages.getString( PKG, "StreamLookup.Log.StoppedProcessingWithEmpty", getLinesRead() + "" ) ); } setOutputDone(); return false; } if ( first ) { first = false; // read the lookup values! data.keynrs = new int[meta.getKeystream().length]; data.lookupMeta = new RowMeta(); data.convertKeysToNative = new boolean[meta.getKeystream().length]; for ( int i = 0; i < meta.getKeystream().length; i++ ) { // Find the keynr in the row (only once) data.keynrs[i] = getInputRowMeta().indexOfValue( meta.getKeystream()[i] ); if ( data.keynrs[i] < 0 ) { throw new KettleStepException( BaseMessages .getString( PKG, "StreamLookup.Log.FieldNotFound", meta.getKeystream()[i], "" + getInputRowMeta().getString( r ) ) ); } else { if ( log.isDetailed() ) { logDetailed( BaseMessages.getString( PKG, "StreamLookup.Log.FieldInfo", meta.getKeystream()[i], "" + data.keynrs[i] ) ); } } data.lookupMeta.addValueMeta( getInputRowMeta().getValueMeta( data.keynrs[i] ).clone() ); // If we have binary storage data coming in, we convert it to normal data storage. // The storage in the lookup data store is also normal data storage. TODO: enforce normal data storage?? // data.convertKeysToNative[i] = getInputRowMeta().getValueMeta( data.keynrs[i] ).isStorageBinaryString(); } data.outputRowMeta = getInputRowMeta().clone(); meta.getFields( data.outputRowMeta, getStepname(), new RowMetaInterface[] { data.infoMeta }, null, this, repository, metaStore ); // Handle the NULL values (not found...) handleNullIf(); } Object[] outputRow = lookupValues( getInputRowMeta(), r ); // Do the actual lookup in the hastable. if ( outputRow == null ) { setOutputDone(); // signal end to receiver(s) return false; } putRow( data.outputRowMeta, outputRow ); // copy row to output rowset(s); if ( checkFeedback( getLinesRead() ) ) { if ( log.isBasic() ) { logBasic( BaseMessages.getString( PKG, "StreamLookup.Log.LineNumber" ) + getLinesRead() ); } } return true; } @Override public boolean init( StepMetaInterface smi, StepDataInterface sdi ) { meta = (StreamLookupMeta) smi; data = (StreamLookupData) sdi; if ( super.init( smi, sdi ) ) { data.readLookupValues = true; return true; } return false; } @Override public void dispose( StepMetaInterface smi, StepDataInterface sdi ) { // Recover memory immediately, allow in-memory data to be garbage collected // data.look = null; data.list = null; data.hashIndex = null; data.longIndex = null; super.dispose( smi, sdi ); } }