/*! ******************************************************************************
 *
 * Pentaho Data Integration
 *
 * Copyright (C) 2002-2017 by Pentaho : http://www.pentaho.com
 *
 *******************************************************************************
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 ******************************************************************************/

package org.pentaho.di.trans.steps.denormaliser;

import java.math.BigDecimal;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.Hashtable;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.pentaho.di.core.Const;
import org.pentaho.di.core.util.Utils;
import org.pentaho.di.core.exception.KettleException;
import org.pentaho.di.core.exception.KettleStepException;
import org.pentaho.di.core.exception.KettleValueException;
import org.pentaho.di.core.row.RowDataUtil;
import org.pentaho.di.core.row.RowMetaInterface;
import org.pentaho.di.core.row.ValueDataUtil;
import org.pentaho.di.core.row.ValueMetaInterface;
import org.pentaho.di.core.row.value.ValueMetaBase;
import org.pentaho.di.core.row.value.ValueMetaDate;
import org.pentaho.di.core.row.value.ValueMetaInteger;
import org.pentaho.di.i18n.BaseMessages;
import org.pentaho.di.trans.Trans;
import org.pentaho.di.trans.TransMeta;
import org.pentaho.di.trans.step.BaseStep;
import org.pentaho.di.trans.step.StepDataInterface;
import org.pentaho.di.trans.step.StepInterface;
import org.pentaho.di.trans.step.StepMeta;
import org.pentaho.di.trans.step.StepMetaInterface;

/**
 * Denormalises data based on key-value pairs
 *
 * @author Matt
 * @since 17-jan-2006
 */
public class Denormaliser extends BaseStep implements StepInterface {
  private static Class<?> PKG = DenormaliserMeta.class; // for i18n purposes, needed by Translator2!!

  private DenormaliserMeta meta;
  private DenormaliserData data;

  private boolean allNullsAreZero = false;
  private boolean minNullIsValued = false;

  private Map<String, ValueMetaInterface> conversionMetaCache = new HashMap<String, ValueMetaInterface>();

  public Denormaliser( StepMeta stepMeta, StepDataInterface stepDataInterface, int copyNr, TransMeta transMeta,
    Trans trans ) {
    super( stepMeta, stepDataInterface, copyNr, transMeta, trans );

    meta = (DenormaliserMeta) getStepMeta().getStepMetaInterface();
    data = (DenormaliserData) stepDataInterface;
  }
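  // Illustrative note (not part of the original source): this step expects its input to arrive
  // sorted on the grouping fields, like a Group By. Rows are streamed; while the group key stays
  // the same, key-value pairs are folded into data.targetResult, and when the group changes (or
  // input ends) one denormalised row is emitted. For example, with group field "customer", key
  // field "type" and target fields for key values "address" and "phone", the input rows
  //
  //   customer=101, type=address, value="Main St."
  //   customer=101, type=phone,   value="555-0101"
  //
  // would produce a single output row: customer=101, address="Main St.", phone="555-0101".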
  @Override
  public boolean processRow( StepMetaInterface smi, StepDataInterface sdi ) throws KettleException {
    Object[] r = getRow(); // get row!
    if ( r == null ) {
      // no more input to be expected...
      handleLastRow();
      setOutputDone();
      return false;
    }

    if ( first ) {
      // perform all allocations
      if ( !processFirstRow() ) {
        // we failed on the first row....
        return false;
      }

      newGroup(); // Create a new result row (init)
      deNormalise( data.inputRowMeta, r );
      data.previous = r; // copy the row to previous

      // we don't need feedback here
      first = false;

      // ok, we're done with the first row
      return true;
    }

    if ( !sameGroup( data.inputRowMeta, data.previous, r ) ) {
      Object[] outputRowData = buildResult( data.inputRowMeta, data.previous );
      putRow( data.outputRowMeta, outputRowData ); // copy row to possible alternate rowset(s).
      newGroup(); // Create a new group aggregate (init)
      deNormalise( data.inputRowMeta, r );
    } else {
      deNormalise( data.inputRowMeta, r );
    }

    data.previous = r;

    if ( checkFeedback( getLinesRead() ) ) {
      if ( log.isBasic() ) {
        logBasic( BaseMessages.getString( PKG, "Denormaliser.Log.LineNumber" ) + getLinesRead() );
      }
    }

    return true;
  }

  private boolean processFirstRow() throws KettleStepException {
    String val = getVariable( Const.KETTLE_AGGREGATION_ALL_NULLS_ARE_ZERO, "N" );
    this.allNullsAreZero = ValueMetaBase.convertStringToBoolean( val );
    val = getVariable( Const.KETTLE_AGGREGATION_MIN_NULL_IS_VALUED, "N" );
    this.minNullIsValued = ValueMetaBase.convertStringToBoolean( val );
    data.inputRowMeta = getInputRowMeta();
    data.outputRowMeta = data.inputRowMeta.clone();
    meta.getFields( data.outputRowMeta, getStepname(), null, null, this, repository, metaStore );

    data.keyFieldNr = data.inputRowMeta.indexOfValue( meta.getKeyField() );
    if ( data.keyFieldNr < 0 ) {
      logError( BaseMessages.getString( PKG, "Denormaliser.Log.KeyFieldNotFound", meta.getKeyField() ) );
      setErrors( 1 );
      stopAll();
      return false;
    }

    Map<Integer, Integer> subjects = new Hashtable<Integer, Integer>();
    data.fieldNameIndex = new int[meta.getDenormaliserTargetField().length];
    for ( int i = 0; i < meta.getDenormaliserTargetField().length; i++ ) {
      DenormaliserTargetField field = meta.getDenormaliserTargetField()[i];
      int idx = data.inputRowMeta.indexOfValue( field.getFieldName() );
      if ( idx < 0 ) {
        logError( BaseMessages.getString( PKG, "Denormaliser.Log.UnpivotFieldNotFound", field.getFieldName() ) );
        setErrors( 1 );
        stopAll();
        return false;
      }
      data.fieldNameIndex[i] = idx;
      subjects.put( Integer.valueOf( idx ), Integer.valueOf( idx ) );

      // See if, by accident, the value fieldname isn't the same as the key fieldname.
      // This is not supported, of course, and given the complexity of the step, it is easy to miss:
      if ( data.fieldNameIndex[i] == data.keyFieldNr ) {
        logError( BaseMessages.getString( PKG, "Denormaliser.Log.ValueFieldSameAsKeyField", field.getFieldName() ) );
        setErrors( 1 );
        stopAll();
        return false;
      }

      // Fill a hashtable with the key strings and the position(s) of the field(s) in the row to take.
      // Store the indexes in a List so that we can accommodate multiple key/value pairs...
      //
      String keyValue = environmentSubstitute( field.getKeyValue() );
      List<Integer> indexes = data.keyValue.get( keyValue );
      if ( indexes == null ) {
        indexes = new ArrayList<Integer>( 2 );
      }
      indexes.add( Integer.valueOf( i ) ); // Add the index to the list...
      data.keyValue.put( keyValue, indexes ); // store the list
    }

    Set<Integer> subjectSet = subjects.keySet();
    data.fieldNrs = subjectSet.toArray( new Integer[subjectSet.size()] );

    data.groupnrs = new int[meta.getGroupField().length];
    for ( int i = 0; i < meta.getGroupField().length; i++ ) {
      data.groupnrs[i] = data.inputRowMeta.indexOfValue( meta.getGroupField()[i] );
      if ( data.groupnrs[i] < 0 ) {
        logError( BaseMessages.getString( PKG, "Denormaliser.Log.GroupingFieldNotFound", meta.getGroupField()[i] ) );
        setErrors( 1 );
        stopAll();
        return false;
      }
    }

    List<Integer> removeList = new ArrayList<Integer>();
    removeList.add( Integer.valueOf( data.keyFieldNr ) );
    for ( int i = 0; i < data.fieldNrs.length; i++ ) {
      removeList.add( data.fieldNrs[i] );
    }
    Collections.sort( removeList );

    data.removeNrs = new int[removeList.size()];
    for ( int i = 0; i < removeList.size(); i++ ) {
      data.removeNrs[i] = removeList.get( i );
    }
    return true;
  }
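  // Illustrative note (not part of the original source): after processFirstRow() the lookup
  // structures are in place. data.keyValue maps each configured key string to the list of
  // target-field indexes it feeds (a list, because several target fields may share one key
  // value), data.fieldNameIndex maps each target field to its input column, and data.removeNrs
  // holds the sorted input indexes (the key column plus all value columns) that are dropped
  // from the output row before the unpivoted targets are appended.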
  private void handleLastRow() throws KettleException {
    // Don't forget the last set of rows...
    if ( data.previous != null ) {
      // deNormalise(data.previous); --> That would overdo it.
      //
      Object[] outputRowData = buildResult( data.inputRowMeta, data.previous );
      putRow( data.outputRowMeta, outputRowData );
    }
  }

  /**
   * Used for junits in DenormaliserAggregationsTest
   *
   * @param rowMeta
   * @param rowData
   * @return
   * @throws KettleValueException
   */
  Object[] buildResult( RowMetaInterface rowMeta, Object[] rowData ) throws KettleValueException {
    // Deleting objects: we need to create a new object array.
    // It's useless to call RowDataUtil.resizeArray
    //
    Object[] outputRowData = RowDataUtil.allocateRowData( data.outputRowMeta.size() );
    int outputIndex = 0;

    // Copy the data from the incoming row, but remove the unwanted fields in the same loop...
    //
    int removeIndex = 0;
    for ( int i = 0; i < rowMeta.size(); i++ ) {
      if ( removeIndex < data.removeNrs.length && i == data.removeNrs[removeIndex] ) {
        removeIndex++;
      } else {
        outputRowData[outputIndex++] = rowData[i];
      }
    }

    // Add the unpivoted fields...
    //
    for ( int i = 0; i < data.targetResult.length; i++ ) {
      Object resultValue = data.targetResult[i];
      DenormaliserTargetField field = meta.getDenormaliserTargetField()[i];
      switch ( field.getTargetAggregationType() ) {
        case DenormaliserTargetField.TYPE_AGGR_AVERAGE:
          long count = data.counters[i];
          Object sum = data.sum[i];
          if ( count > 0 ) {
            if ( sum instanceof Long ) {
              resultValue = (Long) sum / count;
            } else if ( sum instanceof Double ) {
              resultValue = (Double) sum / count;
            } else if ( sum instanceof BigDecimal ) {
              resultValue = ( (BigDecimal) sum ).divide( new BigDecimal( count ) );
            } else {
              resultValue = null; // TODO: perhaps throw an exception here?
            }
          }
          break;
        case DenormaliserTargetField.TYPE_AGGR_COUNT_ALL:
          if ( resultValue == null ) {
            resultValue = Long.valueOf( 0 );
          }
          if ( field.getTargetType() != ValueMetaInterface.TYPE_INTEGER ) {
            resultValue =
              data.outputRowMeta.getValueMeta( outputIndex ).convertData(
                new ValueMetaInteger( "num_values_aggregation" ), resultValue );
          }
          break;
        default:
          break;
      }
      if ( resultValue == null && allNullsAreZero ) {
        // PDI-9662: it seems all rows for the min function were null...
        resultValue = getZero( outputIndex );
      }
      outputRowData[outputIndex++] = resultValue;
    }

    return outputRowData;
  }

  private Object getZero( int field ) throws KettleValueException {
    ValueMetaInterface vm = data.outputRowMeta.getValueMeta( field );
    return ValueDataUtil.getZeroForValueMetaType( vm );
  }
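  // Illustrative note (not part of the original source): buildResult() finalizes the running
  // aggregates at a group boundary. For AVERAGE only the running sum and the non-null count are
  // kept while streaming; the division happens here, e.g. sum=9 over count=3 yields 3. Note that
  // BigDecimal.divide( BigDecimal ) without a MathContext is exact and throws ArithmeticException
  // for non-terminating quotients such as 1/3.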
  // Is the row r of the same group as previous?
  private boolean sameGroup( RowMetaInterface rowMeta, Object[] previous, Object[] rowData )
    throws KettleValueException {
    return rowMeta.compare( previous, rowData, data.groupnrs ) == 0;
  }

  /**
   * Initialize a new group...
   *
   * @throws KettleException
   */
  private void newGroup() throws KettleException {
    // There is no need anymore to take care of the meta-data.
    // That is done once in DenormaliserMeta.getFields()
    //
    data.targetResult = new Object[meta.getDenormaliserTargetFields().length];

    DenormaliserTargetField[] fields = meta.getDenormaliserTargetField();

    for ( int i = 0; i < fields.length; i++ ) {
      data.counters[i] = 0L; // set to 0
      data.sum[i] = null;
    }
  }
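  // Illustrative note (not part of the original source): inside deNormalise() the value meta of
  // target field idx is looked up at output position
  //   data.inputRowMeta.size() - data.removeNrs.length + idx
  // i.e. the surviving input fields come first, followed by the unpivoted targets. For example,
  // with 5 input fields of which 2 are removed (the key column and one value column), target 0
  // lands at output index 3.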
  /**
   * This method de-normalizes a single key-value pair. It looks up the key and determines the value name to store it
   * in. It converts it to the right type and stores it in the result row.
   *
   * Used for junits in DenormaliserAggregationsTest
   *
   * @param r
   * @throws KettleValueException
   */
  void deNormalise( RowMetaInterface rowMeta, Object[] rowData ) throws KettleValueException {
    ValueMetaInterface valueMeta = rowMeta.getValueMeta( data.keyFieldNr );
    Object valueData = rowData[data.keyFieldNr];

    String key = valueMeta.getCompatibleString( valueData );
    if ( Utils.isEmpty( key ) ) {
      return;
    }

    // Get all the indexes for the given key value...
    //
    List<Integer> indexes = data.keyValue.get( key );
    if ( indexes == null ) { // otherwise we're not interested.
      return;
    }

    for ( Integer keyNr : indexes ) {
      if ( keyNr == null ) {
        continue;
      }
      // keyNr is the field in DenormaliserTargetField[]
      //
      int idx = keyNr.intValue();
      DenormaliserTargetField field = meta.getDenormaliserTargetField()[idx];

      // This is the value we need to de-normalise, convert, aggregate.
      //
      ValueMetaInterface sourceMeta = rowMeta.getValueMeta( data.fieldNameIndex[idx] );
      Object sourceData = rowData[data.fieldNameIndex[idx]];
      Object targetData;

      // What is the target value metadata?
      //
      ValueMetaInterface targetMeta =
        data.outputRowMeta.getValueMeta( data.inputRowMeta.size() - data.removeNrs.length + idx );

      // What was the previous target in the result row?
      //
      Object prevTargetData = data.targetResult[idx];

      // Clone the source meta, as it can be used by other steps, and set the conversion meta
      // to convert dates to the target format.
      // See PDI-4910 for details.
      ValueMetaInterface origSourceMeta = sourceMeta;
      if ( targetMeta.isDate() ) {
        sourceMeta = origSourceMeta.clone();
        sourceMeta.setConversionMetadata( getConversionMeta( field.getTargetFormat() ) );
      }

      switch ( field.getTargetAggregationType() ) {
        case DenormaliserTargetField.TYPE_AGGR_SUM:
          targetData = targetMeta.convertData( sourceMeta, sourceData );
          if ( prevTargetData != null ) {
            prevTargetData = ValueDataUtil.sum( targetMeta, prevTargetData, targetMeta, targetData );
          } else {
            prevTargetData = targetData;
          }
          break;
        case DenormaliserTargetField.TYPE_AGGR_MIN:
          if ( sourceData == null && !minNullIsValued ) {
            // PDI-9662: do not compare null
            break;
          }
          if ( ( prevTargetData == null && !minNullIsValued )
            || sourceMeta.compare( sourceData, targetMeta, prevTargetData ) < 0 ) {
            prevTargetData = targetMeta.convertData( sourceMeta, sourceData );
          }
          break;
        case DenormaliserTargetField.TYPE_AGGR_MAX:
          if ( sourceMeta.compare( sourceData, targetMeta, prevTargetData ) > 0 ) {
            prevTargetData = targetMeta.convertData( sourceMeta, sourceData );
          }
          break;
        case DenormaliserTargetField.TYPE_AGGR_COUNT_ALL:
          prevTargetData = ++data.counters[idx];
          break;
        case DenormaliserTargetField.TYPE_AGGR_AVERAGE:
          targetData = targetMeta.convertData( sourceMeta, sourceData );
          if ( !sourceMeta.isNull( sourceData ) ) {
            prevTargetData = data.counters[idx]++;
            if ( data.sum[idx] == null ) {
              data.sum[idx] = targetData;
            } else {
              data.sum[idx] = ValueDataUtil.plus( targetMeta, data.sum[idx], targetMeta, targetData );
            }
            // data.sum[idx] = (Integer)data.sum[idx] + (Integer)sourceData;
          }
          break;
        case DenormaliserTargetField.TYPE_AGGR_CONCAT_COMMA:
          String separator = ",";
          targetData = targetMeta.convertData( sourceMeta, sourceData );
          if ( prevTargetData != null ) {
            prevTargetData = prevTargetData + separator + targetData;
          } else {
            prevTargetData = targetData;
          }
          break;
        case DenormaliserTargetField.TYPE_AGGR_NONE:
        default:
          prevTargetData = targetMeta.convertData( sourceMeta, sourceData ); // Overwrite the previous
          break;
      }

      // Update the result row too
      //
      data.targetResult[idx] = prevTargetData;
    }
  }

  @Override
  public boolean init( StepMetaInterface smi, StepDataInterface sdi ) {
    meta = (DenormaliserMeta) smi;
    data = (DenormaliserData) sdi;

    if ( super.init( smi, sdi ) ) {
      data.counters = new long[meta.getDenormaliserTargetField().length];
      data.sum = new Object[meta.getDenormaliserTargetField().length];
      return true;
    }
    return false;
  }

  @Override
  public void batchComplete() throws KettleException {
    handleLastRow();
    data.previous = null;
  }

  /**
   * Get the metadata used for conversion to date format. See related PDI-4019.
   *
   * @param mask
   * @return
   */
  private ValueMetaInterface getConversionMeta( String mask ) {
    ValueMetaInterface meta = null;
    if ( !Utils.isEmpty( mask ) ) {
      meta = conversionMetaCache.get( mask );
      if ( meta == null ) {
        meta = new ValueMetaDate();
        meta.setConversionMask( mask );
        conversionMetaCache.put( mask, meta );
      }
    }
    return meta;
  }

  /**
   * Used for junits in DenormaliserAggregationsTest
   *
   * @param allNullsAreZero
   *          the allNullsAreZero to set
   */
  void setAllNullsAreZero( boolean allNullsAreZero ) {
    this.allNullsAreZero = allNullsAreZero;
  }

  /**
   * Used for junits in DenormaliserAggregationsTest
   *
   * @param minNullIsValued
   *          the minNullIsValued to set
   */
  void setMinNullIsValued( boolean minNullIsValued ) {
    this.minNullIsValued = minNullIsValued;
  }
}