/*! ******************************************************************************
 *
 * Pentaho Data Integration
 *
 * Copyright (C) 2002-2016 by Pentaho : http://www.pentaho.com
 *
 *******************************************************************************
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 ******************************************************************************/

package org.pentaho.di.trans.steps.concatfields;

import java.io.UnsupportedEncodingException;

import org.pentaho.di.core.exception.KettleException;
import org.pentaho.di.core.exception.KettleStepException;
import org.pentaho.di.core.row.RowDataUtil;
import org.pentaho.di.core.util.Utils;
import org.pentaho.di.i18n.BaseMessages;
import org.pentaho.di.trans.Trans;
import org.pentaho.di.trans.TransMeta;
import org.pentaho.di.trans.step.StepDataInterface;
import org.pentaho.di.trans.step.StepInterface;
import org.pentaho.di.trans.step.StepMeta;
import org.pentaho.di.trans.step.StepMetaInterface;
import org.pentaho.di.trans.steps.textfileoutput.TextFileOutput;

/*
 * ConcatFields step - derived from the TextFileOutput step.
 *
 * Instead of writing formatted lines to a file, the inherited TextFileOutput machinery writes them
 * into an in-memory ConcatFieldsOutputStream; each "line" is then read back, converted to a String
 * and appended to the output row as a new target field.
 *
 * @author jb
 * @since 2012-08-31
 */
public class ConcatFields extends TextFileOutput implements StepInterface {

  private static Class<?> PKG = ConcatFields.class; // for i18n purposes, needed by Translator2!!

  public ConcatFieldsMeta meta;

  public ConcatFieldsData data;

  public ConcatFields( StepMeta stepMeta, StepDataInterface stepDataInterface, int copyNr, TransMeta transMeta,
    Trans trans ) {
    super( stepMeta, stepDataInterface, copyNr, transMeta, trans ); // allocate TextFileOutput
  }

  /**
   * Processes one incoming row: lazily initializes the output/input row metadata on the first row,
   * writes header/footer/split markers when required, concatenates the selected fields (either via
   * the inherited TextFileOutput formatting or via the "fast data dump" path) and forwards the
   * resulting row with the new target field appended.
   *
   * @return true while more rows are expected, false when the input stream is exhausted
   * @throws KettleException when a configured field cannot be resolved or the concatenation fails
   */
  @Override
  public synchronized boolean processRow( StepMetaInterface smi, StepDataInterface sdi ) throws KettleException {
    meta = (ConcatFieldsMeta) smi;
    data = (ConcatFieldsData) sdi;

    boolean result = true;
    boolean bEndedLineWrote = false;

    Object[] r = getRow(); // This also waits for a row to be finished.

    if ( r != null && first ) {
      first = false;
      data.outputRowMeta = getInputRowMeta().clone();
      meta.getFields( data.outputRowMeta, getStepname(), null, null, this, repository, metaStore );

      // the field precisions and lengths are altered! see TextFileOutputMeta.getFields().
      // otherwise trim(), padding etc. will not work
      data.inputRowMetaModified = getInputRowMeta().clone();
      meta
        .getFieldsModifyInput( data.inputRowMetaModified, getStepname(), null, null, this, repository, metaStore );

      data.posTargetField = data.outputRowMeta.indexOfValue( meta.getTargetFieldName() );
      if ( data.posTargetField < 0 ) {
        throw new KettleStepException( BaseMessages.getString(
          PKG, "ConcatFields.Error.TargetFieldNotFoundOutputStream", "" + meta.getTargetFieldName() ) );
      }

      data.fieldnrs = new int[meta.getOutputFields().length];
      for ( int i = 0; i < meta.getOutputFields().length; i++ ) {
        data.fieldnrs[i] = data.inputRowMetaModified.indexOfValue( meta.getOutputFields()[i].getName() );
        if ( data.fieldnrs[i] < 0 ) {
          throw new KettleStepException( BaseMessages.getString(
            PKG, "ConcatFields.Error.FieldNotFoundInputStream", "" + meta.getOutputFields()[i].getName() ) );
        }
      }

      // prepare for fast data dump (StringBuilder size)
      data.targetFieldLengthFastDataDump = meta.getTargetFieldLength();
      if ( data.targetFieldLengthFastDataDump <= 0 ) { // try it as a guess: 50 * size
        if ( meta.getOutputFields().length == 0 ) {
          data.targetFieldLengthFastDataDump = 50 * getInputRowMeta().size();
        } else {
          data.targetFieldLengthFastDataDump = 50 * meta.getOutputFields().length;
        }
      }
      prepareForReMap();

      checkAndWriteHeader();
    }

    // footer at end of input, or split boundary reached (getSplitEvery() rows written)
    if ( ( r == null && data.outputRowMeta != null && meta.isFooterEnabled() )
      || ( r != null && getLinesWritten() > 0 && meta.getSplitEvery() > 0
        && ( ( getLinesWritten() + 1 ) % meta.getSplitEvery() ) == 0 ) ) {

      if ( data.outputRowMeta != null ) {
        if ( meta.isFooterEnabled() ) {
          writeHeader();
          // add an empty line for the header
          Object[] row = new Object[data.outputRowMeta.size()];
          putRowFromStream( row );
        }
      }

      if ( r == null ) {
        // add tag to last line if needed
        writeEndedLine();
        bEndedLineWrote = true;
        putRowFromStream( r );
      }
    }

    if ( r == null ) { // no more input to be expected...
      if ( !bEndedLineWrote ) {
        // add tag to last line if needed
        writeEndedLine();
        bEndedLineWrote = true;
        putRowFromStream( r );
      }

      setOutputDone();
      setLinesOutput( 0 ); // we have to tweak it, no output here
      return false;
    }

    if ( !meta.isFastDump() ) {
      // instead of writing to file, writes it to a stream
      writeRowToFile( data.inputRowMetaModified, r );
      setLinesOutput( 0 ); // we have to tweak it, no output here
      r = putRowFromStream( r );
    } else { // fast data dump
      r = putRowFastDataDump( r );
    }

    if ( log.isRowLevel() ) {
      logRowlevel( BaseMessages.getString( PKG, "ConcatFields.Log.WriteRow" )
        + getLinesWritten() + " : " + data.outputRowMeta.getString( r ) );
    }
    if ( checkFeedback( getLinesRead() ) ) {
      if ( log.isBasic() ) {
        logBasic( BaseMessages.getString( PKG, "ConcatFields.Log.LineNumber" ) + getLinesRead() );
      }
    }

    return result;
  }

  /**
   * Writes the header line into the stream (as an extra output row) when the step is configured
   * with a header and is not appending to an existing file.
   */
  void checkAndWriteHeader() throws KettleStepException {
    // See if we have to write a header-line)
    if ( !meta.isFileAppended() && ( meta.isHeaderEnabled() || meta.isFooterEnabled() ) ) {
      if ( !meta.isFileNameInField() && meta.isHeaderEnabled() && data.outputRowMeta != null ) {
        writeHeader();
        // add an empty line for the header
        Object[] row = new Object[data.outputRowMeta.size()];
        putRowFromStream( row );
      }
    }
  }

  /**
   * Builds the input-to-output index mapping used when the concatenated source fields are removed
   * from the output row ("remove selected fields" option).
   *
   * @throws KettleStepException when a remaining output field cannot be found in the input stream
   */
  void prepareForReMap() throws KettleStepException {
    // prepare for re-map when removeSelectedFields
    if ( meta.isRemoveSelectedFields() ) {
      data.remainingFieldsInputOutputMapping = new int[data.outputRowMeta.size() - 1]; // -1: don't need the new
                                                                                      // target field
      String[] fieldNames = data.outputRowMeta.getFieldNames();
      for ( int i = 0; i < fieldNames.length - 1; i++ ) { // -1: don't search the new target field
        data.remainingFieldsInputOutputMapping[i] = data.inputRowMetaModified.indexOfValue( fieldNames[i] );
        if ( data.remainingFieldsInputOutputMapping[i] < 0 ) {
          throw new KettleStepException( BaseMessages.getString(
            PKG, "ConcatFields.Error.RemainingFieldNotFoundInputStream", "" + fieldNames[i] ) );
        }
      }
    }
  }

  // reads the row from the stream, flushes, adds the target field and calls putRow()
  Object[] putRowFromStream( Object[] r ) throws KettleStepException {

    byte[] targetBinary = ( (ConcatFieldsOutputStream) data.writer ).read();
    if ( r == null && targetBinary == null ) {
      return null; // special condition of header/footer/split
    }

    Object[] outputRowData = prepareOutputRow( r ); // add target field
    if ( outputRowData == null ) { // special condition of header/footer/split
      outputRowData = new Object[data.outputRowMeta.size()];
    }

    if ( targetBinary != null ) {
      if ( !data.hasEncoding ) {
        outputRowData[data.posTargetField] = new String( targetBinary );
      } else { // handle encoding
        try {
          outputRowData[data.posTargetField] = new String( targetBinary, meta.getEncoding() );
        } catch ( UnsupportedEncodingException e ) {
          throw new KettleStepException( BaseMessages.getString(
            PKG, "ConcatFields.Error.UnsupportedEncoding", "" + meta.getEncoding() ) );
        }
      }
    } else {
      outputRowData[data.posTargetField] = null;
    }

    putRow( data.outputRowMeta, outputRowData );
    return outputRowData;
  }

  // concat as a fast data dump (no formatting) and call putRow()
  // this method is only called from a normal line, never from header/footer/split stuff
  Object[] putRowFastDataDump( Object[] r ) throws KettleStepException {

    Object[] outputRowData = prepareOutputRow( r );

    StringBuilder targetString = new StringBuilder( data.targetFieldLengthFastDataDump ); // use a good capacity

    if ( meta.getOutputFields() == null || meta.getOutputFields().length == 0 ) {
      // all values in stream
      for ( int i = 0; i < getInputRowMeta().size(); i++ ) {
        if ( i > 0 ) {
          targetString.append( data.stringSeparator );
        }
        concatFieldFastDataDump( targetString, r[i], "" ); // "": no specific null value defined
      }
    } else {
      for ( int i = 0; i < data.fieldnrs.length; i++ ) {
        if ( i > 0 ) {
          targetString.append( data.stringSeparator );
        }
        concatFieldFastDataDump( targetString, r[data.fieldnrs[i]], data.stringNullValue[i] );
      }
    }

    outputRowData[data.posTargetField] = targetString.toString();

    putRow( data.outputRowMeta, outputRowData );
    return outputRowData;
  }

  // appends one value (optionally enclosed) to the fast-dump buffer, using nullString for nulls
  private void concatFieldFastDataDump( StringBuilder targetField, Object valueData, String nullString ) {
    if ( meta.isEnclosureForced() ) {
      targetField.append( data.stringEnclosure );
    }
    if ( valueData != null ) {
      targetField.append( valueData );
    } else {
      targetField.append( nullString );
    }
    if ( meta.isEnclosureForced() ) {
      targetField.append( data.stringEnclosure );
    }
  }

  // reserve room for the target field and eventually re-map the fields
  Object[] prepareOutputRow( Object[] r ) {
    Object[] outputRowData = null;
    if ( !meta.isRemoveSelectedFields() ) {
      // reserve room for the target field
      outputRowData = RowDataUtil.resizeArray( r, data.outputRowMeta.size() );
    } else {
      // reserve room for the target field and re-map the fields
      outputRowData = new Object[data.outputRowMeta.size() + RowDataUtil.OVER_ALLOCATE_SIZE];
      if ( r != null ) {
        // re-map the fields
        for ( int i = 0; i < data.remainingFieldsInputOutputMapping.length; i++ ) { // BTW: the new target field is not
                                                                                    // here
          outputRowData[i] = r[data.remainingFieldsInputOutputMapping[i]];
        }
      }
    }
    return outputRowData;
  }

  @Override
  public boolean init( StepMetaInterface smi, StepDataInterface sdi ) {
    meta = (ConcatFieldsMeta) smi;
    data = (ConcatFieldsData) sdi;

    // since we cannot call the initial init() from BaseStep we have to tweak here
    meta.setDoNotOpenNewFileInit( true ); // do not open a file in init

    data.writer = new ConcatFieldsOutputStream();

    initStringDataFields();

    boolean rv = super.init( smi, sdi ); // calls also initBinaryDataFields();
    data.binaryNewline = new byte[] {}; // tweak the CR/LF handling
    return rv;
  }

  // init separator, enclosure, null values for fast data dump
  private void initStringDataFields() {
    data.stringSeparator = "";
    data.stringEnclosure = "";

    if ( !Utils.isEmpty( meta.getSeparator() ) ) {
      data.stringSeparator = environmentSubstitute( meta.getSeparator() );
    }
    if ( !Utils.isEmpty( meta.getEnclosure() ) ) {
      data.stringEnclosure = environmentSubstitute( meta.getEnclosure() );
    }

    data.stringNullValue = new String[meta.getOutputFields().length];
    for ( int i = 0; i < meta.getOutputFields().length; i++ ) {
      data.stringNullValue[i] = "";
      String nullString = meta.getOutputFields()[i].getNullString();
      if ( !Utils.isEmpty( nullString ) ) {
        data.stringNullValue[i] = nullString;
      }
    }
  }

  @Override
  public void dispose( StepMetaInterface smi, StepDataInterface sdi ) {
    super.dispose( smi, sdi );
    // since we cannot call the initial dispose() from BaseStep we may need to tweak
    // when the dispose() from TextFileOutput will have bad effects in the future due to changes and call this manually
    // sdi.setStatus(StepExecutionStatus.STATUS_DISPOSED); // but we try to avoid
  }

}