/*! ****************************************************************************** * * Pentaho Data Integration * * Copyright (C) 2002-2017 by Pentaho : http://www.pentaho.com * ******************************************************************************* * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * ******************************************************************************/ package org.pentaho.di.trans.steps.jsoninput; import java.io.ByteArrayInputStream; import java.io.IOException; import java.io.InputStream; import java.util.Arrays; import java.util.BitSet; import org.apache.commons.lang.NotImplementedException; import org.apache.commons.vfs2.FileObject; import org.apache.commons.vfs2.FileSystemException; import org.apache.poi.util.IOUtils; import org.pentaho.di.core.util.Utils; import org.pentaho.di.core.QueueRowSet; import org.pentaho.di.core.ResultFile; import org.pentaho.di.core.RowSet; import org.pentaho.di.core.exception.KettleException; import org.pentaho.di.core.exception.KettleStepException; import org.pentaho.di.core.exception.KettleValueException; import org.pentaho.di.core.row.RowDataUtil; import org.pentaho.di.core.row.RowMeta; import org.pentaho.di.core.row.ValueMetaInterface; import org.pentaho.di.core.vfs.KettleVFS; import org.pentaho.di.i18n.BaseMessages; import org.pentaho.di.trans.Trans; import org.pentaho.di.trans.TransMeta; import org.pentaho.di.trans.step.StepDataInterface; import org.pentaho.di.trans.step.StepInterface; import org.pentaho.di.trans.step.StepMeta; import org.pentaho.di.trans.step.StepMetaInterface; import org.pentaho.di.trans.steps.fileinput.BaseFileInputStep; import org.pentaho.di.trans.steps.fileinput.IBaseFileInputReader; import org.pentaho.di.trans.steps.jsoninput.exception.JsonInputException; import org.pentaho.di.trans.steps.jsoninput.reader.FastJsonReader; import org.pentaho.di.trans.steps.jsoninput.reader.InputsReader; import org.pentaho.di.trans.steps.jsoninput.reader.RowOutputConverter; /** * Read Json files, parse them and convert them to rows and writes these to one or more output streams. * * @author Samatar * @author edube * @author jadametz * @since 20-06-2010 */ public class JsonInput extends BaseFileInputStep<JsonInputMeta, JsonInputData> implements StepInterface { private static Class<?> PKG = JsonInputMeta.class; // for i18n purposes, needed by Translator2!! private RowOutputConverter rowOutputConverter; private static final byte[] EMPTY_JSON = "{}".getBytes(); // for replacing null inputs public JsonInput( StepMeta stepMeta, StepDataInterface stepDataInterface, int copyNr, TransMeta transMeta, Trans trans ) { super( stepMeta, stepDataInterface, copyNr, transMeta, trans ); } @Override protected boolean init() { data.rownr = 1L; data.nrInputFields = meta.getInputFields().length; data.repeatedFields = new BitSet( data.nrInputFields ); // Take care of variable substitution for ( int i = 0; i < data.nrInputFields; i++ ) { JsonInputField field = meta.getInputFields()[ i ]; if ( field.isRepeated() ) { data.repeatedFields.set( i ); } } try { // Init a new JSON reader createReader(); } catch ( KettleException e ) { logError( e.getMessage() ); return false; } return true; } @Override public boolean processRow( StepMetaInterface smi, StepDataInterface sdi ) throws KettleException { if ( first ) { first = false; prepareToRowProcessing(); } Object[] outRow = null; try { // Grab a row outRow = getOneOutputRow(); if ( outRow == null ) { setOutputDone(); // signal end to receiver(s) return false; // end of data or error. } if ( log.isRowLevel() ) { logRowlevel( BaseMessages.getString( PKG, "JsonInput.Log.ReadRow", data.outputRowMeta.getString( outRow ) ) ); } incrementLinesInput(); data.rownr++; putRow( data.outputRowMeta, outRow ); // copy row to output rowset(s); if ( meta.getRowLimit() > 0 && data.rownr > meta.getRowLimit() ) { // limit has been reached: stop now. setOutputDone(); return false; } } catch ( JsonInputException e ) { if ( !getStepMeta().isDoingErrorHandling() ) { stopErrorExecution( e ); return false; } } catch ( Exception e ) { logError( BaseMessages.getString( PKG, "JsonInput.ErrorInStepRunning", e.getMessage() ) ); if ( getStepMeta().isDoingErrorHandling() ) { sendErrorRow( e.toString() ); } else { incrementErrors(); stopErrorExecution( e ); return false; } } return true; } private void stopErrorExecution( Exception e ) { stopAll(); setOutputDone(); } @Override protected void prepareToRowProcessing() throws KettleException, KettleStepException, KettleValueException { if ( !meta.isInFields() ) { data.outputRowMeta = new RowMeta(); if ( !meta.isDoNotFailIfNoFile() && data.files.nrOfFiles() == 0 ) { String errMsg = BaseMessages.getString( PKG, "JsonInput.Log.NoFiles" ); logError( errMsg ); inputError( errMsg ); } } else { data.readrow = getRow(); data.inputRowMeta = getInputRowMeta(); if ( data.inputRowMeta == null ) { data.hasFirstRow = false; return; } data.hasFirstRow = true; data.outputRowMeta = data.inputRowMeta.clone(); // Check if source field is provided if ( Utils.isEmpty( meta.getFieldValue() ) ) { logError( BaseMessages.getString( PKG, "JsonInput.Log.NoField" ) ); throw new KettleException( BaseMessages.getString( PKG, "JsonInput.Log.NoField" ) ); } // cache the position of the field if ( data.indexSourceField < 0 ) { data.indexSourceField = getInputRowMeta().indexOfValue( meta.getFieldValue() ); if ( data.indexSourceField < 0 ) { logError( BaseMessages.getString( PKG, "JsonInput.Log.ErrorFindingField", meta.getFieldValue() ) ); throw new KettleException( BaseMessages.getString( PKG, "JsonInput.Exception.CouldnotFindField", meta.getFieldValue() ) ); } } // if RemoveSourceField option is set, we remove the source field from the output meta if ( meta.isRemoveSourceField() ) { data.outputRowMeta.removeValueMeta( data.indexSourceField ); // Get total previous fields minus one since we remove source field data.totalpreviousfields = data.inputRowMeta.size() - 1; } else { // Get total previous fields data.totalpreviousfields = data.inputRowMeta.size(); } } meta.getFields( data.outputRowMeta, getStepname(), null, null, this, repository, metaStore ); // Create convert meta-data objects that will contain Date & Number formatters data.convertRowMeta = data.outputRowMeta.cloneToType( ValueMetaInterface.TYPE_STRING ); data.inputs = new InputsReader( this, meta, data, new InputErrorHandler() ).iterator(); // data.recordnr = 0; data.readerRowSet = new QueueRowSet(); data.readerRowSet.setDone(); this.rowOutputConverter = new RowOutputConverter( getLogChannel() ); // provide reader input fields with real path [PDI-15942] JsonInputField[] inputFields = new JsonInputField[data.nrInputFields]; for ( int i = 0; i < data.nrInputFields; i++ ) { JsonInputField field = meta.getInputFields()[ i ].clone(); field.setPath( environmentSubstitute( field.getPath() ) ); inputFields[i] = field; } data.reader.setFields( inputFields ); } private void addFileToResultFilesname( FileObject file ) { if ( meta.addResultFile() ) { // Add this to the result file names... ResultFile resultFile = new ResultFile( ResultFile.FILE_TYPE_GENERAL, file, getTransMeta().getName(), getStepname() ); resultFile.setComment( BaseMessages.getString( PKG, "JsonInput.Log.FileAddedResult" ) ); addResultFile( resultFile ); } } public boolean onNewFile( FileObject file ) throws FileSystemException { if ( file == null ) { String errMsg = BaseMessages.getString( PKG, "JsonInput.Log.IsNotAFile", "null" ); logError( errMsg ); inputError( errMsg ); return false; } else if ( !file.exists() ) { String errMsg = BaseMessages.getString( PKG, "JsonInput.Log.IsNotAFile", file.getName().getFriendlyURI() ); logError( errMsg ); inputError( errMsg ); return false; } if ( hasAdditionalFileFields() ) { fillFileAdditionalFields( data, file ); } if ( file.getContent().getSize() == 0 ) { // log only basic as a warning (was before logError) if ( meta.isIgnoreEmptyFile() ) { logBasic( BaseMessages.getString( PKG, "JsonInput.Error.FileSizeZero", "" + file.getName() ) ); } else { logError( BaseMessages.getString( PKG, "JsonInput.Error.FileSizeZero", "" + file.getName() ) ); incrementErrors(); return false; } } return true; } @Override protected void fillFileAdditionalFields( JsonInputData data, FileObject file ) throws FileSystemException { super.fillFileAdditionalFields( data, file ); data.filename = KettleVFS.getFilename( file ); data.filenr++; if ( log.isDetailed() ) { logDetailed( BaseMessages.getString( PKG, "JsonInput.Log.OpeningFile", file.toString() ) ); } addFileToResultFilesname( file ); } private void parseNextInputToRowSet( InputStream input ) throws KettleException { try { data.readerRowSet = data.reader.parse( input ); input.close(); } catch ( KettleException ke ) { logInputError( ke ); throw new JsonInputException( ke ); } catch ( Exception e ) { logInputError( e ); throw new JsonInputException( e ); } } private void logInputError( KettleException e ) { logError( e.getLocalizedMessage(), e ); inputError( e.getLocalizedMessage() ); } private void logInputError( Exception e ) { String errMsg = ( !meta.isInFields() || meta.getIsAFile() ) ? BaseMessages.getString( PKG, "JsonReader.Error.ParsingFile", data.filename ) : BaseMessages.getString( PKG, "JsonReader.Error.ParsingString", data.readrow[ data.indexSourceField ] ); logError( errMsg, e ); inputError( errMsg ); } private void incrementErrors() { setErrors( getErrors() + 1 ); } private void inputError( String errorMsg ) { if ( getStepMeta().isDoingErrorHandling() ) { sendErrorRow( errorMsg ); } else { incrementErrors(); } } private class InputErrorHandler implements InputsReader.ErrorHandler { @Override public void error( Exception e ) { logError( BaseMessages.getString( PKG, "JsonInput.Log.UnexpectedError", e.toString() ) ); setErrors( getErrors() + 1 ); } @Override public void fileOpenError( FileObject file, FileSystemException e ) { String msg = BaseMessages.getString( PKG, "JsonInput.Log.UnableToOpenFile", "" + data.filenr, file.toString(), e.toString() ); logError( msg ); inputError( msg ); } @Override public void fileCloseError( FileObject file, FileSystemException e ) { error( e ); } } /** * get final row for output */ private Object[] getOneOutputRow() throws KettleException { if ( meta.isInFields() && !data.hasFirstRow ) { return null; } Object[] rawReaderRow = null; while ( ( rawReaderRow = data.readerRowSet.getRow() ) == null ) { if ( data.inputs.hasNext() && data.readerRowSet.isDone() ) { try ( InputStream nextIn = data.inputs.next() ) { if ( nextIn != null ) { parseNextInputToRowSet( nextIn ); } else { parseNextInputToRowSet( new ByteArrayInputStream( EMPTY_JSON ) ); } } catch ( IOException e ) { logError( BaseMessages.getString( PKG, "JsonInput.Log.UnexpectedError", e.toString() ), e ); incrementErrors(); } } else { if ( isDetailed() ) { logDetailed( BaseMessages.getString( PKG, "JsonInput.Log.FinishedProcessing" ) ); } return null; } } boolean rowContainsData = Arrays.stream( rawReaderRow ).anyMatch( el -> el != null ); Object[] outputRow; if ( rowContainsData ) { outputRow = rowOutputConverter.getRow( buildBaseOutputRow(), rawReaderRow, data ); } else { outputRow = buildBaseOutputRow(); } addExtraFields( outputRow, data ); return outputRow; } private void sendErrorRow( String errorMsg ) { try { // same error as before String defaultErrCode = "JsonInput001"; if ( data.readrow != null ) { putError( getInputRowMeta(), data.readrow, 1, errorMsg, meta.getFieldValue(), defaultErrCode ); } else { // when no input only error fields are recognized putError( new RowMeta(), new Object[ 0 ], 1, errorMsg, null, defaultErrCode ); } } catch ( KettleStepException e ) { logError( e.getLocalizedMessage(), e ); } } private boolean hasAdditionalFileFields() { return data.file != null; } private boolean isEmpty( RowSet readerRowSet ) { return readerRowSet.size() == 0 && readerRowSet.isDone(); } /** * allocates out row */ private Object[] buildBaseOutputRow() { Object[] outputRowData; if ( data.readrow != null ) { if ( meta.isRemoveSourceField() && data.indexSourceField > -1 ) { // skip the source field in the output array int sz = data.readrow.length; outputRowData = RowDataUtil.allocateRowData( data.outputRowMeta.size() ); int ii = 0; for ( int i = 0; i < sz; i++ ) { if ( i != data.indexSourceField ) { outputRowData[ ii++ ] = data.readrow[ i ]; } } } else { outputRowData = RowDataUtil.createResizedCopy( data.readrow, data.outputRowMeta.size() ); } } else { outputRowData = RowDataUtil.allocateRowData( data.outputRowMeta.size() ); } return outputRowData; } // should be refactored private void addExtraFields( Object[] outputRowData, JsonInputData data ) { int rowIndex = data.totalpreviousfields + data.nrInputFields; // See if we need to add the filename to the row... if ( meta.includeFilename() && !Utils.isEmpty( meta.getFilenameField() ) ) { outputRowData[ rowIndex++ ] = data.filename; } // See if we need to add the row number to the row... if ( meta.includeRowNumber() && !Utils.isEmpty( meta.getRowNumberField() ) ) { outputRowData[ rowIndex++ ] = new Long( data.rownr ); } // Possibly add short filename... if ( meta.getShortFileNameField() != null && meta.getShortFileNameField().length() > 0 ) { outputRowData[ rowIndex++ ] = data.shortFilename; } // Add Extension if ( meta.getExtensionField() != null && meta.getExtensionField().length() > 0 ) { outputRowData[ rowIndex++ ] = data.extension; } // add path if ( meta.getPathField() != null && meta.getPathField().length() > 0 ) { outputRowData[ rowIndex++ ] = data.path; } // Add Size if ( meta.getSizeField() != null && meta.getSizeField().length() > 0 ) { outputRowData[ rowIndex++ ] = new Long( data.size ); } // add Hidden if ( meta.isHiddenField() != null && meta.isHiddenField().length() > 0 ) { outputRowData[ rowIndex++ ] = new Boolean( data.path ); } // Add modification date if ( meta.getLastModificationDateField() != null && meta.getLastModificationDateField().length() > 0 ) { outputRowData[ rowIndex++ ] = data.lastModificationDateTime; } // Add Uri if ( meta.getUriField() != null && meta.getUriField().length() > 0 ) { outputRowData[ rowIndex++ ] = data.uriName; } // Add RootUri if ( meta.getRootUriField() != null && meta.getRootUriField().length() > 0 ) { outputRowData[ rowIndex++ ] = data.rootUriName; } } private void createReader() throws KettleException { data.reader = new FastJsonReader( meta.getInputFields(), log ); data.reader.setIgnoreMissingPath( meta.isIgnoreMissingPath() ); } @Override public void dispose( StepMetaInterface smi, StepDataInterface sdi ) { meta = (JsonInputMeta) smi; data = (JsonInputData) sdi; if ( data.file != null ) { IOUtils.closeQuietly( data.file ); } data.inputs = null; data.reader = null; data.readerRowSet = null; data.repeatedFields = null; super.dispose( smi, sdi ); } /** * Only to comply with super, does nothing good. * * @throws NotImplementedException everytime */ @Override protected IBaseFileInputReader createReader( JsonInputMeta meta, JsonInputData data, FileObject file ) throws Exception { throw new NotImplementedException(); } }