/*! ******************************************************************************
*
* Pentaho Data Integration
*
* Copyright (C) 2002-2016 by Pentaho : http://www.pentaho.com
*
*******************************************************************************
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
******************************************************************************/
package org.pentaho.di.trans.steps.propertyinput;
import java.io.InputStream;
import java.nio.charset.Charset;
import java.util.Date;
import java.util.List;
import java.util.Properties;
import org.apache.commons.vfs2.FileObject;
import org.ini4j.Wini;
import org.pentaho.di.core.Const;
import org.pentaho.di.core.util.Utils;
import org.pentaho.di.core.ResultFile;
import org.pentaho.di.core.exception.KettleException;
import org.pentaho.di.core.fileinput.FileInputList;
import org.pentaho.di.core.row.RowDataUtil;
import org.pentaho.di.core.row.RowMeta;
import org.pentaho.di.core.row.RowMetaInterface;
import org.pentaho.di.core.row.ValueMetaInterface;
import org.pentaho.di.core.vfs.KettleVFS;
import org.pentaho.di.i18n.BaseMessages;
import org.pentaho.di.trans.Trans;
import org.pentaho.di.trans.TransMeta;
import org.pentaho.di.trans.step.BaseStep;
import org.pentaho.di.trans.step.StepDataInterface;
import org.pentaho.di.trans.step.StepInterface;
import org.pentaho.di.trans.step.StepMeta;
import org.pentaho.di.trans.step.StepMetaInterface;
/**
* Reads all properties files (and INI files), converts their entries to rows and writes these rows to one or more
* output streams.
*
* @author Samatar
* @since 24-03-2008
*/
public class PropertyInput extends BaseStep implements StepInterface {
private static Class<?> PKG = PropertyInputMeta.class; // for i18n purposes, needed by Translator2!!
private PropertyInputMeta meta;
private PropertyInputData data;
/**
 * Creates the step instance; every argument is handed unchanged to the {@link BaseStep} constructor.
 *
 * @param stepMeta          passed through to {@code BaseStep}
 * @param stepDataInterface passed through to {@code BaseStep}
 * @param copyNr            passed through to {@code BaseStep}
 * @param transMeta         passed through to {@code BaseStep}
 * @param trans             passed through to {@code BaseStep}
 */
public PropertyInput( StepMeta stepMeta, StepDataInterface stepDataInterface, int copyNr, TransMeta transMeta,
Trans trans ) {
super( stepMeta, stepDataInterface, copyNr, transMeta, trans );
}
/**
 * Produces at most one output row per call from the property/INI files being read.
 * <p>
 * On the first call in static-file mode (file names are not taken from an input field) the file list is resolved
 * and validated, and the output and conversion row metadata are built. Every call then fetches one row through
 * {@code getOneRow()} and forwards it with {@code putRow}.
 *
 * @param smi step metadata (not read here; the {@code meta} field assigned in {@code init} is used)
 * @param sdi step data (not read here; the {@code data} field assigned in {@code init} is used)
 * @return {@code true} if the step should be called again; {@code false} once no more rows are available, the row
 *         limit was exceeded, or an error occurred while error handling is not enabled
 * @throws KettleException if no files are configured, or a required file is missing or not accessible
 */
public boolean processRow( StepMetaInterface smi, StepDataInterface sdi ) throws KettleException {
  if ( first && !meta.isFileField() ) {
    // One-time setup. Clear the flag, otherwise the file list is resolved again and the row metadata
    // rebuilt for every single row.
    first = false;

    data.files = meta.getFiles( this );
    if ( data.files == null || data.files.nrOfFiles() == 0 ) {
      throw new KettleException( BaseMessages.getString( PKG, "PropertyInput.Log.NoFiles" ) );
    }
    handleMissingFiles();

    // Create the output row meta-data and let the step meta populate it
    data.outputRowMeta = new RowMeta();
    meta.getFields( data.outputRowMeta, getStepname(), null, null, this, repository, metaStore );

    // Create convert meta-data objects that will contain Date & Number formatters
    data.convertRowMeta = data.outputRowMeta.cloneToType( ValueMetaInterface.TYPE_STRING );
  }

  try {
    // Grab one row
    Object[] outputRowData = getOneRow();
    if ( outputRowData == null ) {
      setOutputDone(); // signal end to receiver(s)
      return false; // end of data or error.
    }

    putRow( data.outputRowMeta, outputRowData ); // copy row to output rowset(s);

    if ( meta.getRowLimit() > 0 && data.rownr > meta.getRowLimit() ) { // limit has been reached: stop now.
      setOutputDone();
      return false;
    }
  } catch ( KettleException e ) {
    if ( !getStepMeta().isDoingErrorHandling() ) {
      logError( BaseMessages.getString( PKG, "PropertyInput.ErrorInStepRunning", e.getMessage() ) );
      setErrors( 1 );
      stopAll();
      setOutputDone(); // signal end to receiver(s)
      return false;
    }
    // Send the input row that was being processed to the error stream. data.readrow is only assigned in
    // file-field mode (by openNextFile); in static-file mode it stays null because there is no input row.
    putError( getInputRowMeta(), data.readrow, 1, e.toString(), null, "PropertyInput001" );
  }
  return true;
}
/**
 * Verifies the resolved file list in {@code data.files}: first for files that do not exist, then for files that
 * are not accessible. The first non-empty category is logged and reported as an exception.
 *
 * @throws KettleException if at least one file is reported as non-existent or as non-accessible
 */
private void handleMissingFiles() throws KettleException {
  List<FileObject> missing = data.files.getNonExistantFiles();
  if ( !missing.isEmpty() ) {
    String description = FileInputList.getRequiredFilesDescription( missing );
    String title = BaseMessages.getString( PKG, "PropertyInput.Log.RequiredFilesTitle" );
    String detail = BaseMessages.getString( PKG, "PropertyInput.Log.RequiredFiles", description );
    logError( title, detail );
    throw new KettleException(
      BaseMessages.getString( PKG, "PropertyInput.Log.RequiredFilesMissing", description ) );
  }

  List<FileObject> unreadable = data.files.getNonAccessibleFiles();
  if ( !unreadable.isEmpty() ) {
    String description = FileInputList.getRequiredFilesDescription( unreadable );
    String title = BaseMessages.getString( PKG, "PropertyInput.Log.RequiredFilesTitle" );
    String detail = BaseMessages.getString( PKG, "PropertyInput.Log.RequiredNotAccessibleFiles", description );
    logError( title, detail );
    throw new KettleException(
      BaseMessages.getString( PKG, "PropertyInput.Log.RequiredNotAccessibleFilesMissing", description ) );
  }
}
/**
 * Reads the next key/value entry from the current properties or INI file and converts it into an output row.
 * <p>
 * The row holds one value per configured input field (the key itself or the value stored under it, trimmed and
 * converted to the target type), followed by the optional extra fields (file name, row number, INI section,
 * short file name, extension, path, size, hidden flag, last modification date, URI, root URI) that are enabled
 * in the step metadata. In file-field mode these values are appended to a copy of the incoming row.
 *
 * @return the populated row, or {@code null} when there is nothing left to read or positioning on the next
 *         entry failed (the failure is logged)
 * @throws KettleException if building the row from the current entry fails
 */
private Object[] getOneRow() throws KettleException {
  try {
    if ( !positionOnNextPropertyEntry() ) {
      return null;
    }
  } catch ( Exception e ) {
    // Pass the exception along so the log shows the stack trace, not just the message.
    logError( "Unable to read row from file : " + e.getMessage(), e );
    return null;
  }

  final boolean fileField = meta.isFileField();
  // In file-field mode the output row starts as a copy of the incoming row, otherwise as an empty row.
  Object[] r = fileField ? data.readrow.clone() : buildEmptyRow();

  try {
    final String key = data.propfiles ? data.it.next().toString() : data.iniIt.next().toString();
    final PropertyInputField[] fields = meta.getInputFields();

    // Execute for each Input field...
    for ( int i = 0; i < fields.length; i++ ) {
      final PropertyInputField field = fields[i];
      final int targetIndex = data.totalpreviousfields + i;

      // Get field value: either the key itself or the value stored under the key
      String value;
      if ( field.getColumnCode().equals( PropertyInputField.ColumnCode[PropertyInputField.COLUMN_KEY] ) ) {
        value = key;
      } else {
        value = data.propfiles ? data.pro.getProperty( key ) : data.iniSection.fetch( key );
        if ( meta.isResolveValueVariable() ) {
          value = environmentSubstitute( value );
        }
      }

      // DO Trimming!
      switch ( field.getTrimType() ) {
        case PropertyInputField.TYPE_TRIM_LEFT:
          value = Const.ltrim( value );
          break;
        case PropertyInputField.TYPE_TRIM_RIGHT:
          value = Const.rtrim( value );
          break;
        case PropertyInputField.TYPE_TRIM_BOTH:
          value = Const.trim( value );
          break;
        default:
          break;
      }

      if ( fileField ) {
        // Add result field to input stream
        r = RowDataUtil.addValueData( r, targetIndex, value );
      }

      // DO CONVERSIONS...
      ValueMetaInterface targetValueMeta = data.outputRowMeta.getValueMeta( targetIndex );
      ValueMetaInterface sourceValueMeta = data.convertRowMeta.getValueMeta( targetIndex );
      r[targetIndex] = targetValueMeta.convertData( sourceValueMeta, value );

      // Do we need to repeat this field if it is null?
      if ( field.isRepeated() && data.previousRow != null && Utils.isEmpty( value ) ) {
        r[targetIndex] = data.previousRow[targetIndex];
      }
    } // End of loop over fields...

    // Extra fields follow the input fields; the order below must stay as it is.
    int rowIndex = data.totalpreviousfields + fields.length;

    // See if we need to add the filename to the row...
    if ( meta.includeFilename() && !Utils.isEmpty( meta.getFilenameField() ) ) {
      r[rowIndex++] = data.filename;
    }
    // See if we need to add the row number to the row...
    if ( meta.includeRowNumber() && !Utils.isEmpty( meta.getRowNumberField() ) ) {
      r[rowIndex++] = Long.valueOf( data.rownr );
    }
    // See if we need to add the section for INI files ...
    if ( meta.includeIniSection() && !Utils.isEmpty( meta.getINISectionField() ) ) {
      // data.iniSection is never assigned while reading properties files; keep the slot (so the following
      // fields stay aligned) but leave it null instead of failing with a NullPointerException.
      r[rowIndex++] = data.iniSection == null ? null : environmentSubstitute( data.iniSection.getName() );
    }
    // Possibly add short filename...
    if ( !Utils.isEmpty( meta.getShortFileNameField() ) ) {
      r[rowIndex++] = data.shortFilename;
    }
    // Add Extension
    if ( !Utils.isEmpty( meta.getExtensionField() ) ) {
      r[rowIndex++] = data.extension;
    }
    // add path
    if ( !Utils.isEmpty( meta.getPathField() ) ) {
      r[rowIndex++] = data.path;
    }
    // Add Size
    if ( !Utils.isEmpty( meta.getSizeField() ) ) {
      r[rowIndex++] = Long.valueOf( data.size );
    }
    // add Hidden
    if ( !Utils.isEmpty( meta.isHiddenField() ) ) {
      r[rowIndex++] = Boolean.valueOf( data.hidden );
    }
    // Add modification date
    if ( !Utils.isEmpty( meta.getLastModificationDateField() ) ) {
      r[rowIndex++] = data.lastModificationDateTime;
    }
    // Add Uri
    if ( !Utils.isEmpty( meta.getUriField() ) ) {
      r[rowIndex++] = data.uriName;
    }
    // Add RootUri
    if ( !Utils.isEmpty( meta.getRootUriField() ) ) {
      r[rowIndex++] = data.rootUriName;
    }

    // Remember this row for "repeat" fields; copy it when input row metadata is available so the next
    // step cannot change it in between.
    RowMetaInterface irow = getInputRowMeta();
    data.previousRow = irow == null ? r : irow.cloneRow( r );

    incrementLinesInput();
    data.rownr++;
  } catch ( Exception e ) {
    throw new KettleException( BaseMessages.getString( PKG, "PropertyInput.Error.CanNotReadFromFile", data.file
      .toString() ), e );
  }
  return r;
}

/**
 * Advances the reading position until an unread key is available: moves to the next INI section when all
 * sections are being read, otherwise opens the next file (or fetches the next input row in file-field mode).
 *
 * @return {@code true} if a key is available, {@code false} if {@code openNextFile()} reported that nothing
 *         more can be read
 */
private boolean positionOnNextPropertyEntry() {
  final boolean fileField = meta.isFileField();
  while ( true ) {
    // A source counts as open once an input row (file-field mode) or a file (static mode) has been taken.
    final boolean sourceOpen = fileField ? data.readrow != null : data.file != null;
    if ( sourceOpen && ( data.propfiles ? data.it.hasNext() : data.iniIt.hasNext() ) ) {
      return true;
    }
    // In case we read all sections, maybe we have to change section for ini files...
    if ( !data.propfiles && data.realSection == null && sourceOpen && data.itSection.hasNext() ) {
      data.iniSection = data.wini.get( data.itSection.next().toString() );
      data.iniIt = data.iniSection.keySet().iterator();
    } else if ( !openNextFile() ) {
      return false;
    }
  }
}
/**
 * Selects the next file to read and loads its content.
 * <p>
 * In static-file mode the next entry of {@code data.files} is taken. In file-field mode the next input row is
 * fetched with {@code getRow()} and the file name is read from the configured field; on the first row the
 * output and conversion row metadata are derived from the input row metadata and the field index is resolved.
 * The file attributes requested in the step metadata are captured, the file is optionally registered as a
 * result file, and the content is loaded into {@code data.pro} (properties) or {@code data.wini} (INI), with
 * the matching key iterator prepared.
 *
 * @return {@code true} if a file was opened and loaded; {@code false} if there are no more files or input
 *         rows, or if opening failed (the error is logged, {@code stopAll()} is called and the error count
 *         is set to 1)
 */
private boolean openNextFile() {
  InputStream fis = null;
  try {
    if ( !meta.isFileField() ) {
      if ( data.filenr >= data.files.nrOfFiles() ) { // finished processing!
        if ( log.isDetailed() ) {
          logDetailed( BaseMessages.getString( PKG, "PropertyInput.Log.FinishedProcessing" ) );
        }
        return false;
      }

      // Is this the last file?
      data.last_file = ( data.filenr == data.files.nrOfFiles() - 1 );
      data.file = data.files.getFile( data.filenr );

      // Move file pointer ahead!
      data.filenr++;
    } else {
      data.readrow = getRow(); // Get row from input rowset & set row busy!
      if ( data.readrow == null ) {
        if ( log.isDetailed() ) {
          logDetailed( BaseMessages.getString( PKG, "PropertyInput.Log.FinishedProcessing" ) );
        }
        return false;
      }

      if ( first ) {
        first = false;

        data.inputRowMeta = getInputRowMeta();
        data.outputRowMeta = data.inputRowMeta.clone();
        meta.getFields( data.outputRowMeta, getStepname(), null, null, this, repository, metaStore );

        // Get total previous fields
        data.totalpreviousfields = data.inputRowMeta.size();

        // Create convert meta-data objects that will contain Date & Number formatters
        data.convertRowMeta = data.outputRowMeta.cloneToType( ValueMetaInterface.TYPE_STRING );

        // Check if the filename field is provided
        if ( Utils.isEmpty( meta.getDynamicFilenameField() ) ) {
          logError( BaseMessages.getString( PKG, "PropertyInput.Log.NoField" ) );
          throw new KettleException( BaseMessages.getString( PKG, "PropertyInput.Log.NoField" ) );
        }

        // cache the position of the field
        if ( data.indexOfFilenameField < 0 ) {
          data.indexOfFilenameField = getInputRowMeta().indexOfValue( meta.getDynamicFilenameField() );
          if ( data.indexOfFilenameField < 0 ) {
            // The field is unreachable !
            logError( BaseMessages.getString( PKG, "PropertyInput.Log.ErrorFindingField" )
              + "[" + meta.getDynamicFilenameField() + "]" );
            throw new KettleException( BaseMessages.getString(
              PKG, "PropertyInput.Exception.CouldnotFindField", meta.getDynamicFilenameField() ) );
          }
        }
      } // End if first

      String filename = getInputRowMeta().getString( data.readrow, data.indexOfFilenameField );
      if ( log.isDetailed() ) {
        logDetailed( BaseMessages.getString( PKG, "PropertyInput.Log.FilenameInStream", meta
          .getDynamicFilenameField(), filename ) );
      }

      data.file = KettleVFS.getFileObject( filename, getTransMeta() );
    }

    data.filename = KettleVFS.getFilename( data.file );

    // Capture only those file attributes for which an output field name is configured
    if ( !Utils.isEmpty( meta.getShortFileNameField() ) ) {
      data.shortFilename = data.file.getName().getBaseName();
    }
    if ( !Utils.isEmpty( meta.getPathField() ) ) {
      data.path = KettleVFS.getFilename( data.file.getParent() );
    }
    if ( !Utils.isEmpty( meta.isHiddenField() ) ) {
      data.hidden = data.file.isHidden();
    }
    if ( !Utils.isEmpty( meta.getExtensionField() ) ) {
      data.extension = data.file.getName().getExtension();
    }
    if ( !Utils.isEmpty( meta.getLastModificationDateField() ) ) {
      data.lastModificationDateTime = new Date( data.file.getContent().getLastModifiedTime() );
    }
    if ( !Utils.isEmpty( meta.getUriField() ) ) {
      data.uriName = data.file.getName().getURI();
    }
    if ( !Utils.isEmpty( meta.getRootUriField() ) ) {
      data.rootUriName = data.file.getName().getRootURI();
    }
    if ( !Utils.isEmpty( meta.getSizeField() ) ) {
      data.size = Long.valueOf( data.file.getContent().getSize() );
    }

    if ( meta.resetRowNumber() ) {
      data.rownr = 0;
    }

    if ( log.isDetailed() ) {
      logDetailed( BaseMessages.getString( PKG, "PropertyInput.Log.OpeningFile", data.file.toString() ) );
    }

    if ( meta.isAddResultFile() ) {
      // Add this to the result file names...
      ResultFile resultFile =
        new ResultFile( ResultFile.FILE_TYPE_GENERAL, data.file, getTransMeta().getName(), getStepname() );
      resultFile.setComment( BaseMessages.getString( PKG, "PropertyInput.Log.FileAddedResult" ) );
      addResultFile( resultFile );
    }

    fis = data.file.getContent().getInputStream();
    if ( data.propfiles ) {
      // load properties file
      data.pro = new Properties();
      data.pro.load( fis );
      data.it = data.pro.keySet().iterator();
    } else {
      // create wini object
      data.wini = new Wini();
      if ( !Utils.isEmpty( data.realEncoding ) ) {
        data.wini.getConfig().setFileEncoding( Charset.forName( data.realEncoding ) );
      }

      // load INI file
      data.wini.load( fis );

      if ( data.realSection != null ) {
        // just one section
        data.iniSection = data.wini.get( data.realSection );
        if ( data.iniSection == null ) {
          throw new KettleException( BaseMessages.getString(
            PKG, "PropertyInput.Error.CanNotFindSection", data.realSection, "" + data.file.getName() ) );
        }
      } else {
        // We need to fetch all sections; start with the first one
        data.itSection = data.wini.keySet().iterator();
        data.iniSection = data.wini.get( data.itSection.next().toString() );
      }
      data.iniIt = data.iniSection.keySet().iterator();
    }

    if ( log.isDetailed() ) {
      logDetailed( BaseMessages.getString( PKG, "PropertyInput.Log.FileOpened", data.file.toString() ) );
      logDetailed( BaseMessages.getString( PKG, "PropertyInput.log.TotalKey", ""
        + ( data.propfiles ? data.pro.size() : data.iniSection.size() ), KettleVFS.getFilename( data.file ) ) );
    }
  } catch ( Exception e ) {
    // data.file is still null when the failure happened before a file was selected (e.g. the filename
    // field is not configured). String.valueOf keeps this handler from throwing a NullPointerException,
    // which would hide the real error and skip stopAll()/setErrors().
    logError( BaseMessages.getString( PKG, "PropertyInput.Log.UnableToOpenFile", "" + data.filenr,
      String.valueOf( data.file ), e.toString() ) );
    stopAll();
    setErrors( 1 );
    return false;
  } finally {
    BaseStep.closeQuietly( fis );
  }
  return true;
}
/**
 * Allocates a fresh row array sized for the current output row metadata.
 *
 * @return the array returned by {@code RowDataUtil.allocateRowData} for {@code data.outputRowMeta.size()}
 */
private Object[] buildEmptyRow() {
  return RowDataUtil.allocateRowData( data.outputRowMeta.size() );
}
/**
 * Stores the step metadata and data objects and, once the base class initialised successfully, prepares the
 * per-run settings: the variable-substituted encoding and section name (kept only when non-empty), the file
 * type flag, the starting row number and the number of previous fields.
 *
 * @param smi step metadata, cast to {@link PropertyInputMeta}
 * @param sdi step data, cast to {@link PropertyInputData}
 * @return {@code true} if {@code super.init} succeeded, {@code false} otherwise
 */
public boolean init( StepMetaInterface smi, StepDataInterface sdi ) {
  meta = (PropertyInputMeta) smi;
  data = (PropertyInputData) sdi;

  if ( !super.init( smi, sdi ) ) {
    return false;
  }

  String encoding = environmentSubstitute( meta.getEncoding() );
  if ( !Utils.isEmpty( encoding ) ) {
    data.realEncoding = encoding;
  }

  String section = environmentSubstitute( meta.getSection() );
  if ( !Utils.isEmpty( section ) ) {
    data.realSection = section;
  }

  // true when reading properties files, false when reading INI files
  data.propfiles =
    PropertyInputMeta.FILE_TYPE_PROPERTY == PropertyInputMeta.getFileTypeByDesc( meta.getFileType() );
  data.rownr = 1L;
  data.totalpreviousfields = 0;
  return true;
}
/**
 * Drops the references held in the step data (current input row, INI section and section iterator), closes
 * the current file on a best-effort basis and then delegates to {@code super.dispose}.
 *
 * @param smi step metadata, cast to {@link PropertyInputMeta}
 * @param sdi step data, cast to {@link PropertyInputData}
 */
public void dispose( StepMetaInterface smi, StepDataInterface sdi ) {
  meta = (PropertyInputMeta) smi;
  data = (PropertyInputData) sdi;

  data.readrow = null;

  if ( data.iniSection != null ) {
    data.iniSection.clear();
  }
  data.iniSection = null;
  data.itSection = null;

  if ( data.file != null ) {
    try {
      data.file.close();
      data.file = null;
    } catch ( Exception e ) {
      // Ignore errors: closing is best effort during clean-up
    }
  }

  super.dispose( smi, sdi );
}
}