/*! ******************************************************************************
*
* Pentaho Data Integration
*
* Copyright (C) 2002-2016 by Pentaho : http://www.pentaho.com
*
*******************************************************************************
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
******************************************************************************/
package org.pentaho.di.trans.steps.xmlinputsax;
import org.pentaho.di.core.util.Utils;
import org.pentaho.di.core.exception.KettleException;
import org.pentaho.di.core.exception.KettleValueException;
import org.pentaho.di.core.row.RowMeta;
import org.pentaho.di.core.row.ValueMetaInterface;
import org.pentaho.di.trans.Trans;
import org.pentaho.di.trans.TransMeta;
import org.pentaho.di.trans.step.BaseStep;
import org.pentaho.di.trans.step.StepDataInterface;
import org.pentaho.di.trans.step.StepInterface;
import org.pentaho.di.trans.step.StepMeta;
import org.pentaho.di.trans.step.StepMetaInterface;
/**
* Read all sorts of text files, convert them to rows and writes these to one or more output streams.
*
* @author Matt
* @since 4-apr-2003
*/
public class XMLInputSax extends BaseStep implements StepInterface {
private XMLInputSaxMeta meta;
private XMLInputSaxData data;
public XMLInputSax( StepMeta stepMeta, StepDataInterface stepDataInterface, int copyNr, TransMeta transMeta,
Trans trans ) {
super( stepMeta, stepDataInterface, copyNr, transMeta, trans );
}
public boolean processRow( StepMetaInterface smi, StepDataInterface sdi ) throws KettleException {
if ( first ) {
first = false;
data.outputRowMeta = new RowMeta();
meta.getFields( data.outputRowMeta, getStepname(), null, null, this, repository, metaStore );
// For String to <type> conversions, we allocate a conversion meta data row as well...
//
data.convertRowMeta = data.outputRowMeta.cloneToType( ValueMetaInterface.TYPE_STRING );
}
Object[] outputRowData = getRowFromXML();
if ( outputRowData == null ) {
setOutputDone(); // signal end to receiver(s)
return false; // This is the end of this step.
}
if ( log.isRowLevel() ) {
logRowlevel( "Read row: " + data.outputRowMeta.getString( outputRowData ) );
}
putRow( data.outputRowMeta, outputRowData );
// limit has been reached: stop now.
//
if ( meta.getRowLimit() > 0 && data.rownr >= meta.getRowLimit() ) {
setOutputDone();
return false;
}
return true;
}
private Object[] getRowFromXML() throws KettleValueException {
// finished reading the file, read the next file!
//
if ( data.document == null ) {
data.filename = null;
} else if ( !data.document.hasNext() ) {
data.filename = null;
}
// First, see if we need to open a new file
if ( data.filename == null ) {
if ( !openNextFile() ) {
return null;
}
}
Object[] outputRowData = data.document.getNext();
int outputIndex = meta.getInputFields().length;
// Node itemNode = XMLHandler.getSubNodeByNr(data.section,
// data.itemElement, data.itemPosition);
// data.itemPosition++;
// See if we need to add the filename to the row...
//
if ( meta.includeFilename() && !Utils.isEmpty( meta.getFilenameField() ) ) {
outputRowData[outputIndex++] = data.filename;
}
// See if we need to add the row number to the row...
if ( meta.includeRowNumber() && !Utils.isEmpty( meta.getRowNumberField() ) ) {
outputRowData[outputIndex] = new Long( data.rownr );
}
data.rownr++;
return outputRowData;
}
private boolean openNextFile() {
try {
if ( data.filenr >= data.files.length ) { // finished processing!
if ( log.isDetailed() ) {
logDetailed( "Finished processing files." );
}
return false;
}
// Is this the last file?
data.last_file = ( data.filenr == data.files.length - 1 );
data.filename = environmentSubstitute( data.files[data.filenr] );
if ( log.isBasic() ) {
logBasic( "Opening file: " + data.filename );
}
// Move file pointer ahead!
data.filenr++;
// Open the XML document
data.document = new XMLInputSaxDataRetriever( log, data.filename, meta, data );
data.document.runExample();
} catch ( Exception e ) {
logError( "Couldn't open file #" + data.filenr + " : " + data.filename, e );
stopAll();
setErrors( 1 );
return false;
}
return true;
}
public boolean init( StepMetaInterface smi, StepDataInterface sdi ) {
meta = (XMLInputSaxMeta) smi;
data = (XMLInputSaxData) sdi;
if ( super.init( smi, sdi ) ) {
data.files = meta.getFilePaths( getTransMeta() );
if ( data.files == null || data.files.length == 0 ) {
logError( "No file(s) specified! Stop processing." );
return false;
}
if ( meta.getInputPosition().length == 0 ) {
logError( "No location specified! Stop processing." );
return false;
}
if ( meta.getInputFields().length == 0 ) {
logError( "No fields specified! Stop processing." );
return false;
}
data.rownr = 1L;
return true;
}
return false;
}
public void dispose( StepMetaInterface smi, StepDataInterface sdi ) {
meta = (XMLInputSaxMeta) smi;
data = (XMLInputSaxData) sdi;
super.dispose( smi, sdi );
}
}