/*! ****************************************************************************** * * Pentaho Data Integration * * Copyright (C) 2002-2016 by Pentaho : http://www.pentaho.com * ******************************************************************************* * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * ******************************************************************************/ package org.pentaho.di.trans.steps.xmlinput; import org.pentaho.di.core.Const; import org.pentaho.di.core.util.Utils; import org.pentaho.di.core.ResultFile; import org.pentaho.di.core.exception.KettleException; import org.pentaho.di.core.exception.KettleValueException; import org.pentaho.di.core.row.RowDataUtil; import org.pentaho.di.core.row.RowMeta; import org.pentaho.di.core.row.RowMetaInterface; import org.pentaho.di.core.row.ValueMetaInterface; import org.pentaho.di.core.vfs.KettleVFS; import org.pentaho.di.core.xml.XMLHandler; import org.pentaho.di.i18n.BaseMessages; import org.pentaho.di.trans.Trans; import org.pentaho.di.trans.TransMeta; import org.pentaho.di.trans.step.BaseStep; import org.pentaho.di.trans.step.StepDataInterface; import org.pentaho.di.trans.step.StepInterface; import org.pentaho.di.trans.step.StepMeta; import org.pentaho.di.trans.step.StepMetaInterface; import org.w3c.dom.Node; import org.w3c.dom.NodeList; /** * Read all sorts of text files, convert them to rows and writes these to one or more output streams. * * @author Matt * @since 4-apr-2003 */ public class XMLInput extends BaseStep implements StepInterface { private static Class<?> PKG = XMLInputMeta.class; // for i18n purposes, needed by Translator2!! private XMLInputMeta meta; private XMLInputData data; public XMLInput( StepMeta stepMeta, StepDataInterface stepDataInterface, int copyNr, TransMeta transMeta, Trans trans ) { super( stepMeta, stepDataInterface, copyNr, transMeta, trans ); } public boolean processRow( StepMetaInterface smi, StepDataInterface sdi ) throws KettleException { if ( first ) { // we just got started first = false; data.outputRowMeta = new RowMeta(); meta.getFields( data.outputRowMeta, getStepname(), null, null, this, repository, metaStore ); // For String to <type> conversions, we allocate a conversion meta data row as well... // data.convertRowMeta = data.outputRowMeta.cloneToType( ValueMetaInterface.TYPE_STRING ); } Object[] outputRowData = getRowFromXML(); if ( outputRowData == null ) { setOutputDone(); // signal end to receiver(s) return false; // This is the end of this step. } if ( log.isRowLevel() ) { logRowlevel( BaseMessages.getString( PKG, "XMLInput.Log.ReadRow", outputRowData.toString() ) ); } incrementLinesInput(); putRow( data.outputRowMeta, outputRowData ); // limit has been reached, stop now. if ( meta.getRowLimit() > 0 && data.rownr >= meta.getRowLimit() ) { setOutputDone(); return false; } return true; } private Object[] getRowFromXML() throws KettleValueException { // finished reading the file, read the next file while ( data.itemPosition >= data.itemCount || data.file == null ) { data.file = null; if ( !openNextFile() ) { return null; } } Object[] outputRowData = buildEmptyRow(); // Get the item in the XML file... // First get the appropriate node Node itemNode; if ( meta.getInputPosition().length > 1 ) { itemNode = XMLHandler.getSubNodeByNr( data.section, data.itemElement, data.itemPosition ); } else { itemNode = data.section; // Only the root node, 1 element to read // in the whole document. } data.itemPosition++; // Read from the Node... for ( int i = 0; i < meta.getInputFields().length; i++ ) { Node node = itemNode; XMLInputField xmlInputField = meta.getInputFields()[i]; // This value will contain the value we're looking for... // String value = null; for ( int p = 0; ( value == null ) && node != null && p < xmlInputField.getFieldPosition().length; p++ ) { XMLInputFieldPosition pos = xmlInputField.getFieldPosition()[p]; switch ( pos.getType() ) { case XMLInputFieldPosition.XML_ELEMENT: if ( pos.getElementNr() <= 1 ) { Node subNode = XMLHandler.getSubNode( node, pos.getName() ); if ( subNode != null ) { if ( p == xmlInputField.getFieldPosition().length - 1 ) { // last level value = XMLHandler.getNodeValue( subNode ); } } else { if ( log.isDebug() ) { logDebug( BaseMessages.getString( PKG, "XMLInput.Log.UnableToFindPosition", pos.toString(), node .toString() ) ); } } node = subNode; } else { // Multiple possible values: get number // pos.getElementNr()! Node subNode = XMLHandler.getSubNodeByNr( node, pos.getName(), pos.getElementNr() - 1, false ); if ( subNode != null ) { if ( p == xmlInputField.getFieldPosition().length - 1 ) { // last level value = XMLHandler.getNodeValue( subNode ); } } else { if ( log.isDebug() ) { logDebug( BaseMessages.getString( PKG, "XMLInput.Log.UnableToFindPosition", pos.toString(), node .toString() ) ); } } node = subNode; } break; case XMLInputFieldPosition.XML_ATTRIBUTE: value = XMLHandler.getTagAttribute( node, pos.getName() ); break; case XMLInputFieldPosition.XML_ROOT: value = XMLHandler.getNodeValue( node ); break; default: break; } } // OK, we have grabbed the string called value // Trim it, convert it, ... // DO Trimming! switch ( xmlInputField.getTrimType() ) { case XMLInputField.TYPE_TRIM_LEFT: value = Const.ltrim( value ); break; case XMLInputField.TYPE_TRIM_RIGHT: value = Const.rtrim( value ); break; case XMLInputField.TYPE_TRIM_BOTH: value = Const.trim( value ); break; default: break; } // System.out.println("after trim, field #"+i+" : "+v); // DO CONVERSIONS... // ValueMetaInterface targetValueMeta = data.outputRowMeta.getValueMeta( i ); ValueMetaInterface sourceValueMeta = data.convertRowMeta.getValueMeta( i ); outputRowData[i] = targetValueMeta.convertData( sourceValueMeta, value ); // Do we need to repeat this field if it is null? if ( meta.getInputFields()[i].isRepeated() ) { if ( data.previousRow != null && Utils.isEmpty( value ) ) { outputRowData[i] = data.previousRow[i]; } } } // End of loop over fields... int outputIndex = meta.getInputFields().length; // See if we need to add the filename to the row... if ( meta.includeFilename() && !Utils.isEmpty( meta.getFilenameField() ) ) { outputRowData[outputIndex++] = KettleVFS.getFilename( data.file ); } // See if we need to add the row number to the row... if ( meta.includeRowNumber() && !Utils.isEmpty( meta.getRowNumberField() ) ) { outputRowData[outputIndex++] = new Long( data.rownr ); } RowMetaInterface irow = getInputRowMeta(); data.previousRow = irow == null ? outputRowData : (Object[]) irow.cloneRow( outputRowData ); // copy it to make // surely the next step doesn't change it in between... data.rownr++; // Throw away the information in the item? NodeList nodeList = itemNode.getChildNodes(); for ( int i = 0; i < nodeList.getLength(); i++ ) { itemNode.removeChild( nodeList.item( i ) ); } return outputRowData; } /** * Build an empty row based on the meta-data... * * @return */ private Object[] buildEmptyRow() { return RowDataUtil.allocateRowData( data.outputRowMeta.size() ); } private boolean openNextFile() { try { if ( data.filenr >= data.files.size() ) { // finished processing! if ( log.isDetailed() ) { logDetailed( BaseMessages.getString( PKG, "XMLInput.Log.FinishedProcessing" ) ); } return false; } // Is this the last file? data.last_file = ( data.filenr == data.files.size() - 1 ); data.file = data.files.get( data.filenr ); logBasic( BaseMessages.getString( PKG, "XMLInput.Log.OpeningFile", data.file.toString() ) ); // Move file pointer ahead! data.filenr++; String baseURI = this.environmentSubstitute( meta.getFileBaseURI() ); if ( Utils.isEmpty( baseURI ) ) { baseURI = data.file.getParent().getName().getURI(); } // Open the XML document data.document = XMLHandler.loadXMLFile( data.file, baseURI, meta.isIgnoreEntities(), meta.isNamespaceAware() ); // Add this to the result file names... ResultFile resultFile = new ResultFile( ResultFile.FILE_TYPE_GENERAL, data.file, getTransMeta().getName(), getStepname() ); resultFile.setComment( "File was read by an XML input step" ); addResultFile( resultFile ); if ( log.isDetailed() ) { logDetailed( BaseMessages.getString( PKG, "XMLInput.Log.FileOpened", data.file.toString() ) ); } // Position in the file... data.section = data.document; for ( int i = 0; i < meta.getInputPosition().length - 1; i++ ) { data.section = XMLHandler.getSubNode( data.section, meta.getInputPosition()[i] ); } // Last element gets repeated: what's the name? data.itemElement = meta.getInputPosition()[meta.getInputPosition().length - 1]; data.itemCount = XMLHandler.countNodes( data.section, data.itemElement ); data.itemPosition = meta.getNrRowsToSkip(); } catch ( Exception e ) { logError( BaseMessages.getString( PKG, "XMLInput.Log.UnableToOpenFile", "" + data.filenr, data.file .toString(), e.toString() ) ); stopAll(); setErrors( 1 ); return false; } return true; } public boolean init( StepMetaInterface smi, StepDataInterface sdi ) { meta = (XMLInputMeta) smi; data = (XMLInputData) sdi; if ( super.init( smi, sdi ) ) { data.files = meta.getFiles( this ).getFiles(); if ( data.files == null || data.files.size() == 0 ) { logError( BaseMessages.getString( PKG, "XMLInput.Log.NoFiles" ) ); return false; } data.rownr = 1L; return true; } return false; } public void dispose( StepMetaInterface smi, StepDataInterface sdi ) { meta = (XMLInputMeta) smi; data = (XMLInputData) sdi; super.dispose( smi, sdi ); } }