/*! ****************************************************************************** * * Pentaho Data Integration * * Copyright (C) 2002-2016 by Pentaho : http://www.pentaho.com * ******************************************************************************* * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * ******************************************************************************/ package org.pentaho.di.trans.steps.xmljoin; import java.io.StringReader; import java.io.StringWriter; import javax.xml.parsers.DocumentBuilder; import javax.xml.transform.OutputKeys; import javax.xml.transform.Transformer; import javax.xml.transform.TransformerFactory; import javax.xml.transform.dom.DOMSource; import javax.xml.transform.stream.StreamResult; import javax.xml.xpath.XPath; import javax.xml.xpath.XPathConstants; import javax.xml.xpath.XPathFactory; import org.pentaho.di.core.BlockingRowSet; import org.pentaho.di.core.exception.KettleException; import org.pentaho.di.core.exception.KettleXMLException; import org.pentaho.di.core.row.RowDataUtil; import org.pentaho.di.core.row.RowMetaInterface; import org.pentaho.di.core.xml.XMLParserFactoryProducer; import org.pentaho.di.i18n.BaseMessages; import org.pentaho.di.trans.Trans; import org.pentaho.di.trans.TransMeta; import org.pentaho.di.trans.step.BaseStep; import org.pentaho.di.trans.step.StepDataInterface; import org.pentaho.di.trans.step.StepInterface; import org.pentaho.di.trans.step.StepMeta; import org.pentaho.di.trans.step.StepMetaInterface; import org.w3c.dom.Document; import org.w3c.dom.Node; import org.w3c.dom.NodeList; import org.xml.sax.InputSource; /** * Converts input rows to one or more XML files. * * @author Matt * @since 14-jan-2006 */ public class XMLJoin extends BaseStep implements StepInterface { private static Class<?> PKG = XMLJoinMeta.class; // for i18n purposes, needed by Translator2!! private XMLJoinMeta meta; private XMLJoinData data; private Transformer transformer; public XMLJoin( StepMeta stepMeta, StepDataInterface stepDataInterface, int copyNr, TransMeta transMeta, Trans trans ) { super( stepMeta, stepDataInterface, copyNr, transMeta, trans ); } @Override public boolean processRow( StepMetaInterface smi, StepDataInterface sdi ) throws KettleException { meta = (XMLJoinMeta) smi; data = (XMLJoinData) sdi; XPath xpath = XPathFactory.newInstance().newXPath(); // if first row we do some initializing and process the first row of the target XML Step if ( first ) { first = false; int target_field_id = -1; XMLJoinMeta meta = (XMLJoinMeta) smi; // Get the two input row sets data.TargetRowSet = findInputRowSet( meta.getTargetXMLstep() ); data.SourceRowSet = findInputRowSet( meta.getSourceXMLstep() ); // get the first line from the target row set Object[] rTarget = getRowFrom( data.TargetRowSet ); if ( rTarget == null ) { // nothing to do logBasic( BaseMessages.getString( PKG, "XMLJoin.NoRowsFoundInTarget" ) ); setOutputDone(); return false; } // get target xml String[] target_field_names = data.TargetRowSet.getRowMeta().getFieldNames(); for ( int i = 0; i < target_field_names.length; i++ ) { if ( meta.getTargetXMLfield().equals( target_field_names[i] ) ) { target_field_id = i; } } // Throw exception if target field has not been found if ( target_field_id == -1 ) { throw new KettleException( BaseMessages.getString( PKG, "XMLJoin.Exception.FieldNotFound", meta .getTargetXMLfield() ) ); } data.outputRowMeta = data.TargetRowSet.getRowMeta().clone(); meta.getFields( data.outputRowMeta, getStepname(), new RowMetaInterface[] { data.TargetRowSet.getRowMeta() }, null, getTransMeta(), repository, metaStore ); data.outputRowData = rTarget.clone(); // get the target xml structure and create a DOM String strTarget = (String) rTarget[target_field_id]; // parse the XML as a W3C Document InputSource inputSource = new InputSource( new StringReader( strTarget ) ); data.XPathStatement = meta.getTargetXPath(); try { DocumentBuilder builder = XMLParserFactoryProducer.createSecureDocBuilderFactory().newDocumentBuilder(); data.targetDOM = builder.parse( inputSource ); if ( !meta.isComplexJoin() ) { data.targetNode = (Node) xpath.evaluate( data.XPathStatement, data.targetDOM, XPathConstants.NODE ); if ( data.targetNode == null ) { throw new KettleXMLException( "XPath statement returned no result [" + data.XPathStatement + "]" ); } } } catch ( Exception e ) { throw new KettleXMLException( e ); } } Object[] rJoinSource = getRowFrom( data.SourceRowSet ); // This also waits for a row to be finished. if ( rJoinSource == null ) { // no more input to be expected... create the output row try { if ( meta.isOmitNullValues() ) { removeEmptyNodes( data.targetDOM.getChildNodes() ); } // create string from xml tree StringWriter sw = new StringWriter(); StreamResult resultXML = new StreamResult( sw ); DOMSource source = new DOMSource( data.targetDOM ); getTransformer().transform( source, resultXML ); int outputIndex = data.outputRowMeta.size() - 1; // send the row to the next steps... putRow( data.outputRowMeta, RowDataUtil.addValueData( data.outputRowData, outputIndex, sw.toString() ) ); // finishing up setOutputDone(); return false; } catch ( Exception e ) { throw new KettleException( e ); } } else { if ( data.iSourceXMLField == -1 ) { // assume failure // get the column of the join xml set // get target xml String[] source_field_names = data.SourceRowSet.getRowMeta().getFieldNames(); for ( int i = 0; i < source_field_names.length; i++ ) { if ( meta.getSourceXMLfield().equals( source_field_names[i] ) ) { data.iSourceXMLField = i; } } // Throw exception if source xml field has not been found if ( data.iSourceXMLField == -1 ) { throw new KettleException( BaseMessages.getString( PKG, "XMLJoin.Exception.FieldNotFound", meta .getSourceXMLfield() ) ); } } if ( meta.isComplexJoin() && data.iCompareFieldID == -1 ) { // get the column of the compare value String[] source_field_names = data.SourceRowSet.getRowMeta().getFieldNames(); for ( int i = 0; i < source_field_names.length; i++ ) { if ( meta.getJoinCompareField().equals( source_field_names[i] ) ) { data.iCompareFieldID = i; } } // Throw exception if source xml field has not been found if ( data.iCompareFieldID == -1 ) { throw new KettleException( BaseMessages.getString( PKG, "XMLJoin.Exception.FieldNotFound", meta .getJoinCompareField() ) ); } } // get XML tags to join String strJoinXML = (String) rJoinSource[data.iSourceXMLField]; try { DocumentBuilder builder = XMLParserFactoryProducer.createSecureDocBuilderFactory().newDocumentBuilder(); Document joinDocument = builder.parse( new InputSource( new StringReader( strJoinXML ) ) ); Node node = data.targetDOM.importNode( joinDocument.getDocumentElement(), true ); if ( meta.isComplexJoin() ) { String strCompareValue = rJoinSource[data.iCompareFieldID].toString(); String strXPathStatement = data.XPathStatement.replace( "?", strCompareValue ); data.targetNode = (Node) xpath.evaluate( strXPathStatement, data.targetDOM, XPathConstants.NODE ); if ( data.targetNode == null ) { throw new KettleXMLException( "XPath statement returned no result [" + strXPathStatement + "]" ); } } data.targetNode.appendChild( node ); } catch ( Exception e ) { throw new KettleException( e ); } } return true; } @Override public boolean init( StepMetaInterface smi, StepDataInterface sdi ) { meta = (XMLJoinMeta) smi; data = (XMLJoinData) sdi; if ( !super.init( smi, sdi ) ) { return false; } try { Transformer transformer = TransformerFactory.newInstance().newTransformer(); if ( meta.getEncoding() != null ) { transformer.setOutputProperty( OutputKeys.ENCODING, meta.getEncoding() ); } if ( meta.isOmitXMLHeader() ) { transformer.setOutputProperty( OutputKeys.OMIT_XML_DECLARATION, "yes" ); } transformer.setOutputProperty( OutputKeys.INDENT, "no" ); setTransformer( transformer ); // See if a main step is supplied: in that case move the corresponding rowset to position 0 // for ( int i = 0; i < getInputRowSets().size(); i++ ) { BlockingRowSet rs = (BlockingRowSet) getInputRowSets().get( i ); if ( rs.getOriginStepName().equalsIgnoreCase( meta.getTargetXMLstep() ) ) { // swap this one and position 0...that means, the main stream is always stream 0 --> easy! // BlockingRowSet zero = (BlockingRowSet) getInputRowSets().get( 0 ); getInputRowSets().set( 0, rs ); getInputRowSets().set( i, zero ); } } } catch ( Exception e ) { log.logError( BaseMessages.getString( PKG, "XMLJoin.Error.Init" ), e ); return false; } return true; } @Override public void dispose( StepMetaInterface smi, StepDataInterface sdi ) { meta = (XMLJoinMeta) smi; data = (XMLJoinData) sdi; super.dispose( smi, sdi ); } private void setTransformer( Transformer transformer ) { this.transformer = transformer; } private Transformer getTransformer() { return transformer; } private void removeEmptyNodes( NodeList nodes ) { for ( int i = 0; i < nodes.getLength(); i++ ) { Node node = nodes.item( i ); // Process the tree bottom-up if ( node.hasChildNodes() ) { removeEmptyNodes( node.getChildNodes() ); } boolean nodeIsEmpty = node.getNodeType() == Node.ELEMENT_NODE && !node.hasAttributes() && !node.hasChildNodes() && node.getTextContent().length() == 0; if ( nodeIsEmpty ) { // We shifted elements left, do not increment counter node.getParentNode().removeChild( node ); i--; } } } }