/*! ****************************************************************************** * * Pentaho Data Integration * * Copyright (C) 2002-2016 by Pentaho : http://www.pentaho.com * ******************************************************************************* * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * ******************************************************************************/ package org.pentaho.di.trans.steps.xmloutput; import java.io.File; import java.io.OutputStream; import java.util.zip.ZipEntry; import java.util.zip.ZipOutputStream; import javax.xml.stream.XMLOutputFactory; import javax.xml.stream.XMLStreamException; import org.apache.commons.vfs2.FileObject; import org.pentaho.di.core.Const; import org.pentaho.di.core.util.Utils; import org.pentaho.di.core.ResultFile; import org.pentaho.di.core.exception.KettleException; import org.pentaho.di.core.exception.KettleStepException; import org.pentaho.di.core.exception.KettleValueException; import org.pentaho.di.core.row.RowMetaInterface; import org.pentaho.di.core.row.ValueMetaInterface; import org.pentaho.di.core.vfs.KettleVFS; import org.pentaho.di.trans.Trans; import org.pentaho.di.trans.TransMeta; import org.pentaho.di.trans.step.BaseStep; import org.pentaho.di.trans.step.StepDataInterface; import org.pentaho.di.trans.step.StepInterface; import org.pentaho.di.trans.step.StepMeta; import org.pentaho.di.trans.step.StepMetaInterface; import org.pentaho.di.trans.steps.xmloutput.XMLField.ContentType; /** * Converts input rows to one or more XML files. * * @author Matt * @since 14-jan-2006 */ public class XMLOutput extends BaseStep implements StepInterface { private static final String EOL = "\n"; // force EOL char because woodstox library encodes CRLF incorrectly private static final XMLOutputFactory XML_OUT_FACTORY = XMLOutputFactory.newInstance(); private XMLOutputMeta meta; private XMLOutputData data; public XMLOutput( StepMeta stepMeta, StepDataInterface stepDataInterface, int copyNr, TransMeta transMeta, Trans trans ) { super( stepMeta, stepDataInterface, copyNr, transMeta, trans ); } public boolean processRow( StepMetaInterface smi, StepDataInterface sdi ) throws KettleException { meta = (XMLOutputMeta) smi; data = (XMLOutputData) sdi; Object[] r; boolean result = true; r = getRow(); // This also waits for a row to be finished. if ( first && meta.isDoNotOpenNewFileInit() ) { // no more input to be expected... // In this case, no file was opened. if ( r == null ) { setOutputDone(); return false; } if ( openNewFile() ) { data.OpenedNewFile = true; } else { logError( "Couldn't open file " + meta.getFileName() ); setErrors( 1L ); return false; } } if ( ( r != null && getLinesOutput() > 0 && meta.getSplitEvery() > 0 && ( getLinesOutput() % meta.getSplitEvery() ) == 0 ) ) { // Done with this part or with everything. closeFile(); // Not finished: open another file... if ( r != null ) { if ( !openNewFile() ) { logError( "Unable to open new file (split #" + data.splitnr + "..." ); setErrors( 1 ); return false; } } } if ( r == null ) { // no more input to be expected... setOutputDone(); return false; } writeRowToFile( getInputRowMeta(), r ); data.outputRowMeta = getInputRowMeta().clone(); meta.getFields( data.outputRowMeta, getStepname(), null, null, this, repository, metaStore ); putRow( data.outputRowMeta, r ); // in case we want it to go further... if ( checkFeedback( getLinesOutput() ) ) { logBasic( "linenr " + getLinesOutput() ); } return result; } private void writeRowToFile( RowMetaInterface rowMeta, Object[] r ) throws KettleException { try { if ( first ) { data.formatRowMeta = rowMeta.clone(); first = false; data.fieldnrs = new int[meta.getOutputFields().length]; for ( int i = 0; i < meta.getOutputFields().length; i++ ) { data.fieldnrs[i] = data.formatRowMeta.indexOfValue( meta.getOutputFields()[i].getFieldName() ); if ( data.fieldnrs[i] < 0 ) { throw new KettleException( "Field [" + meta.getOutputFields()[i].getFieldName() + "] couldn't be found in the input stream!" ); } // Apply the formatting settings to the valueMeta object... // ValueMetaInterface valueMeta = data.formatRowMeta.getValueMeta( data.fieldnrs[i] ); XMLField field = meta.getOutputFields()[i]; valueMeta.setConversionMask( field.getFormat() ); valueMeta.setLength( field.getLength(), field.getPrecision() ); valueMeta.setDecimalSymbol( field.getDecimalSymbol() ); valueMeta.setGroupingSymbol( field.getGroupingSymbol() ); valueMeta.setCurrencySymbol( field.getCurrencySymbol() ); } } if ( meta.getOutputFields() == null || meta.getOutputFields().length == 0 ) { /* * Write all values in stream to text file. */ // OK, write a new row to the XML file: data.writer.writeStartElement( meta.getRepeatElement() ); for ( int i = 0; i < data.formatRowMeta.size(); i++ ) { // Put a space between the XML elements of the row // if ( i > 0 ) { data.writer.writeCharacters( " " ); } ValueMetaInterface valueMeta = data.formatRowMeta.getValueMeta( i ); Object valueData = r[i]; writeField( valueMeta, valueData, valueMeta.getName() ); } } else { /* * Only write the fields specified! */ // Write a new row to the XML file: data.writer.writeStartElement( meta.getRepeatElement() ); // First do the attributes and write them... writeRowAttributes( r ); // Now write the elements // for ( int i = 0; i < meta.getOutputFields().length; i++ ) { XMLField outputField = meta.getOutputFields()[i]; if ( outputField.getContentType() == ContentType.Element ) { if ( i > 0 ) { data.writer.writeCharacters( " " ); // a space between // elements } ValueMetaInterface valueMeta = data.formatRowMeta.getValueMeta( data.fieldnrs[i] ); Object valueData = r[data.fieldnrs[i]]; String elementName = outputField.getElementName(); if ( Utils.isEmpty( elementName ) ) { elementName = outputField.getFieldName(); } if ( !( valueMeta.isNull( valueData ) && meta.isOmitNullValues() ) ) { writeField( valueMeta, valueData, elementName ); } } } } data.writer.writeEndElement(); data.writer.writeCharacters( EOL ); } catch ( Exception e ) { throw new KettleException( "Error writing XML row :" + e.toString() + Const.CR + "Row: " + getInputRowMeta().getString( r ), e ); } incrementLinesOutput(); } void writeRowAttributes( Object[] r ) throws KettleValueException, XMLStreamException { for ( int i = 0; i < meta.getOutputFields().length; i++ ) { XMLField xmlField = meta.getOutputFields()[i]; if ( xmlField.getContentType() == ContentType.Attribute ) { ValueMetaInterface valueMeta = data.formatRowMeta.getValueMeta( data.fieldnrs[i] ); Object valueData = r[data.fieldnrs[i]]; String elementName = xmlField.getElementName(); if ( Utils.isEmpty( elementName ) ) { elementName = xmlField.getFieldName(); } data.writer.writeAttribute( elementName, valueMeta.getString( valueData ) ); } } } private void writeField( ValueMetaInterface valueMeta, Object valueData, String element ) throws KettleStepException { try { String value = valueMeta.getString( valueData ); if ( value != null ) { data.writer.writeStartElement( element ); data.writer.writeCharacters( value ); data.writer.writeEndElement(); } else { data.writer.writeEmptyElement( element ); } } catch ( Exception e ) { throw new KettleStepException( "Error writing line :", e ); } } public String buildFilename( boolean ziparchive ) { return meta.buildFilename( this, getCopy(), data.splitnr, ziparchive ); } public boolean openNewFile() { boolean retval = false; data.writer = null; try { if ( meta.isServletOutput() ) { data.writer = XML_OUT_FACTORY.createXMLStreamWriter( getTrans().getServletPrintWriter() ); if ( meta.getEncoding() != null && meta.getEncoding().length() > 0 ) { data.writer.writeStartDocument( meta.getEncoding(), "1.0" ); } else { data.writer.writeStartDocument( Const.XML_ENCODING, "1.0" ); } data.writer.writeCharacters( EOL ); } else { FileObject file = KettleVFS.getFileObject( buildFilename( true ), getTransMeta() ); if ( meta.isAddToResultFiles() ) { // Add this to the result file names... ResultFile resultFile = new ResultFile( ResultFile.FILE_TYPE_GENERAL, file, getTransMeta().getName(), getStepname() ); resultFile.setComment( "This file was created with a xml output step" ); addResultFile( resultFile ); } OutputStream outputStream; if ( meta.isZipped() ) { OutputStream fos = KettleVFS.getOutputStream( file, false ); data.zip = new ZipOutputStream( fos ); File entry = new File( buildFilename( false ) ); ZipEntry zipentry = new ZipEntry( entry.getName() ); zipentry.setComment( "Compressed by Kettle" ); data.zip.putNextEntry( zipentry ); outputStream = data.zip; } else { OutputStream fos = KettleVFS.getOutputStream( file, false ); outputStream = fos; } if ( meta.getEncoding() != null && meta.getEncoding().length() > 0 ) { logBasic( "Opening output stream in encoding: " + meta.getEncoding() ); data.writer = XML_OUT_FACTORY.createXMLStreamWriter( outputStream, meta.getEncoding() ); data.writer.writeStartDocument( meta.getEncoding(), "1.0" ); } else { logBasic( "Opening output stream in default encoding : " + Const.XML_ENCODING ); data.writer = XML_OUT_FACTORY.createXMLStreamWriter( outputStream ); data.writer.writeStartDocument( Const.XML_ENCODING, "1.0" ); } data.writer.writeCharacters( EOL ); } // OK, write the header & the parent element: data.writer.writeStartElement( meta.getMainElement() ); // Add the name space if defined if ( ( meta.getNameSpace() != null ) && ( !"".equals( meta.getNameSpace() ) ) ) { data.writer.writeDefaultNamespace( meta.getNameSpace() ); } data.writer.writeCharacters( EOL ); retval = true; } catch ( Exception e ) { logError( "Error opening new file : " + e.toString() ); } // System.out.println("end of newFile(), splitnr="+splitnr); data.splitnr++; return retval; } private boolean closeFile() { boolean retval = false; if ( data.OpenedNewFile ) { try { // Close the parent element data.writer.writeEndElement(); data.writer.writeCharacters( EOL ); // System.out.println("Closed xml file..."); data.writer.writeEndDocument(); data.writer.close(); if ( meta.isZipped() ) { // System.out.println("close zip entry "); data.zip.closeEntry(); // System.out.println("finish file..."); data.zip.finish(); data.zip.close(); } retval = true; } catch ( Exception e ) { // Ignore errors } } return retval; } public boolean init( StepMetaInterface smi, StepDataInterface sdi ) { meta = (XMLOutputMeta) smi; data = (XMLOutputData) sdi; if ( super.init( smi, sdi ) ) { data.splitnr = 0; if ( !meta.isDoNotOpenNewFileInit() ) { if ( openNewFile() ) { data.OpenedNewFile = true; return true; } else { logError( "Couldn't open file " + meta.getFileName() ); setErrors( 1L ); stopAll(); } } else { return true; } } return false; } public void dispose( StepMetaInterface smi, StepDataInterface sdi ) { meta = (XMLOutputMeta) smi; data = (XMLOutputData) sdi; closeFile(); super.dispose( smi, sdi ); } }