/*! ****************************************************************************** * * Pentaho Data Integration * * Copyright (C) 2002-2016 by Pentaho : http://www.pentaho.com * ******************************************************************************* * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * ******************************************************************************/ package org.pentaho.di.trans.steps.getxmldata; import java.util.ArrayList; import java.util.List; import java.util.Map; import org.apache.commons.vfs2.FileObject; import org.pentaho.di.core.annotations.Step; import org.pentaho.di.core.CheckResult; import org.pentaho.di.core.CheckResultInterface; import org.pentaho.di.core.Const; import org.pentaho.di.core.util.Utils; import org.pentaho.di.core.database.DatabaseMeta; import org.pentaho.di.core.exception.KettleException; import org.pentaho.di.core.exception.KettleStepException; import org.pentaho.di.core.exception.KettleXMLException; import org.pentaho.di.core.fileinput.FileInputList; import org.pentaho.di.core.row.RowMetaInterface; import org.pentaho.di.core.row.ValueMeta; import org.pentaho.di.core.row.ValueMetaInterface; import org.pentaho.di.core.row.value.ValueMetaFactory; import org.pentaho.di.core.variables.VariableSpace; import org.pentaho.di.core.xml.XMLHandler; import org.pentaho.di.i18n.BaseMessages; import org.pentaho.di.repository.ObjectId; import org.pentaho.di.repository.Repository; import org.pentaho.di.resource.ResourceDefinition; 
import org.pentaho.di.resource.ResourceNamingInterface;
import org.pentaho.di.trans.Trans;
import org.pentaho.di.trans.TransMeta;
import org.pentaho.di.trans.step.BaseStepMeta;
import org.pentaho.di.trans.step.StepDataInterface;
import org.pentaho.di.trans.step.StepInterface;
import org.pentaho.di.trans.step.StepMeta;
import org.pentaho.di.trans.step.StepMetaInjectionInterface;
import org.pentaho.di.trans.step.StepMetaInterface;
import org.pentaho.metastore.api.IMetaStore;
import org.w3c.dom.Node;

/**
 * Metadata (design-time configuration) of the "getXMLData" step.
 *
 * <p>
 * Holds the file selection, the loop XPath, the output field definitions and the various option flags, and knows how
 * to serialize them to and from XML and a repository.
 */
@Step( id = "getXMLData", image = "GXD.svg", i18nPackageName = "org.pentaho.di.trans.steps.getxmldata",
  name = "GetXMLData.name", description = "GetXMLData.description",
  categoryDescription = "GetXMLData.category",
  documentationUrl = "http://wiki.pentaho.com/display/EAI/Get+Data+From+XML" )
public class GetXMLDataMeta extends BaseStepMeta implements StepMetaInterface {
  private static Class<?> PKG = GetXMLDataMeta.class; // for i18n purposes, needed by Translator2!!

  /** Code used for "yes" in the string-encoded boolean arrays of this class. */
  private static final String YES = "Y";

  /** Localized "No" / "Yes" labels; index-aligned with {@link #RequiredFilesCode}. */
  public static final String[] RequiredFilesDesc = new String[] {
    BaseMessages.getString( PKG, "System.Combo.No" ), BaseMessages.getString( PKG, "System.Combo.Yes" ) };

  /** Persisted codes ("N" / "Y"); index-aligned with {@link #RequiredFilesDesc}. */
  public static final String[] RequiredFilesCode = new String[] { "N", "Y" };

  /** Presumably the XPath attribute prefix; not used in the visible part of this class - confirm against callers. */
  public static final String AT = "@";

  /**
   * Presumably the XPath node separator; not used in the visible part of this class - confirm against callers.
   * NOTE(review): the name is spelled with a zero ("N0DE"); it is public, so renaming it would break callers.
   */
  public static final String N0DE_SEPARATOR = "/";

  /** Array of filenames */
  private String[] fileName;

  /** Wildcard or filemask (regular expression) */
  private String[] fileMask;

  /** Array of boolean values as string, indicating if a file is required.
*/
  private String[] fileRequired;

  /** Wildcard or filemask to exclude (regular expression) */
  private String[] excludeFileMask;

  /** Flag indicating that we should include the filename in the output */
  private boolean includeFilename;

  /** The name of the field in the output containing the filename */
  private String filenameField;

  /** Flag indicating that a row number field should be included in the output */
  private boolean includeRowNumber;

  /** The name of the field in the output containing the row number */
  private String rowNumberField;

  /** The maximum number of rows to read; the default set by this class is 0 */
  private long rowLimit;

  /** The XPath location to loop over */
  private String loopxpath;

  /** The definitions of the output fields to extract */
  private GetXMLDataField[] inputFields;

  /** The encoding to use for reading: null or empty string means system default encoding */
  private String encoding;

  /** Name of the XML field; presumably the input field holding the XML source when inFields is set - TODO confirm */
  private String xmlField;

  /** Flag: "IsInFields" option; presumably the XML source comes from an input field - TODO confirm */
  private boolean inFields;

  /** Flag: "IsAFile" option; presumably the source value is a file name rather than XML content - TODO confirm */
  private boolean IsAFile;

  /** Flag: add result filename **/
  private boolean addResultFile;

  /** Flag: set Namespace aware **/
  private boolean nameSpaceAware;

  /** Flag: set XML Validating **/
  private boolean validating;

  /** Flag : "use token" option */
  private boolean usetoken;

  /** Flag : do we ignore empty files */
  private boolean IsIgnoreEmptyFile;

  /** Array of boolean values as string, indicating if we need to fetch sub folders.
*/ private String[] includeSubFolders; /** Flag : do not fail if no file */ private boolean doNotFailIfNoFile; /** Flag : ignore comments */ private boolean ignorecomments; /** Flag : read url as source */ private boolean readurl; // Given this path activates the streaming algorithm to process large files private String prunePath; /** Additional fields **/ private String shortFileFieldName; private String pathFieldName; private String hiddenFieldName; private String lastModificationTimeFieldName; private String uriNameFieldName; private String rootUriNameFieldName; private String extensionFieldName; private String sizeFieldName; public GetXMLDataMeta() { super(); // allocate BaseStepMeta } /** * @return Returns the shortFileFieldName. */ public String getShortFileNameField() { return shortFileFieldName; } /** * @param field * The shortFileFieldName to set. */ public void setShortFileNameField( String field ) { shortFileFieldName = field; } /** * @return Returns the pathFieldName. */ public String getPathField() { return pathFieldName; } /** * @param field * The pathFieldName to set. */ public void setPathField( String field ) { this.pathFieldName = field; } /** * @return Returns the hiddenFieldName. */ public String isHiddenField() { return hiddenFieldName; } /** * @param field * The hiddenFieldName to set. */ public void setIsHiddenField( String field ) { hiddenFieldName = field; } /** * @return Returns the lastModificationTimeFieldName. */ public String getLastModificationDateField() { return lastModificationTimeFieldName; } /** * @param field * The lastModificationTimeFieldName to set. */ public void setLastModificationDateField( String field ) { lastModificationTimeFieldName = field; } /** * @return Returns the uriNameFieldName. */ public String getUriField() { return uriNameFieldName; } /** * @param field * The uriNameFieldName to set. */ public void setUriField( String field ) { uriNameFieldName = field; } /** * @return Returns the uriNameFieldName. 
*/ public String getRootUriField() { return rootUriNameFieldName; } /** * @param field * The rootUriNameFieldName to set. */ public void setRootUriField( String field ) { rootUriNameFieldName = field; } /** * @return Returns the extensionFieldName. */ public String getExtensionField() { return extensionFieldName; } /** * @param field * The extensionFieldName to set. */ public void setExtensionField( String field ) { extensionFieldName = field; } /** * @return Returns the sizeFieldName. */ public String getSizeField() { return sizeFieldName; } /** * @param field * The sizeFieldName to set. */ public void setSizeField( String field ) { sizeFieldName = field; } /** * @return the add result filesname flag */ public boolean addResultFile() { return addResultFile; } /** * @return the validating flag */ public boolean isValidating() { return validating; } /** * @param validating * the validating flag to set */ public void setValidating( boolean validating ) { this.validating = validating; } /** * @return the readurl flag */ public boolean isReadUrl() { return readurl; } /** * @param readurl * the readurl flag to set */ public void setReadUrl( boolean readurl ) { this.readurl = readurl; } public void setAddResultFile( boolean addResultFile ) { this.addResultFile = addResultFile; } /** * @return Returns the input fields. */ public GetXMLDataField[] getInputFields() { return inputFields; } /** * @param inputFields * The input fields to set. */ public void setInputFields( GetXMLDataField[] inputFields ) { this.inputFields = inputFields; } /** * @return Returns the excludeFileMask. */ public String[] getExludeFileMask() { return excludeFileMask; } /** * @param excludeFileMask * The excludeFileMask to set. */ public void setExcludeFileMask( String[] excludeFileMask ) { this.excludeFileMask = excludeFileMask; } /** * Get XML field. */ public String getXMLField() { return xmlField; } /** * Set XML field. 
*/ public void setXMLField( String xmlField ) { this.xmlField = xmlField; } /** * Get the IsInFields. */ public boolean isInFields() { return inFields; } /** * @param inFields * set the inFields. */ public void setInFields( boolean inFields ) { this.inFields = inFields; } /** * @return Returns the fileMask. */ public String[] getFileMask() { return fileMask; } /** * @param fileMask * The fileMask to set. */ public void setFileMask( String[] fileMask ) { this.fileMask = fileMask; } public String[] getFileRequired() { return fileRequired; } public void setFileRequired( String[] fileRequiredin ) { for ( int i = 0; i < fileRequiredin.length; i++ ) { this.fileRequired[i] = getRequiredFilesCode( fileRequiredin[i] ); } } public void setIncludeSubFolders( String[] includeSubFoldersin ) { for ( int i = 0; i < includeSubFoldersin.length; i++ ) { this.includeSubFolders[i] = getRequiredFilesCode( includeSubFoldersin[i] ); } } /** * @return Returns the fileName. */ public String[] getFileName() { return fileName; } /** * @param fileName * The fileName to set. */ public void setFileName( String[] fileName ) { this.fileName = fileName; } /** * @return Returns the filenameField. */ public String getFilenameField() { return filenameField; } /** * @param filenameField * The filenameField to set. */ public void setFilenameField( String filenameField ) { this.filenameField = filenameField; } /** * @return Returns the includeFilename. */ public boolean includeFilename() { return includeFilename; } /** * @param includeFilename * The includeFilename to set. */ public void setIncludeFilename( boolean includeFilename ) { this.includeFilename = includeFilename; } /** * @return Returns the includeRowNumber. */ public boolean includeRowNumber() { return includeRowNumber; } /** * @param includeRowNumber * The includeRowNumber to set. */ public void setIncludeRowNumber( boolean includeRowNumber ) { this.includeRowNumber = includeRowNumber; } /** * @return Returns the rowLimit. 
*/
  public long getRowLimit() {
    return this.rowLimit;
  }

  /**
   * @param rowLimit
   *          the maximum number of rows to read
   */
  public void setRowLimit( long rowLimit ) {
    this.rowLimit = rowLimit;
  }

  /**
   * @return the XPath expression that is looped over
   */
  public String getLoopXPath() {
    return this.loopxpath;
  }

  /**
   * @param loopxpath
   *          the XPath expression to loop over
   */
  public void setLoopXPath( String loopxpath ) {
    this.loopxpath = loopxpath;
  }

  /**
   * @param usetoken
   *          the new value of the "use token" flag
   */
  public void setuseToken( boolean usetoken ) {
    this.usetoken = usetoken;
  }

  /**
   * @return the "use token" flag
   */
  public boolean isuseToken() {
    return this.usetoken;
  }

  /**
   * @return the "ignore empty file" flag
   */
  public boolean isIgnoreEmptyFile() {
    return this.IsIgnoreEmptyFile;
  }

  /**
   * @param IsIgnoreEmptyFile
   *          the new value of the "ignore empty file" flag
   */
  public void setIgnoreEmptyFile( boolean IsIgnoreEmptyFile ) {
    this.IsIgnoreEmptyFile = IsIgnoreEmptyFile;
  }

  /**
   * @return the "do not fail if no file" flag
   */
  public boolean isdoNotFailIfNoFile() {
    return this.doNotFailIfNoFile;
  }

  /**
   * @param doNotFailIfNoFile
   *          the new value of the "do not fail if no file" flag
   */
  public void setdoNotFailIfNoFile( boolean doNotFailIfNoFile ) {
    this.doNotFailIfNoFile = doNotFailIfNoFile;
  }

  /**
   * @return the "ignore comments" flag
   */
  public boolean isIgnoreComments() {
    return this.ignorecomments;
  }

  /**
   * @param ignorecomments
   *          the new value of the "ignore comments" flag
   */
  public void setIgnoreComments( boolean ignorecomments ) {
    this.ignorecomments = ignorecomments;
  }

  /**
   * @param nameSpaceAware
   *          the new value of the "namespace aware" flag
   */
  public void setNamespaceAware( boolean nameSpaceAware ) {
    this.nameSpaceAware = nameSpaceAware;
  }

  /**
   * @return the "namespace aware" flag
   */
  public boolean isNamespaceAware() {
    return this.nameSpaceAware;
  }

  /**
   * @return the name of the output field that carries the row number
   */
  public String getRowNumberField() {
    return this.rowNumberField;
  }

  /**
   * @param rowNumberField
   *          The rowNumberField to set.
*/ public void setRowNumberField( String rowNumberField ) { this.rowNumberField = rowNumberField; } /** * @return the encoding */ public String getEncoding() { return encoding; } /** * @param encoding * the encoding to set */ public void setEncoding( String encoding ) { this.encoding = encoding; } public boolean getIsAFile() { return IsAFile; } public void setIsAFile( boolean IsAFile ) { this.IsAFile = IsAFile; } /** * @return the prunePath */ public String getPrunePath() { return prunePath; } public String[] getIncludeSubFolders() { return includeSubFolders; } /** * @param prunePath * the prunePath to set */ public void setPrunePath( String prunePath ) { this.prunePath = prunePath; } public void loadXML( Node stepnode, List<DatabaseMeta> databases, IMetaStore metaStore ) throws KettleXMLException { readData( stepnode ); } public Object clone() { GetXMLDataMeta retval = (GetXMLDataMeta) super.clone(); int nrFiles = fileName.length; int nrFields = inputFields.length; retval.allocate( nrFiles, nrFields ); for ( int i = 0; i < nrFiles; i++ ) { retval.fileName[i] = fileName[i]; retval.fileMask[i] = fileMask[i]; retval.excludeFileMask[i] = excludeFileMask[i]; retval.fileRequired[i] = fileRequired[i]; retval.includeSubFolders[i] = includeSubFolders[i]; } for ( int i = 0; i < nrFields; i++ ) { if ( inputFields[i] != null ) { retval.inputFields[i] = (GetXMLDataField) inputFields[i].clone(); } } return retval; } public String getXML() { StringBuffer retval = new StringBuffer( 400 ); retval.append( " " ).append( XMLHandler.addTagValue( "include", includeFilename ) ); retval.append( " " ).append( XMLHandler.addTagValue( "include_field", filenameField ) ); retval.append( " " ).append( XMLHandler.addTagValue( "rownum", includeRowNumber ) ); retval.append( " " ).append( XMLHandler.addTagValue( "addresultfile", addResultFile ) ); retval.append( " " ).append( XMLHandler.addTagValue( "namespaceaware", nameSpaceAware ) ); retval.append( " " ).append( XMLHandler.addTagValue( 
"ignorecomments", ignorecomments ) ); retval.append( " " ).append( XMLHandler.addTagValue( "readurl", readurl ) ); retval.append( " " ).append( XMLHandler.addTagValue( "validating", validating ) ); retval.append( " " + XMLHandler.addTagValue( "usetoken", usetoken ) ); retval.append( " " + XMLHandler.addTagValue( "IsIgnoreEmptyFile", IsIgnoreEmptyFile ) ); retval.append( " " + XMLHandler.addTagValue( "doNotFailIfNoFile", doNotFailIfNoFile ) ); retval.append( " " ).append( XMLHandler.addTagValue( "rownum_field", rowNumberField ) ); retval.append( " " ).append( XMLHandler.addTagValue( "encoding", encoding ) ); retval.append( " <file>" ).append( Const.CR ); for ( int i = 0; i < fileName.length; i++ ) { retval.append( " " ).append( XMLHandler.addTagValue( "name", fileName[i] ) ); retval.append( " " ).append( XMLHandler.addTagValue( "filemask", fileMask[i] ) ); retval.append( " " ).append( XMLHandler.addTagValue( "exclude_filemask", excludeFileMask[i] ) ); retval.append( " " ).append( XMLHandler.addTagValue( "file_required", fileRequired[i] ) ); retval.append( " " ).append( XMLHandler.addTagValue( "include_subfolders", includeSubFolders[i] ) ); } retval.append( " </file>" ).append( Const.CR ); retval.append( " <fields>" ).append( Const.CR ); for ( int i = 0; i < inputFields.length; i++ ) { GetXMLDataField field = inputFields[i]; retval.append( field.getXML() ); } retval.append( " </fields>" ).append( Const.CR ); retval.append( " " ).append( XMLHandler.addTagValue( "limit", rowLimit ) ); retval.append( " " ).append( XMLHandler.addTagValue( "loopxpath", loopxpath ) ); retval.append( " " ).append( XMLHandler.addTagValue( "IsInFields", inFields ) ); retval.append( " " ).append( XMLHandler.addTagValue( "IsAFile", IsAFile ) ); retval.append( " " ).append( XMLHandler.addTagValue( "XmlField", xmlField ) ); retval.append( " " ).append( XMLHandler.addTagValue( "prunePath", prunePath ) ); retval.append( " " ).append( XMLHandler.addTagValue( "shortFileFieldName", shortFileFieldName 
) ); retval.append( " " ).append( XMLHandler.addTagValue( "pathFieldName", pathFieldName ) ); retval.append( " " ).append( XMLHandler.addTagValue( "hiddenFieldName", hiddenFieldName ) ); retval.append( " " ).append( XMLHandler.addTagValue( "lastModificationTimeFieldName", lastModificationTimeFieldName ) ); retval.append( " " ).append( XMLHandler.addTagValue( "uriNameFieldName", uriNameFieldName ) ); retval.append( " " ).append( XMLHandler.addTagValue( "rootUriNameFieldName", rootUriNameFieldName ) ); retval.append( " " ).append( XMLHandler.addTagValue( "extensionFieldName", extensionFieldName ) ); retval.append( " " ).append( XMLHandler.addTagValue( "sizeFieldName", sizeFieldName ) ); return retval.toString(); } public String getRequiredFilesDesc( String tt ) { if ( Utils.isEmpty( tt ) ) { return RequiredFilesDesc[0]; } if ( tt.equalsIgnoreCase( RequiredFilesCode[1] ) ) { return RequiredFilesDesc[1]; } else { return RequiredFilesDesc[0]; } } public String getRequiredFilesCode( String tt ) { if ( tt == null ) { return RequiredFilesCode[0]; } if ( tt.equals( RequiredFilesDesc[1] ) ) { return RequiredFilesCode[1]; } else { return RequiredFilesCode[0]; } } private void readData( Node stepnode ) throws KettleXMLException { try { includeFilename = "Y".equalsIgnoreCase( XMLHandler.getTagValue( stepnode, "include" ) ); filenameField = XMLHandler.getTagValue( stepnode, "include_field" ); addResultFile = "Y".equalsIgnoreCase( XMLHandler.getTagValue( stepnode, "addresultfile" ) ); nameSpaceAware = "Y".equalsIgnoreCase( XMLHandler.getTagValue( stepnode, "namespaceaware" ) ); ignorecomments = "Y".equalsIgnoreCase( XMLHandler.getTagValue( stepnode, "ignorecomments" ) ); readurl = "Y".equalsIgnoreCase( XMLHandler.getTagValue( stepnode, "readurl" ) ); validating = "Y".equalsIgnoreCase( XMLHandler.getTagValue( stepnode, "validating" ) ); usetoken = "Y".equalsIgnoreCase( XMLHandler.getTagValue( stepnode, "usetoken" ) ); IsIgnoreEmptyFile = "Y".equalsIgnoreCase( 
XMLHandler.getTagValue( stepnode, "IsIgnoreEmptyFile" ) ); doNotFailIfNoFile = "Y".equalsIgnoreCase( XMLHandler.getTagValue( stepnode, "doNotFailIfNoFile" ) ); includeRowNumber = "Y".equalsIgnoreCase( XMLHandler.getTagValue( stepnode, "rownum" ) ); rowNumberField = XMLHandler.getTagValue( stepnode, "rownum_field" ); encoding = XMLHandler.getTagValue( stepnode, "encoding" ); Node filenode = XMLHandler.getSubNode( stepnode, "file" ); Node fields = XMLHandler.getSubNode( stepnode, "fields" ); int nrFiles = XMLHandler.countNodes( filenode, "name" ); int nrFields = XMLHandler.countNodes( fields, "field" ); allocate( nrFiles, nrFields ); for ( int i = 0; i < nrFiles; i++ ) { Node filenamenode = XMLHandler.getSubNodeByNr( filenode, "name", i ); Node filemasknode = XMLHandler.getSubNodeByNr( filenode, "filemask", i ); Node excludefilemasknode = XMLHandler.getSubNodeByNr( filenode, "exclude_filemask", i ); Node fileRequirednode = XMLHandler.getSubNodeByNr( filenode, "file_required", i ); Node includeSubFoldersnode = XMLHandler.getSubNodeByNr( filenode, "include_subfolders", i ); fileName[i] = XMLHandler.getNodeValue( filenamenode ); fileMask[i] = XMLHandler.getNodeValue( filemasknode ); excludeFileMask[i] = XMLHandler.getNodeValue( excludefilemasknode ); fileRequired[i] = XMLHandler.getNodeValue( fileRequirednode ); includeSubFolders[i] = XMLHandler.getNodeValue( includeSubFoldersnode ); } for ( int i = 0; i < nrFields; i++ ) { Node fnode = XMLHandler.getSubNodeByNr( fields, "field", i ); GetXMLDataField field = new GetXMLDataField( fnode ); inputFields[i] = field; } // Is there a limit on the number of rows we process? 
rowLimit = Const.toLong( XMLHandler.getTagValue( stepnode, "limit" ), 0L ); // Do we skip rows before starting to read loopxpath = XMLHandler.getTagValue( stepnode, "loopxpath" ); inFields = "Y".equalsIgnoreCase( XMLHandler.getTagValue( stepnode, "IsInFields" ) ); IsAFile = "Y".equalsIgnoreCase( XMLHandler.getTagValue( stepnode, "IsAFile" ) ); xmlField = XMLHandler.getTagValue( stepnode, "XmlField" ); prunePath = XMLHandler.getTagValue( stepnode, "prunePath" ); shortFileFieldName = XMLHandler.getTagValue( stepnode, "shortFileFieldName" ); pathFieldName = XMLHandler.getTagValue( stepnode, "pathFieldName" ); hiddenFieldName = XMLHandler.getTagValue( stepnode, "hiddenFieldName" ); lastModificationTimeFieldName = XMLHandler.getTagValue( stepnode, "lastModificationTimeFieldName" ); uriNameFieldName = XMLHandler.getTagValue( stepnode, "uriNameFieldName" ); rootUriNameFieldName = XMLHandler.getTagValue( stepnode, "rootUriNameFieldName" ); extensionFieldName = XMLHandler.getTagValue( stepnode, "extensionFieldName" ); sizeFieldName = XMLHandler.getTagValue( stepnode, "sizeFieldName" ); } catch ( Exception e ) { throw new KettleXMLException( BaseMessages.getString( PKG, "GetXMLDataMeta.Exception.ErrorLoadingXML", e .toString() ) ); } } public void allocate( int nrfiles, int nrfields ) { allocateFiles( nrfiles ); inputFields = new GetXMLDataField[nrfields]; } public void allocateFiles( int nrfiles ) { fileName = new String[nrfiles]; fileMask = new String[nrfiles]; excludeFileMask = new String[nrfiles]; fileRequired = new String[nrfiles]; includeSubFolders = new String[nrfiles]; } public void setDefault() { shortFileFieldName = null; pathFieldName = null; hiddenFieldName = null; lastModificationTimeFieldName = null; uriNameFieldName = null; rootUriNameFieldName = null; extensionFieldName = null; sizeFieldName = null; usetoken = false; IsIgnoreEmptyFile = false; doNotFailIfNoFile = true; includeFilename = false; filenameField = ""; includeRowNumber = false; rowNumberField = ""; 
IsAFile = false; addResultFile = false; nameSpaceAware = false; ignorecomments = false; readurl = false; validating = false; int nrFiles = 0; int nrFields = 0; loopxpath = ""; allocate( nrFiles, nrFields ); for ( int i = 0; i < nrFiles; i++ ) { fileName[i] = "filename" + ( i + 1 ); fileMask[i] = ""; excludeFileMask[i] = ""; fileRequired[i] = RequiredFilesCode[0]; includeSubFolders[i] = RequiredFilesCode[0]; } for ( int i = 0; i < nrFields; i++ ) { inputFields[i] = new GetXMLDataField( "field" + ( i + 1 ) ); } rowLimit = 0; inFields = false; xmlField = ""; prunePath = ""; } public void getFields( RowMetaInterface r, String name, RowMetaInterface[] info, StepMeta nextStep, VariableSpace space, Repository repository, IMetaStore metaStore ) throws KettleStepException { int i; for ( i = 0; i < inputFields.length; i++ ) { GetXMLDataField field = inputFields[i]; int type = field.getType(); if ( type == ValueMeta.TYPE_NONE ) { type = ValueMeta.TYPE_STRING; } try { ValueMetaInterface v = ValueMetaFactory.createValueMeta( space.environmentSubstitute( field.getName() ), type ); v.setLength( field.getLength() ); v.setPrecision( field.getPrecision() ); v.setOrigin( name ); v.setConversionMask( field.getFormat() ); v.setDecimalSymbol( field.getDecimalSymbol() ); v.setGroupingSymbol( field.getGroupSymbol() ); v.setCurrencySymbol( field.getCurrencySymbol() ); r.addValueMeta( v ); } catch ( Exception e ) { throw new KettleStepException( e ); } } if ( includeFilename ) { ValueMetaInterface v = new ValueMeta( space.environmentSubstitute( filenameField ), ValueMeta.TYPE_STRING ); v.setLength( 250 ); v.setPrecision( -1 ); v.setOrigin( name ); r.addValueMeta( v ); } if ( includeRowNumber ) { ValueMetaInterface v = new ValueMeta( space.environmentSubstitute( rowNumberField ), ValueMeta.TYPE_INTEGER ); v.setLength( ValueMetaInterface.DEFAULT_INTEGER_LENGTH, 0 ); v.setOrigin( name ); r.addValueMeta( v ); } // Add additional fields if ( getShortFileNameField() != null && 
getShortFileNameField().length() > 0 ) { ValueMetaInterface v = new ValueMeta( space.environmentSubstitute( getShortFileNameField() ), ValueMeta.TYPE_STRING ); v.setLength( 100, -1 ); v.setOrigin( name ); r.addValueMeta( v ); } if ( getExtensionField() != null && getExtensionField().length() > 0 ) { ValueMetaInterface v = new ValueMeta( space.environmentSubstitute( getExtensionField() ), ValueMeta.TYPE_STRING ); v.setLength( 100, -1 ); v.setOrigin( name ); r.addValueMeta( v ); } if ( getPathField() != null && getPathField().length() > 0 ) { ValueMetaInterface v = new ValueMeta( space.environmentSubstitute( getPathField() ), ValueMeta.TYPE_STRING ); v.setLength( 100, -1 ); v.setOrigin( name ); r.addValueMeta( v ); } if ( getSizeField() != null && getSizeField().length() > 0 ) { ValueMetaInterface v = new ValueMeta( space.environmentSubstitute( getSizeField() ), ValueMeta.TYPE_INTEGER ); v.setOrigin( name ); v.setLength( 9 ); r.addValueMeta( v ); } if ( isHiddenField() != null && isHiddenField().length() > 0 ) { ValueMetaInterface v = new ValueMeta( space.environmentSubstitute( isHiddenField() ), ValueMeta.TYPE_BOOLEAN ); v.setOrigin( name ); r.addValueMeta( v ); } if ( getLastModificationDateField() != null && getLastModificationDateField().length() > 0 ) { ValueMetaInterface v = new ValueMeta( space.environmentSubstitute( getLastModificationDateField() ), ValueMeta.TYPE_DATE ); v.setOrigin( name ); r.addValueMeta( v ); } if ( getUriField() != null && getUriField().length() > 0 ) { ValueMetaInterface v = new ValueMeta( space.environmentSubstitute( getUriField() ), ValueMeta.TYPE_STRING ); v.setLength( 100, -1 ); v.setOrigin( name ); r.addValueMeta( v ); } if ( getRootUriField() != null && getRootUriField().length() > 0 ) { ValueMetaInterface v = new ValueMeta( space.environmentSubstitute( getRootUriField() ), ValueMeta.TYPE_STRING ); v.setLength( 100, -1 ); v.setOrigin( name ); r.addValueMeta( v ); } } public void readRep( Repository rep, IMetaStore metaStore, 
ObjectId id_step, List<DatabaseMeta> databases ) throws KettleException { try { includeFilename = rep.getStepAttributeBoolean( id_step, "include" ); filenameField = rep.getStepAttributeString( id_step, "include_field" ); addResultFile = rep.getStepAttributeBoolean( id_step, "addresultfile" ); nameSpaceAware = rep.getStepAttributeBoolean( id_step, "namespaceaware" ); ignorecomments = rep.getStepAttributeBoolean( id_step, "ignorecomments" ); readurl = rep.getStepAttributeBoolean( id_step, "readurl" ); validating = rep.getStepAttributeBoolean( id_step, "validating" ); usetoken = rep.getStepAttributeBoolean( id_step, "usetoken" ); IsIgnoreEmptyFile = rep.getStepAttributeBoolean( id_step, "IsIgnoreEmptyFile" ); doNotFailIfNoFile = rep.getStepAttributeBoolean( id_step, "doNotFailIfNoFile" ); includeRowNumber = rep.getStepAttributeBoolean( id_step, "rownum" ); rowNumberField = rep.getStepAttributeString( id_step, "rownum_field" ); rowLimit = rep.getStepAttributeInteger( id_step, "limit" ); loopxpath = rep.getStepAttributeString( id_step, "loopxpath" ); encoding = rep.getStepAttributeString( id_step, "encoding" ); int nrFiles = rep.countNrStepAttributes( id_step, "file_name" ); int nrFields = rep.countNrStepAttributes( id_step, "field_name" ); allocate( nrFiles, nrFields ); for ( int i = 0; i < nrFiles; i++ ) { fileName[i] = rep.getStepAttributeString( id_step, i, "file_name" ); fileMask[i] = rep.getStepAttributeString( id_step, i, "file_mask" ); excludeFileMask[i] = rep.getStepAttributeString( id_step, i, "exclude_file_mask" ); fileRequired[i] = rep.getStepAttributeString( id_step, i, "file_required" ); includeSubFolders[i] = rep.getStepAttributeString( id_step, i, "include_subfolders" ); } for ( int i = 0; i < nrFields; i++ ) { GetXMLDataField field = new GetXMLDataField(); field.setName( rep.getStepAttributeString( id_step, i, "field_name" ) ); field.setXPath( rep.getStepAttributeString( id_step, i, "field_xpath" ) ); field.setElementType( 
GetXMLDataField.getElementTypeByCode( rep.getStepAttributeString( id_step, i, "element_type" ) ) ); field.setResultType( GetXMLDataField.getResultTypeByCode( rep .getStepAttributeString( id_step, i, "result_type" ) ) ); field.setType( ValueMeta.getType( rep.getStepAttributeString( id_step, i, "field_type" ) ) ); field.setFormat( rep.getStepAttributeString( id_step, i, "field_format" ) ); field.setCurrencySymbol( rep.getStepAttributeString( id_step, i, "field_currency" ) ); field.setDecimalSymbol( rep.getStepAttributeString( id_step, i, "field_decimal" ) ); field.setGroupSymbol( rep.getStepAttributeString( id_step, i, "field_group" ) ); field.setLength( (int) rep.getStepAttributeInteger( id_step, i, "field_length" ) ); field.setPrecision( (int) rep.getStepAttributeInteger( id_step, i, "field_precision" ) ); field.setTrimType( GetXMLDataField.getTrimTypeByCode( rep .getStepAttributeString( id_step, i, "field_trim_type" ) ) ); field.setRepeated( rep.getStepAttributeBoolean( id_step, i, "field_repeat" ) ); inputFields[i] = field; } inFields = rep.getStepAttributeBoolean( id_step, "IsInFields" ); IsAFile = rep.getStepAttributeBoolean( id_step, "IsAFile" ); xmlField = rep.getStepAttributeString( id_step, "XmlField" ); prunePath = rep.getStepAttributeString( id_step, "prunePath" ); shortFileFieldName = rep.getStepAttributeString( id_step, "shortFileFieldName" ); pathFieldName = rep.getStepAttributeString( id_step, "pathFieldName" ); hiddenFieldName = rep.getStepAttributeString( id_step, "hiddenFieldName" ); lastModificationTimeFieldName = rep.getStepAttributeString( id_step, "lastModificationTimeFieldName" ); uriNameFieldName = rep.getStepAttributeString( id_step, "uriNameFieldName" ); rootUriNameFieldName = rep.getStepAttributeString( id_step, "rootUriNameFieldName" ); extensionFieldName = rep.getStepAttributeString( id_step, "extensionFieldName" ); sizeFieldName = rep.getStepAttributeString( id_step, "sizeFieldName" ); } catch ( Exception e ) { throw new 
KettleException( BaseMessages.getString( PKG, "GetXMLDataMeta.Exception.ErrorReadingRepository" ), e );
    }
  }

  /**
   * Writes the settings of this step to the repository: the general options, one attribute set per configured file
   * entry, one attribute set per input field, and finally the "additional output field" names.
   *
   * @param rep
   *          the repository to write to
   * @param metaStore
   *          the metaStore (not used by this method)
   * @param id_transformation
   *          the id of the transformation the step belongs to
   * @param id_step
   *          the id of the step
   * @throws KettleException
   *           wrapping any exception raised while saving
   */
  public void saveRep( Repository rep, IMetaStore metaStore, ObjectId id_transformation, ObjectId id_step )
    throws KettleException {
    try {
      rep.saveStepAttribute( id_transformation, id_step, "include", includeFilename );
      rep.saveStepAttribute( id_transformation, id_step, "include_field", filenameField );
      rep.saveStepAttribute( id_transformation, id_step, "addresultfile", addResultFile );
      rep.saveStepAttribute( id_transformation, id_step, "namespaceaware", nameSpaceAware );
      rep.saveStepAttribute( id_transformation, id_step, "ignorecomments", ignorecomments );
      rep.saveStepAttribute( id_transformation, id_step, "readurl", readurl );
      rep.saveStepAttribute( id_transformation, id_step, "validating", validating );
      rep.saveStepAttribute( id_transformation, id_step, "usetoken", usetoken );
      rep.saveStepAttribute( id_transformation, id_step, "IsIgnoreEmptyFile", IsIgnoreEmptyFile );
      rep.saveStepAttribute( id_transformation, id_step, "doNotFailIfNoFile", doNotFailIfNoFile );

      rep.saveStepAttribute( id_transformation, id_step, "rownum", includeRowNumber );
      rep.saveStepAttribute( id_transformation, id_step, "rownum_field", rowNumberField );
      rep.saveStepAttribute( id_transformation, id_step, "limit", rowLimit );
      rep.saveStepAttribute( id_transformation, id_step, "loopxpath", loopxpath );
      rep.saveStepAttribute( id_transformation, id_step, "encoding", encoding );

      // The file related arrays are parallel: entry i of each one describes the same file definition.
      for ( int i = 0; i < fileName.length; i++ ) {
        rep.saveStepAttribute( id_transformation, id_step, i, "file_name", fileName[i] );
        rep.saveStepAttribute( id_transformation, id_step, i, "file_mask", fileMask[i] );
        rep.saveStepAttribute( id_transformation, id_step, i, "exclude_file_mask", excludeFileMask[i] );
        rep.saveStepAttribute( id_transformation, id_step, i, "file_required", fileRequired[i] );
        rep.saveStepAttribute( id_transformation, id_step, i, "include_subfolders", includeSubFolders[i] );
      }

      for ( int i = 0; i < inputFields.length; i++ ) {
        GetXMLDataField field = inputFields[i];

        rep.saveStepAttribute( id_transformation, id_step, i, "field_name", field.getName() );
        rep.saveStepAttribute( id_transformation, id_step, i, "field_xpath", field.getXPath() );
        rep.saveStepAttribute( id_transformation, id_step, i, "element_type", field.getElementTypeCode() );
        rep.saveStepAttribute( id_transformation, id_step, i, "result_type", field.getResultTypeCode() );
        rep.saveStepAttribute( id_transformation, id_step, i, "field_type", field.getTypeDesc() );
        rep.saveStepAttribute( id_transformation, id_step, i, "field_format", field.getFormat() );
        rep.saveStepAttribute( id_transformation, id_step, i, "field_currency", field.getCurrencySymbol() );
        rep.saveStepAttribute( id_transformation, id_step, i, "field_decimal", field.getDecimalSymbol() );
        rep.saveStepAttribute( id_transformation, id_step, i, "field_group", field.getGroupSymbol() );
        rep.saveStepAttribute( id_transformation, id_step, i, "field_length", field.getLength() );
        rep.saveStepAttribute( id_transformation, id_step, i, "field_precision", field.getPrecision() );
        rep.saveStepAttribute( id_transformation, id_step, i, "field_trim_type", field.getTrimTypeCode() );
        rep.saveStepAttribute( id_transformation, id_step, i, "field_repeat", field.isRepeated() );
      }

      rep.saveStepAttribute( id_transformation, id_step, "IsInFields", inFields );
      rep.saveStepAttribute( id_transformation, id_step, "IsAFile", IsAFile );
      rep.saveStepAttribute( id_transformation, id_step, "XmlField", xmlField );
      rep.saveStepAttribute( id_transformation, id_step, "prunePath", prunePath );

      rep.saveStepAttribute( id_transformation, id_step, "shortFileFieldName", shortFileFieldName );
      rep.saveStepAttribute( id_transformation, id_step, "pathFieldName", pathFieldName );
      rep.saveStepAttribute( id_transformation, id_step, "hiddenFieldName", hiddenFieldName );
      rep.saveStepAttribute( id_transformation, id_step, "lastModificationTimeFieldName",
        lastModificationTimeFieldName );
      rep.saveStepAttribute( id_transformation, id_step, "uriNameFieldName", uriNameFieldName );
      rep.saveStepAttribute( id_transformation, id_step, "rootUriNameFieldName", rootUriNameFieldName );
      rep.saveStepAttribute( id_transformation, id_step, "extensionFieldName", extensionFieldName );
      // readRep() reads "sizeFieldName" back, so it has to be written here as well or the setting is lost.
      rep.saveStepAttribute( id_transformation, id_step, "sizeFieldName", sizeFieldName );
    } catch ( Exception e ) {
      throw new KettleException( BaseMessages.getString(
        PKG, "GetXMLDataMeta.Exception.ErrorSavingToRepository", "" + id_step ), e );
    }
  }

  /**
   * Builds the list of input files from the configured file names, masks, exclude masks, "required" flags and
   * sub-folder flags.
   *
   * @param space
   *          the variable space handed to {@link FileInputList#createFileList}
   * @return the resulting file list
   */
  public FileInputList getFiles( VariableSpace space ) {
    return FileInputList.createFileList(
      space, fileName, fileMask, excludeFileMask, fileRequired, includeSubFolderBoolean() );
  }

  /**
   * Converts the textual "include sub-folders" flags into booleans; an entry is true only when it equals "Y"
   * (ignoring case).
   *
   * @return one boolean per entry of the file name array
   */
  private boolean[] includeSubFolderBoolean() {
    int len = fileName.length;
    boolean[] includeSubFolderBoolean = new boolean[len];
    for ( int i = 0; i < len; i++ ) {
      includeSubFolderBoolean[i] = YES.equalsIgnoreCase( includeSubFolders[i] );
    }
    return includeSubFolderBoolean;
  }

  /**
   * Verifies the step settings and appends the findings to <code>remarks</code>: whether the step receives input,
   * whether a loop XPath and at least one input field are defined and, depending on the mode, whether the XML
   * source field is set or the file definitions resolve to at least one file.
   *
   * @param remarks
   *          the list the check results are added to
   * @param transMeta
   *          the transformation metadata, also used as variable space when resolving the files
   * @param stepMeta
   *          the step the remarks are attached to
   * @param prev
   *          not used by this check
   * @param input
   *          the names of the steps sending rows to this step
   * @param output
   *          not used by this check
   * @param info
   *          not used by this check
   * @param space
   *          not used by this check
   * @param repository
   *          not used by this check
   * @param metaStore
   *          not used by this check
   */
  public void check( List<CheckResultInterface> remarks, TransMeta transMeta, StepMeta stepMeta,
    RowMetaInterface prev, String[] input, String[] output, RowMetaInterface info, VariableSpace space,
    Repository repository, IMetaStore metaStore ) {
    CheckResult cr;

    // See if we get input...
    // Having no incoming hops is not a failure (the message itself says that no input is expected), so both
    // situations are reported as OK remarks.
    if ( input.length <= 0 ) {
      cr = new CheckResult( CheckResult.TYPE_RESULT_OK,
        BaseMessages.getString( PKG, "GetXMLDataMeta.CheckResult.NoInputExpected" ), stepMeta );
      remarks.add( cr );
    } else {
      cr = new CheckResult( CheckResult.TYPE_RESULT_OK,
        BaseMessages.getString( PKG, "GetXMLDataMeta.CheckResult.NoInput" ), stepMeta );
      remarks.add( cr );
    }

    // control Xpath
    if ( Utils.isEmpty( getLoopXPath() ) ) {
      cr = new CheckResult( CheckResult.TYPE_RESULT_ERROR,
        BaseMessages.getString( PKG, "GetXMLDataMeta.CheckResult.NoLoopXpath" ), stepMeta );
      remarks.add( cr );
    }

    if ( getInputFields().length <= 0 ) {
      cr = new CheckResult( CheckResult.TYPE_RESULT_ERROR,
        BaseMessages.getString( PKG, "GetXMLDataMeta.CheckResult.NoInputField" ), stepMeta );
      remarks.add( cr );
    }

    if ( isInFields() ) {
      // The XML comes from a field of the incoming rows: that field has to be specified.
      if ( Utils.isEmpty( getXMLField() ) ) {
        cr = new CheckResult( CheckResult.TYPE_RESULT_ERROR,
          BaseMessages.getString( PKG, "GetXMLDataMeta.CheckResult.NoField" ), stepMeta );
        remarks.add( cr );
      } else {
        cr = new CheckResult( CheckResult.TYPE_RESULT_OK,
          BaseMessages.getString( PKG, "GetXMLDataMeta.CheckResult.FieldOk" ), stepMeta );
        remarks.add( cr );
      }
    } else {
      // The XML comes from files: the file definitions have to resolve to at least one file.
      FileInputList fileInputList = getFiles( transMeta );
      if ( fileInputList == null || fileInputList.getFiles().size() == 0 ) {
        cr = new CheckResult( CheckResult.TYPE_RESULT_ERROR,
          BaseMessages.getString( PKG, "GetXMLDataMeta.CheckResult.NoFiles" ), stepMeta );
        remarks.add( cr );
      } else {
        cr = new CheckResult( CheckResult.TYPE_RESULT_OK,
          BaseMessages.getString( PKG, "GetXMLDataMeta.CheckResult.FilesOk", "" + fileInputList.getFiles().size() ),
          stepMeta );
        remarks.add( cr );
      }
    }
  }

  /**
   * Creates the {@link GetXMLData} step instance for the given step metadata.
   *
   * @return a new {@link GetXMLData} built from the supplied arguments
   */
  public StepInterface getStep( StepMeta stepMeta, StepDataInterface stepDataInterface, int cnr, TransMeta tr,
    Trans trans ) {
    return new GetXMLData( stepMeta, stepDataInterface, cnr, tr, trans );
  }

  /**
   * @return a new, empty {@link GetXMLDataData} instance
   */
  public StepDataInterface getStepData() {
    return new GetXMLDataData();
  }

  /**
   * @return always <code>true</code>
   */
  public boolean supportsErrorHandling() {
    return true;
  }

  /**
   * Since the exported transformation that runs this will reside in a ZIP file, we can't reference files relatively.
   * So what this does is turn the name of files into absolute paths OR it simply includes the resource in the ZIP
   * file. For now, we'll simply turn it into an absolute path and pray that the file is on a shared drive or
   * something like that.
   * <p>
   * Nothing is changed when the XML is read from a field or when the file definitions resolve to no file at all.
   *
   * @param space
   *          the variable space to use
   * @param definitions
   *          not used by this implementation
   * @param resourceNamingInterface
   *          not used by this implementation
   * @param repository
   *          The repository to optionally load other resources from (to be converted to XML)
   * @param metaStore
   *          the metaStore in which non-kettle metadata could reside.
   *
   * @return always <code>null</code> in this implementation
   * @throws KettleException
   *           wrapping any exception raised while resolving the files
   */
  public String exportResources( VariableSpace space, Map<String, ResourceDefinition> definitions,
    ResourceNamingInterface resourceNamingInterface, Repository repository, IMetaStore metaStore )
    throws KettleException {
    try {
      // The object that we're modifying here is a copy of the original!
      // So let's change the filename from relative to absolute by grabbing the file object...
      // In case the name of the file comes from previous steps, forget about this!
      //
      List<String> newFilenames = new ArrayList<String>();

      if ( !isInFields() ) {
        FileInputList fileList = getFiles( space );
        if ( fileList.getFiles().size() > 0 ) {
          for ( FileObject fileObject : fileList.getFiles() ) {
            // From : ${Internal.Transformation.Filename.Directory}/../foo/bar.xml
            // To : /home/matt/test/files/foo/bar.xml
            //
            // If the file doesn't exist, forget about this effort too!
            //
            if ( fileObject.exists() ) {
              // Convert to an absolute path and add it to the list.
              //
              newFilenames.add( fileObject.getName().getPath() );
            }
          }

          // Still here: set a new list of absolute filenames!
          //
          // All file related arrays are indexed up to fileName.length (see includeSubFolderBoolean() and
          // saveRep()), so every one of them is rebuilt with the new size to keep them parallel.
          //
          int nrFiles = newFilenames.size();
          fileName = newFilenames.toArray( new String[nrFiles] );
          fileMask = new String[nrFiles]; // all null since converted to absolute path.
          excludeFileMask = new String[nrFiles]; // all null: the entries are concrete files now.
          fileRequired = new String[nrFiles]; // all null, turn to "Y" :
          includeSubFolders = new String[nrFiles]; // all null, turn to "N" :
          for ( int i = 0; i < nrFiles; i++ ) {
            fileRequired[i] = YES;
            includeSubFolders[i] = "N";
          }
        }
      }
      return null;
    } catch ( Exception e ) {
      throw new KettleException( e );
    }
  }

  /**
   * @return a new {@link GetXMLDataMetaInjection} wrapping this metadata object
   */
  @Override
  public StepMetaInjectionInterface getStepMetaInjectionInterface() {
    return new GetXMLDataMetaInjection( this );
  }

  /**
   * @return an array holding only {@link TransMeta.TransformationType#Normal}
   */
  @Override
  public TransMeta.TransformationType[] getSupportedTransformationTypes() {
    return new TransMeta.TransformationType[] { TransMeta.TransformationType.Normal };
  }
}