/*! ****************************************************************************** * * Pentaho Data Integration * * Copyright (C) 2002-2016 by Pentaho : http://www.pentaho.com * ******************************************************************************* * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * ******************************************************************************/ package org.pentaho.di.trans.steps.getxmldata; import java.util.ArrayList; import java.util.List; import org.pentaho.di.core.Const; import org.pentaho.di.core.util.Utils; import org.pentaho.di.core.exception.KettleException; import org.pentaho.di.core.row.ValueMeta; import org.pentaho.di.core.row.ValueMetaInterface; import org.pentaho.di.core.row.value.ValueMetaFactory; import org.pentaho.di.trans.step.BaseStepMetaInjection; import org.pentaho.di.trans.step.StepInjectionMetaEntry; import org.pentaho.di.trans.step.StepMetaInjectionEnumEntry; import org.pentaho.di.trans.step.StepMetaInjectionInterface; /** * To keep it simple, this metadata injection interface only supports the fields in the spreadsheet for the time being. * * @author Matt */ public class GetXMLDataMetaInjection extends BaseStepMetaInjection implements StepMetaInjectionInterface { private GetXMLDataMeta meta; public GetXMLDataMetaInjection( GetXMLDataMeta meta ) { this.meta = meta; } private enum Entry implements StepMetaInjectionEnumEntry { INCLUDE_ROWNUMBER( ValueMetaInterface.TYPE_STRING, "Flag indicating that a row number field should be included in the output (Y/N)" ), ROWNUMBER_FIELD( ValueMetaInterface.TYPE_STRING, "The name of the field in the output containing the row number" ), ROWLIMIT( ValueMetaInterface.TYPE_STRING, "The maximum number or lines to read (integer)" ), LOOP_XPATH( ValueMetaInterface.TYPE_STRING, "The maximum number or lines to read (The XPath location to loop over" ), ENCODING( ValueMetaInterface.TYPE_STRING, "The file encoding" ), XML_FIELD( ValueMetaInterface.TYPE_STRING, "The name of the input field which contains the XML" ), IN_FIELD( ValueMetaInterface.TYPE_STRING, "Flag indicating that the XML source is in a field" ), IN_FILE( ValueMetaInterface.TYPE_STRING, "Flag indicating that the XML source is in a file" ), ADD_RESULT_FILE( ValueMetaInterface.TYPE_STRING, "Add the file(s) to the result? (Y/N)" ), NAMESPACE_AWARE( ValueMetaInterface.TYPE_STRING, "Parse namespace aware? (Y/N)" ), VALIDATE( ValueMetaInterface.TYPE_STRING, "Validate the XML? (Y/N)" ), USE_TOKENS( ValueMetaInterface.TYPE_STRING, "Process using tokens? (Y/N)" ), IGNORE_EMPTY_FILES( ValueMetaInterface.TYPE_STRING, "Ignore empty files? (Y/N)" ), IGNORE_MISSING_FILES( ValueMetaInterface.TYPE_STRING, "Ignore missing files? (Y/N)" ), IGNORE_COMMENTS( ValueMetaInterface.TYPE_STRING, "Ignore comments? (Y/N)" ), READ_URL( ValueMetaInterface.TYPE_STRING, "Read URL as source? (Y/N)" ), PRUNE_PATH( ValueMetaInterface.TYPE_STRING, "If you set this path, it activates the streaming algorithm to process large files" ), SHORT_FILE_FIELDNAME( ValueMetaInterface.TYPE_STRING, "Output field: short file name" ), FILE_PATH_FIELDNAME( ValueMetaInterface.TYPE_STRING, "Output field: file path" ), FILE_HIDDEN_FIELDNAME( ValueMetaInterface.TYPE_STRING, "Output field: hidden file" ), FILE_MODIFICATION_FIELDNAME( ValueMetaInterface.TYPE_STRING, "Output field: file modification date" ), FILE_URI_NAME_FIELDNAME( ValueMetaInterface.TYPE_STRING, "Output field: file URI name" ), FILE_ROOT_URI_FIELDNAME( ValueMetaInterface.TYPE_STRING, "Output field: file root URI" ), FILE_EXTENSION_FIELDNAME( ValueMetaInterface.TYPE_STRING, "Output field: file extesion" ), FILE_SIZE_FIELDNAME( ValueMetaInterface.TYPE_STRING, "Output field: file size" ), INPUTFIELDS( ValueMetaInterface.TYPE_NONE, "All the input fields" ), INPUTFIELD( ValueMetaInterface.TYPE_NONE, "One input field" ), INPUTFIELD_NAME( INPUTFIELD, ValueMetaInterface.TYPE_STRING, "The name of the field" ), INPUTFIELD_XPATH( INPUTFIELD, ValueMetaInterface.TYPE_STRING, "The xpath of the field" ), INPUTFIELD_TYPE( INPUTFIELD, ValueMetaInterface.TYPE_STRING, "The type of the field (String, Integer, Date, ...)" ), INPUTFIELD_ELEMENT_TYPE( INPUTFIELD, ValueMetaInterface.TYPE_STRING, "The element type of the field (node, attribute)" ), INPUTFIELD_RESULT_TYPE( INPUTFIELD, ValueMetaInterface.TYPE_STRING, "The element result type (valueof, singlenode)" ), INPUTFIELD_LENGTH( INPUTFIELD, ValueMetaInterface.TYPE_STRING, "The length of the field" ), INPUTFIELD_PRECISION( INPUTFIELD, ValueMetaInterface.TYPE_STRING, "The precision of the field" ), INPUTFIELD_FORMAT( INPUTFIELD, ValueMetaInterface.TYPE_STRING, "The format mask of the field" ), INPUTFIELD_TRIM_TYPE( INPUTFIELD, ValueMetaInterface.TYPE_STRING, "The trim type of the field (none, left, right, both)" ), INPUTFIELD_CURRENCY( INPUTFIELD, ValueMetaInterface.TYPE_STRING, "The currency symbol" ), INPUTFIELD_GROUPING( INPUTFIELD, ValueMetaInterface.TYPE_STRING, "The grouping symbol" ), INPUTFIELD_DECIMAL( INPUTFIELD, ValueMetaInterface.TYPE_STRING, "The decimal symbol" ), INPUTFIELD_REPEAT( INPUTFIELD, ValueMetaInterface.TYPE_STRING, "Flag to indicate we need to repeat the previous row value if the current value is null (Y/N)" ), FILENAMES( ValueMetaInterface.TYPE_NONE, "All the file names" ), FILENAME( ValueMetaInterface.TYPE_NONE, "One file name" ), FILE_PATH( FILENAME, ValueMetaInterface.TYPE_STRING, "The path to the file" ), FILE_INCLUDE_MASK( FILENAME, ValueMetaInterface.TYPE_STRING, "The regular expression to match files to include" ), FILE_EXCLUDE_MASK( FILENAME, ValueMetaInterface.TYPE_STRING, "The regular expression to match files to exclude" ), FILE_REQUIRED( FILENAME, ValueMetaInterface.TYPE_STRING, "Flag to indicate that this file is required or not (Y/N)" ), FILE_INCLUDE_SUBFOLDERS( FILENAME, ValueMetaInterface.TYPE_STRING, "Flag to indicate that subfolders should be included or not (Y/N)" ); private int valueType; private String description; private Entry parent; private Entry( int valueType, String description ) { this.valueType = valueType; this.description = description; } private Entry( Entry parent, int valueType, String description ) { this.parent = parent; this.valueType = valueType; this.description = description; } /** * @return the valueType */ public int getValueType() { return valueType; } /** * @return the description */ public String getDescription() { return description; } public static Entry findEntry( String key ) { return Entry.valueOf( key ); } public Entry getParent() { return parent; } } @Override public List<StepInjectionMetaEntry> getStepInjectionMetadataEntries() throws KettleException { List<StepInjectionMetaEntry> all = new ArrayList<StepInjectionMetaEntry>(); // All top level entries: just add them // addTopLevelStepMetaInjectionEntries( all, Entry.values() ); // Add the file names & fields // addNestedStepMetaInjectionEntries( all, Entry.values(), Entry.FILENAMES, Entry.FILENAME ); addNestedStepMetaInjectionEntries( all, Entry.values(), Entry.INPUTFIELDS, Entry.INPUTFIELD ); return all; } protected class FilenameLine { public String fileName; public String fileMask; public String fileRequired; public String excludeFileMask; public String includeSubFolders; public FilenameLine( String fileName, String fileMask, String fileRequired, String excludeFileMask, String includeSubFolders ) { super(); this.fileName = fileName; this.fileMask = fileMask; this.fileRequired = fileRequired; this.excludeFileMask = excludeFileMask; this.includeSubFolders = includeSubFolders; } } @Override public void injectStepMetadataEntries( List<StepInjectionMetaEntry> all ) throws KettleException { List<GetXMLDataMetaInjection.FilenameLine> filenameLines = new ArrayList<GetXMLDataMetaInjection.FilenameLine>(); List<GetXMLDataField> fields = new ArrayList<GetXMLDataField>(); // Parse the fields, inject into the meta class.. // for ( StepInjectionMetaEntry lookFields : all ) { String lookFieldsValue = lookFields.getValue() instanceof String ? (String) lookFields.getValue() : null; Entry fieldsEntry = Entry.findEntry( lookFields.getKey() ); if ( fieldsEntry != null ) { switch ( fieldsEntry ) { case FILENAMES: for ( StepInjectionMetaEntry lookField : lookFields.getDetails() ) { Entry fieldEntry = Entry.findEntry( lookField.getKey() ); if ( fieldEntry != null ) { if ( fieldEntry == Entry.FILENAME ) { String fileName = null; String fileMask = null; String fileRequired = null; String excludeFileMask = null; String includeSubFolders = null; List<StepInjectionMetaEntry> entries = lookField.getDetails(); for ( StepInjectionMetaEntry entry : entries ) { Entry metaEntry = Entry.findEntry( entry.getKey() ); if ( metaEntry != null ) { String value = (String) entry.getValue(); switch ( metaEntry ) { case FILE_PATH: fileName = value; break; case FILE_INCLUDE_MASK: fileMask = value; break; case FILE_EXCLUDE_MASK: excludeFileMask = value; break; case FILE_REQUIRED: fileRequired = value; break; case FILE_INCLUDE_SUBFOLDERS: includeSubFolders = value; break; default: break; } } } if ( !Utils.isEmpty( fileName ) ) { filenameLines.add( new FilenameLine( fileName, fileMask, fileRequired, excludeFileMask, includeSubFolders ) ); } } } } break; case INPUTFIELDS: for ( StepInjectionMetaEntry lookField : lookFields.getDetails() ) { Entry fieldEntry = Entry.findEntry( lookField.getKey() ); if ( fieldEntry != null ) { if ( fieldEntry == Entry.INPUTFIELD ) { GetXMLDataField field = new GetXMLDataField(); List<StepInjectionMetaEntry> entries = lookField.getDetails(); for ( StepInjectionMetaEntry entry : entries ) { Entry metaEntry = Entry.findEntry( entry.getKey() ); if ( metaEntry != null ) { String value = (String) entry.getValue(); switch ( metaEntry ) { case INPUTFIELD_NAME: field.setName( value ); break; case INPUTFIELD_XPATH: field.setXPath( value ); break; case INPUTFIELD_TYPE: field.setType( ValueMetaFactory.getIdForValueMeta( value ) ); break; case INPUTFIELD_ELEMENT_TYPE: field.setElementType( GetXMLDataField.getElementTypeByCode( value ) ); break; case INPUTFIELD_RESULT_TYPE: field.setResultType( GetXMLDataField.getResultTypeByCode( value ) ); break; case INPUTFIELD_LENGTH: field.setLength( Const.toInt( value, -1 ) ); break; case INPUTFIELD_PRECISION: field.setPrecision( Const.toInt( value, -1 ) ); break; case INPUTFIELD_FORMAT: field.setFormat( value ); break; case INPUTFIELD_TRIM_TYPE: field.setTrimType( ValueMeta.getTrimTypeByCode( value ) ); break; case INPUTFIELD_CURRENCY: field.setCurrencySymbol( value ); break; case INPUTFIELD_GROUPING: field.setGroupSymbol( value ); break; case INPUTFIELD_DECIMAL: field.setDecimalSymbol( value ); break; case INPUTFIELD_REPEAT: field.setRepeated( "Y".equalsIgnoreCase( value ) ); break; default: break; } } } fields.add( field ); } } } break; case INCLUDE_ROWNUMBER: meta.setIncludeRowNumber( "Y".equalsIgnoreCase( lookFieldsValue ) ); break; case ROWNUMBER_FIELD: meta.setRowNumberField( lookFieldsValue ); break; case ROWLIMIT: meta.setRowLimit( Const.toLong( lookFieldsValue, 0L ) ); break; case LOOP_XPATH: meta.setLoopXPath( lookFieldsValue ); break; case ENCODING: meta.setEncoding( lookFieldsValue ); break; case XML_FIELD: meta.setXMLField( lookFieldsValue ); break; case IN_FIELD: meta.setInFields( "Y".equalsIgnoreCase( lookFieldsValue ) ); break; case IN_FILE: meta.setIsAFile( "Y".equalsIgnoreCase( lookFieldsValue ) ); break; case ADD_RESULT_FILE: meta.setAddResultFile( "Y".equalsIgnoreCase( lookFieldsValue ) ); break; case NAMESPACE_AWARE: meta.setNamespaceAware( "Y".equalsIgnoreCase( lookFieldsValue ) ); break; case VALIDATE: meta.setValidating( "Y".equalsIgnoreCase( lookFieldsValue ) ); break; case USE_TOKENS: meta.setuseToken( "Y".equalsIgnoreCase( lookFieldsValue ) ); break; case IGNORE_EMPTY_FILES: meta.setIgnoreEmptyFile( "Y".equalsIgnoreCase( lookFieldsValue ) ); break; case IGNORE_MISSING_FILES: meta.setdoNotFailIfNoFile( "Y".equalsIgnoreCase( lookFieldsValue ) ); break; case IGNORE_COMMENTS: meta.setIgnoreComments( "Y".equalsIgnoreCase( lookFieldsValue ) ); break; case READ_URL: meta.setReadUrl( "Y".equalsIgnoreCase( lookFieldsValue ) ); break; case PRUNE_PATH: meta.setPrunePath( lookFieldsValue ); break; case SHORT_FILE_FIELDNAME: meta.setShortFileNameField( lookFieldsValue ); break; case FILE_PATH_FIELDNAME: meta.setFilenameField( lookFieldsValue ); break; case FILE_HIDDEN_FIELDNAME: meta.setIsHiddenField( lookFieldsValue ); break; case FILE_MODIFICATION_FIELDNAME: meta.setLastModificationDateField( lookFieldsValue ); break; case FILE_URI_NAME_FIELDNAME: meta.setUriField( lookFieldsValue ); break; case FILE_ROOT_URI_FIELDNAME: meta.setRootUriField( lookFieldsValue ); break; case FILE_EXTENSION_FIELDNAME: meta.setExtensionField( lookFieldsValue ); break; case FILE_SIZE_FIELDNAME: meta.setSizeField( lookFieldsValue ); break; default: break; } } } // Only modify fields or file names if there was injection taking place... // if ( fields.size() > 0 ) { meta.setInputFields( fields.toArray( new GetXMLDataField[fields.size()] ) ); } if ( filenameLines.size() > 0 ) { meta.allocateFiles( filenameLines.size() ); // CHECKSTYLE:Indentation:OFF for ( int i = 0; i < filenameLines.size(); i++ ) { FilenameLine line = filenameLines.get( i ); meta.getFileName()[i] = line.fileName; meta.getFileMask()[i] = line.fileMask; meta.getFileRequired()[i] = line.fileRequired; meta.getExludeFileMask()[i] = line.excludeFileMask; meta.getIncludeSubFolders()[i] = line.includeSubFolders; } } } public List<StepInjectionMetaEntry> extractStepMetadataEntries() throws KettleException { return null; } public GetXMLDataMeta getMeta() { return meta; } public class ExcelInputSheet { public String sheetName; public int startCol; public int startRow; /** * @param sheetName * @param startCol * @param startRow */ private ExcelInputSheet( String sheetName, int startCol, int startRow ) { this.sheetName = sheetName; this.startCol = startCol; this.startRow = startRow; } } }