/*! ****************************************************************************** * * Pentaho Data Integration * * Copyright (C) 2002-2016 by Pentaho : http://www.pentaho.com * ******************************************************************************* * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * ******************************************************************************/ package org.pentaho.di.ui.trans.steps.getxmldata; import java.io.InputStream; import java.io.StringReader; import java.lang.reflect.InvocationTargetException; import java.net.URL; import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.HashSet; import java.util.List; import org.dom4j.Attribute; import org.dom4j.Document; import org.dom4j.Element; import org.dom4j.Node; import org.dom4j.io.SAXReader; import org.eclipse.core.runtime.IProgressMonitor; import org.eclipse.jface.dialogs.ProgressMonitorDialog; import org.eclipse.jface.operation.IRunnableWithProgress; import org.eclipse.swt.widgets.Shell; import org.pentaho.di.compatibility.Value; import org.pentaho.di.core.util.Utils; import org.pentaho.di.core.RowMetaAndData; import org.pentaho.di.core.vfs.KettleVFS; import org.pentaho.di.core.xml.XMLParserFactoryProducer; import org.pentaho.di.i18n.BaseMessages; import org.pentaho.di.trans.steps.getxmldata.GetXMLDataField; import org.pentaho.di.trans.steps.getxmldata.GetXMLDataMeta; import org.pentaho.di.trans.steps.getxmldata.IgnoreDTDEntityResolver; import org.pentaho.di.ui.core.dialog.ErrorDialog; /** * Takes care of displaying a dialog that will handle the wait while we're finding out loop nodes for an XML file * * @author Samatar * @since 07-apr-2010 */ public class XMLInputFieldsImportProgressDialog { private static Class<?> PKG = GetXMLDataMeta.class; // for i18n purposes, needed by Translator2!! private static String VALUE_NAME = "Name"; private static String VALUE_PATH = "Path"; private static String VALUE_ELEMENT = "Element"; private static String VALUE_RESULT = "result"; private static String VALUE_TYPE = "Type"; private static String VALUE_FORMAT = "Format"; private Shell shell; private GetXMLDataMeta meta; private String filename; private String encoding; private int nr; private String loopXPath; private HashSet<String> list; private List<RowMetaAndData> fieldsList; private RowMetaAndData[] fields; private String xml; private String url; /** * Creates a new dialog that will handle the wait while we're finding out loop nodes for an XML file */ public XMLInputFieldsImportProgressDialog( Shell shell, GetXMLDataMeta meta, String filename, String encoding, String loopXPath ) { this.shell = shell; this.meta = meta; this.fields = null; this.filename = filename; this.encoding = encoding; this.nr = 0; this.loopXPath = loopXPath; this.list = new HashSet<String>(); this.fieldsList = new ArrayList<RowMetaAndData>(); } public XMLInputFieldsImportProgressDialog( Shell shell, GetXMLDataMeta meta, String xmlSource, boolean useUrl, String loopXPath ) { this.shell = shell; this.meta = meta; this.fields = null; this.filename = null; this.encoding = null; this.nr = 0; this.loopXPath = loopXPath; this.list = new HashSet<String>(); this.fieldsList = new ArrayList<RowMetaAndData>(); if ( useUrl ) { this.xml = null; this.url = xmlSource; } else { this.xml = xmlSource; this.url = null; } } public RowMetaAndData[] open() { IRunnableWithProgress op = new IRunnableWithProgress() { public void run( IProgressMonitor monitor ) throws InvocationTargetException, InterruptedException { try { fields = doScan( monitor ); } catch ( Exception e ) { e.printStackTrace(); throw new InvocationTargetException( e, BaseMessages.getString( PKG, "GetXMLDateLoopNodesImportProgressDialog.Exception.ErrorScanningFile", filename, e.toString() ) ); } } }; try { ProgressMonitorDialog pmd = new ProgressMonitorDialog( shell ); pmd.run( true, true, op ); } catch ( InvocationTargetException e ) { new ErrorDialog( shell, BaseMessages.getString( PKG, "GetXMLDateLoopNodesImportProgressDialog.ErrorScanningFile.Title" ), BaseMessages.getString( PKG, "GetXMLDateLoopNodesImportProgressDialog.ErrorScanningFile.Message" ), e ); } catch ( InterruptedException e ) { new ErrorDialog( shell, BaseMessages.getString( PKG, "GetXMLDateLoopNodesImportProgressDialog.ErrorScanningFile.Title" ), BaseMessages.getString( PKG, "GetXMLDateLoopNodesImportProgressDialog.ErrorScanningFile.Message" ), e ); } return fields; } @SuppressWarnings( "unchecked" ) private RowMetaAndData[] doScan( IProgressMonitor monitor ) throws Exception { monitor.beginTask( BaseMessages.getString( PKG, "GetXMLDateLoopNodesImportProgressDialog.Task.ScanningFile", filename ), 1 ); SAXReader reader = XMLParserFactoryProducer.getSAXReader( null ); monitor.worked( 1 ); if ( monitor.isCanceled() ) { return null; } // Validate XML against specified schema? if ( meta.isValidating() ) { reader.setValidation( true ); reader.setFeature( "http://apache.org/xml/features/validation/schema", true ); } else { // Ignore DTD reader.setEntityResolver( new IgnoreDTDEntityResolver() ); } monitor.worked( 1 ); monitor .beginTask( BaseMessages.getString( PKG, "GetXMLDateLoopNodesImportProgressDialog.Task.ReadingDocument" ), 1 ); if ( monitor.isCanceled() ) { return null; } InputStream is = null; try { Document document = null; if ( !Utils.isEmpty( filename ) ) { is = KettleVFS.getInputStream( filename ); document = reader.read( is, encoding ); } else { if ( !Utils.isEmpty( xml ) ) { document = reader.read( new StringReader( xml ) ); } else { document = reader.read( new URL( url ) ); } } monitor.worked( 1 ); monitor.beginTask( BaseMessages.getString( PKG, "GetXMLDateLoopNodesImportProgressDialog.Task.DocumentOpened" ), 1 ); monitor.worked( 1 ); monitor.beginTask( BaseMessages.getString( PKG, "GetXMLDateLoopNodesImportProgressDialog.Task.ReadingNode" ), 1 ); if ( monitor.isCanceled() ) { return null; } List<Node> nodes = document.selectNodes( this.loopXPath ); monitor.worked( 1 ); monitor.subTask( BaseMessages.getString( PKG, "GetXMLDateLoopNodesImportProgressDialog.Task.FetchNodes" ) ); if ( monitor.isCanceled() ) { return null; } for ( Node node : nodes ) { if ( monitor.isCanceled() ) { return null; } nr++; monitor.subTask( BaseMessages.getString( PKG, "GetXMLDateLoopNodesImportProgressDialog.Task.FetchNodes", String .valueOf( nr ) ) ); monitor.subTask( BaseMessages.getString( PKG, "GetXMLDateLoopNodesImportProgressDialog.Task.FetchNodes", node .getPath() ) ); setNodeField( node, monitor ); childNode( node, monitor ); } monitor.worked( 1 ); } finally { try { if ( is != null ) { is.close(); } } catch ( Exception e ) { /* Ignore */ } } RowMetaAndData[] listFields = fieldsList.toArray( new RowMetaAndData[fieldsList.size()] ); monitor.setTaskName( BaseMessages.getString( PKG, "GetXMLDateLoopNodesImportProgressDialog.Task.NodesReturned" ) ); monitor.done(); return listFields; } @SuppressWarnings( "unchecked" ) private void setNodeField( Node node, IProgressMonitor monitor ) { Element e = (Element) node; // get all attributes List<Attribute> lista = e.attributes(); for ( int i = 0; i < lista.size(); i++ ) { setAttributeField( lista.get( i ), monitor ); } // Get Node Name String nodename = node.getName(); String nodenametxt = cleanString( node.getPath() ); if ( !Utils.isEmpty( nodenametxt ) && !list.contains( nodenametxt ) ) { nr++; monitor.subTask( BaseMessages.getString( PKG, "GetXMLDataXMLInputFieldsImportProgressDialog.Task.FetchFields", String.valueOf( nr ) ) ); monitor.subTask( BaseMessages.getString( PKG, "GetXMLDataXMLInputFieldsImportProgressDialog.Task.AddingField", nodename ) ); RowMetaAndData row = new RowMetaAndData(); row.addValue( VALUE_NAME, Value.VALUE_TYPE_STRING, nodename ); row.addValue( VALUE_PATH, Value.VALUE_TYPE_STRING, nodenametxt ); row.addValue( VALUE_ELEMENT, Value.VALUE_TYPE_STRING, GetXMLDataField.ElementTypeDesc[0] ); row.addValue( VALUE_RESULT, Value.VALUE_TYPE_STRING, GetXMLDataField.ResultTypeDesc[0] ); // Get Node value String valueNode = node.getText(); // Try to get the Type if ( IsDate( valueNode ) ) { row.addValue( VALUE_TYPE, Value.VALUE_TYPE_STRING, "Date" ); row.addValue( VALUE_FORMAT, Value.VALUE_TYPE_STRING, "yyyy/MM/dd" ); } else if ( IsInteger( valueNode ) ) { row.addValue( VALUE_TYPE, Value.VALUE_TYPE_STRING, "Integer" ); row.addValue( VALUE_FORMAT, Value.VALUE_TYPE_STRING, null ); } else if ( IsNumber( valueNode ) ) { row.addValue( VALUE_TYPE, Value.VALUE_TYPE_STRING, "Number" ); row.addValue( VALUE_FORMAT, Value.VALUE_TYPE_STRING, null ); } else { row.addValue( VALUE_TYPE, Value.VALUE_TYPE_STRING, "String" ); row.addValue( VALUE_FORMAT, Value.VALUE_TYPE_STRING, null ); } fieldsList.add( row ); list.add( nodenametxt ); } // end if } private void setAttributeField( Attribute attribute, IProgressMonitor monitor ) { // Get Attribute Name String attributname = attribute.getName(); String attributnametxt = cleanString( attribute.getPath() ); if ( !Utils.isEmpty( attributnametxt ) && !list.contains( attribute.getPath() ) ) { nr++; monitor.subTask( BaseMessages.getString( PKG, "GetXMLDataXMLInputFieldsImportProgressDialog.Task.FetchFields", String.valueOf( nr ) ) ); monitor.subTask( BaseMessages.getString( PKG, "GetXMLDataXMLInputFieldsImportProgressDialog.Task.AddingField", attributname ) ); RowMetaAndData row = new RowMetaAndData(); row.addValue( VALUE_NAME, Value.VALUE_TYPE_STRING, attributname ); row.addValue( VALUE_PATH, Value.VALUE_TYPE_STRING, attributnametxt ); row.addValue( VALUE_ELEMENT, Value.VALUE_TYPE_STRING, GetXMLDataField.ElementTypeDesc[1] ); row.addValue( VALUE_RESULT, Value.VALUE_TYPE_STRING, GetXMLDataField.ResultTypeDesc[0] ); // Get attribute value String valueAttr = attribute.getText(); // Try to get the Type if ( IsDate( valueAttr ) ) { row.addValue( VALUE_TYPE, Value.VALUE_TYPE_STRING, "Date" ); row.addValue( VALUE_FORMAT, Value.VALUE_TYPE_STRING, "yyyy/MM/dd" ); } else if ( IsInteger( valueAttr ) ) { row.addValue( VALUE_TYPE, Value.VALUE_TYPE_STRING, "Integer" ); row.addValue( VALUE_FORMAT, Value.VALUE_TYPE_STRING, null ); } else if ( IsNumber( valueAttr ) ) { row.addValue( VALUE_TYPE, Value.VALUE_TYPE_STRING, "Number" ); row.addValue( VALUE_FORMAT, Value.VALUE_TYPE_STRING, null ); } else { row.addValue( VALUE_TYPE, Value.VALUE_TYPE_STRING, "String" ); row.addValue( VALUE_FORMAT, Value.VALUE_TYPE_STRING, null ); } list.add( attribute.getPath() ); } // end if } private String cleanString( String inputstring ) { String retval = inputstring; retval = retval.replace( this.loopXPath, "" ); while ( retval.startsWith( GetXMLDataMeta.N0DE_SEPARATOR ) ) { retval = retval.substring( 1, retval.length() ); } return retval; } private boolean IsDate( String str ) { // TODO: What about other dates? Maybe something for a CRQ try { SimpleDateFormat fdate = new SimpleDateFormat( "yyyy/MM/dd" ); fdate.setLenient( false ); fdate.parse( str ); } catch ( Exception e ) { return false; } return true; } private boolean IsInteger( String str ) { try { Integer.parseInt( str ); } catch ( NumberFormatException e ) { return false; } return true; } private boolean IsNumber( String str ) { try { Float.parseFloat( str ); } catch ( Exception e ) { return false; } return true; } private boolean childNode( Node node, IProgressMonitor monitor ) { boolean rc = false; // true: we found child nodes Element ce = (Element) node; // List child for ( int j = 0; j < ce.nodeCount(); j++ ) { Node cnode = ce.node( j ); if ( !Utils.isEmpty( cnode.getName() ) ) { Element cce = (Element) cnode; if ( cce.nodeCount() > 1 ) { if ( childNode( cnode, monitor ) == false ) { // We do not have child nodes ... setNodeField( cnode, monitor ); rc = true; } } else { setNodeField( cnode, monitor ); rc = true; } } } return rc; } }