/*! ****************************************************************************** * * Pentaho Data Integration * * Copyright (C) 2002-2016 by Pentaho : http://www.pentaho.com * ******************************************************************************* * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * ******************************************************************************/ package org.pentaho.di.job.entries.dtdvalidator; import org.apache.commons.vfs2.FileObject; import org.pentaho.di.core.Const; import org.pentaho.di.core.logging.LogChannelInterface; import org.pentaho.di.core.vfs.KettleVFS; import org.pentaho.di.core.xml.XMLParserFactoryProducer; import org.pentaho.di.i18n.BaseMessages; import org.w3c.dom.Document; import org.xml.sax.ErrorHandler; import org.xml.sax.SAXException; import org.xml.sax.SAXParseException; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import java.io.BufferedReader; import java.io.ByteArrayInputStream; import java.io.File; import java.io.IOException; import java.io.InputStreamReader; import java.net.URL; public class DTDValidator { private static Class<?> PKG = JobEntryDTDValidator.class; // for i18n purposes, needed by Translator2!! private String xmlfilename; private String xsdfilename; private boolean interndtd; private String errormessage; private int errorscount; private LogChannelInterface log; public DTDValidator( LogChannelInterface log ) { this.log = log; this.xmlfilename = null; this.xsdfilename = null; this.interndtd = false; this.errormessage = null; this.errorscount = 0; } public void setXMLFilename( String xmlfilename ) { this.xmlfilename = xmlfilename; } public String getXMLFilename() { return this.xmlfilename; } public void setDTDFilename( String xsdfilename ) { this.xsdfilename = xsdfilename; } public String getDTDFilename() { return this.xsdfilename; } public void setInternDTD( boolean value ) { this.interndtd = value; } public boolean isInternDTD() { return this.interndtd; } private void setErrorMessage( String value ) { this.errormessage = value; } public String getErrorMessage() { return this.errormessage; } public int getNrErrors() { return this.errorscount; } private void setNrErrors( int value ) { this.errorscount = value; } public boolean validate() { boolean isValid = false; FileObject xmlFileObject = null; FileObject dtdFileObject = null; try { if ( xmlfilename != null && ( ( getDTDFilename() != null && !isInternDTD() ) || ( isInternDTD() ) ) ) { xmlFileObject = KettleVFS.getFileObject( getXMLFilename() ); if ( xmlFileObject.exists() ) { URL xmlFile = new File( KettleVFS.getFilename( xmlFileObject ) ).toURI().toURL(); StringBuffer xmlStringbuffer = new StringBuffer( "" ); try ( InputStreamReader is = new InputStreamReader( xmlFile.openStream() ); BufferedReader xmlBufferedReader = new BufferedReader( is ) ) { char[] buffertXML = new char[ 1024 ]; int lenXML; while ( ( lenXML = xmlBufferedReader.read( buffertXML ) ) != -1 ) { xmlStringbuffer.append( buffertXML, 0, lenXML ); } } // Prepare parsing ... DocumentBuilderFactory docBuilderFactory = XMLParserFactoryProducer.createSecureDocBuilderFactory(); DocumentBuilder docBuilder = docBuilderFactory.newDocumentBuilder(); // Let's try to get XML document encoding docBuilderFactory.setValidating( false ); ByteArrayInputStream ba = new ByteArrayInputStream( xmlStringbuffer.toString().getBytes( "UTF-8" ) ); Document xmlDocDTD = docBuilder.parse( ba ); if ( ba != null ) { ba.close(); } String encoding; if ( xmlDocDTD.getXmlEncoding() == null ) { encoding = "UTF-8"; } else { encoding = xmlDocDTD.getXmlEncoding(); } int xmlStartDTD = xmlStringbuffer.indexOf( "<!DOCTYPE" ); if ( isInternDTD() ) { // DTD find in the XML document if ( xmlStartDTD != -1 ) { log.logBasic( BaseMessages.getString( PKG, "JobEntryDTDValidator.ERRORDTDFound.Label", getXMLFilename() ) ); } else { setErrorMessage( BaseMessages.getString( PKG, "JobEntryDTDValidator.ERRORDTDNotFound.Label", getXMLFilename() ) ); } } else { // DTD in external document // If we find an intern declaration, we remove it dtdFileObject = KettleVFS.getFileObject( getDTDFilename() ); if ( dtdFileObject.exists() ) { if ( xmlStartDTD != -1 ) { int EndDTD = xmlStringbuffer.indexOf( ">", xmlStartDTD ); // String DocTypeDTD = xmlStringbuffer.substring(xmlStartDTD, EndDTD + 1); xmlStringbuffer.replace( xmlStartDTD, EndDTD + 1, "" ); } String xmlRootnodeDTD = xmlDocDTD.getDocumentElement().getNodeName(); String RefDTD = "<?xml version='" + xmlDocDTD.getXmlVersion() + "' encoding='" + encoding + "'?>\n<!DOCTYPE " + xmlRootnodeDTD + " SYSTEM '" + KettleVFS.getFilename( dtdFileObject ) + "'>\n"; int xmloffsetDTD = xmlStringbuffer.indexOf( "<" + xmlRootnodeDTD ); xmlStringbuffer.replace( 0, xmloffsetDTD, RefDTD ); } else { log .logError( BaseMessages.getString( PKG, "JobEntryDTDValidator.ERRORDTDFileNotExists.Subject" ), BaseMessages.getString( PKG, "JobEntryDTDValidator.ERRORDTDFileNotExists.Msg", getDTDFilename() ) ); } } if ( !( isInternDTD() && xmlStartDTD == -1 || ( !isInternDTD() && !dtdFileObject.exists() ) ) ) { // Let's parse now ... MyErrorHandler error = new MyErrorHandler(); docBuilderFactory.setValidating( true ); docBuilder = docBuilderFactory.newDocumentBuilder(); docBuilder.setErrorHandler( error ); ba = new ByteArrayInputStream( xmlStringbuffer.toString().getBytes( encoding ) ); xmlDocDTD = docBuilder.parse( ba ); if ( error.errorMessage == null ) { log.logBasic( BaseMessages.getString( PKG, "JobEntryDTDValidator.DTDValidatorOK.Subject" ), BaseMessages .getString( PKG, "JobEntryDTDValidator.DTDValidatorOK.Label", getXMLFilename() ) ); // Everything is OK isValid = true; } else { // Invalid DTD setNrErrors( error.nrErrors ); setErrorMessage( BaseMessages .getString( PKG, "JobEntryDTDValidator.DTDValidatorKO", getXMLFilename(), error.nrErrors, error.errorMessage ) ); } } } else { if ( !xmlFileObject.exists() ) { setErrorMessage( BaseMessages.getString( PKG, "JobEntryDTDValidator.FileDoesNotExist.Label", getXMLFilename() ) ); } } } else { setErrorMessage( BaseMessages.getString( PKG, "JobEntryDTDValidator.AllFilesNotNull.Label" ) ); } } catch ( Exception e ) { setErrorMessage( BaseMessages.getString( PKG, "JobEntryDTDValidator.ErrorDTDValidator.Label", getXMLFilename(), getDTDFilename(), e.getMessage() ) ); } finally { try { if ( xmlFileObject != null ) { xmlFileObject.close(); } if ( dtdFileObject != null ) { dtdFileObject.close(); } } catch ( IOException e ) { // Ignore close errors } } return isValid; } private static class MyErrorHandler implements ErrorHandler { String errorMessage = null; int error = -1; int nrErrors = 0; public void warning( SAXParseException e ) throws SAXException { error = 0; allErrors( e ); } public void error( SAXParseException e ) throws SAXException { error = 1; allErrors( e ); } public void fatalError( SAXParseException e ) throws SAXException { error = 2; allErrors( e ); } private void allErrors( SAXParseException e ) { nrErrors++; if ( errorMessage == null ) { errorMessage = ""; } errorMessage += Const.CR + Const.CR + "Error Nr." + nrErrors + " ("; switch ( error ) { case 0: errorMessage += "Warning"; break; case 1: errorMessage += "Error"; break; case 2: errorMessage += "FatalError"; break; default: break; } errorMessage += ")" + Const.CR + " Public ID: " + e.getPublicId() + Const.CR + " System ID: " + e.getSystemId() + Const.CR + " Line number: " + e.getLineNumber() + Const.CR + " Column number: " + e.getColumnNumber() + Const.CR + " Message: " + e.getMessage(); } } }