/*! ****************************************************************************** * * Pentaho Data Integration * * Copyright (C) 2002-2016 by Pentaho : http://www.pentaho.com * ******************************************************************************* * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * ******************************************************************************/ package org.pentaho.di.job.entries.xmlwellformed; import static org.pentaho.di.job.entry.validator.AbstractFileValidator.putVariableSpace; import static org.pentaho.di.job.entry.validator.AndValidator.putValidators; import static org.pentaho.di.job.entry.validator.JobEntryValidatorUtils.andValidator; import static org.pentaho.di.job.entry.validator.JobEntryValidatorUtils.fileExistsValidator; import static org.pentaho.di.job.entry.validator.JobEntryValidatorUtils.notNullValidator; import java.io.IOException; import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.apache.commons.vfs2.AllFileSelector; import org.apache.commons.vfs2.FileObject; import org.apache.commons.vfs2.FileSelectInfo; import org.apache.commons.vfs2.FileType; import org.pentaho.di.cluster.SlaveServer; import org.pentaho.di.core.CheckResultInterface; import org.pentaho.di.core.Const; import org.pentaho.di.core.util.Utils; import org.pentaho.di.core.Result; import org.pentaho.di.core.ResultFile; import org.pentaho.di.core.RowMetaAndData; import org.pentaho.di.core.annotations.JobEntry; import org.pentaho.di.core.database.DatabaseMeta; import org.pentaho.di.core.exception.KettleDatabaseException; import org.pentaho.di.core.exception.KettleException; import org.pentaho.di.core.exception.KettleXMLException; import org.pentaho.di.core.variables.VariableSpace; import org.pentaho.di.core.vfs.KettleVFS; import org.pentaho.di.core.xml.XMLCheck; import org.pentaho.di.core.xml.XMLHandler; import org.pentaho.di.i18n.BaseMessages; import org.pentaho.di.job.Job; import org.pentaho.di.job.JobMeta; import org.pentaho.di.job.entry.JobEntryBase; import org.pentaho.di.job.entry.JobEntryInterface; import org.pentaho.di.job.entry.validator.ValidatorContext; import org.pentaho.di.repository.ObjectId; import org.pentaho.di.repository.Repository; import org.pentaho.metastore.api.IMetaStore; import org.w3c.dom.Node; import org.xml.sax.helpers.DefaultHandler; /** * This defines a 'xml well formed' job entry. * * @author Samatar Hassan * @since 26-03-2008 */ @JobEntry( id = "XML_WELL_FORMED", i18nPackageName = "org.pentaho.di.job.entries.xmlwellformed", image = "XFC.svg", name = "XML_WELL_FORMED.Name", description = "XML_WELL_FORMED.Description", categoryDescription = "XML_WELL_FORMED.Category", documentationUrl = "http://wiki.pentaho.com/display/EAI/Check+if+XML+file+is+well+formed" ) public class JobEntryXMLWellFormed extends JobEntryBase implements Cloneable, JobEntryInterface { private static Class<?> PKG = JobEntryXMLWellFormed.class; // for i18n purposes, needed by Translator2!! public static String SUCCESS_IF_AT_LEAST_X_FILES_WELL_FORMED = "success_when_at_least"; public static String SUCCESS_IF_BAD_FORMED_FILES_LESS = "success_if_bad_formed_files_less"; public static String SUCCESS_IF_NO_ERRORS = "success_if_no_errors"; public static String ADD_ALL_FILENAMES = "all_filenames"; public static String ADD_WELL_FORMED_FILES_ONLY = "only_well_formed_filenames"; public static String ADD_BAD_FORMED_FILES_ONLY = "only_bad_formed_filenames"; @Deprecated public boolean arg_from_previous; @Deprecated public boolean include_subfolders; @Deprecated public String[] source_filefolder; @Deprecated public String[] wildcard; private String nr_errors_less_than; private String success_condition; private String resultfilenames; int NrAllErrors = 0; int NrBadFormed = 0; int NrWellFormed = 0; int limitFiles = 0; int NrErrors = 0; boolean successConditionBroken = false; boolean successConditionBrokenExit = false; public JobEntryXMLWellFormed( String n ) { super( n, "" ); resultfilenames = ADD_ALL_FILENAMES; arg_from_previous = false; source_filefolder = null; wildcard = null; include_subfolders = false; nr_errors_less_than = "10"; success_condition = SUCCESS_IF_NO_ERRORS; } public JobEntryXMLWellFormed() { this( "" ); } public Object clone() { JobEntryXMLWellFormed je = (JobEntryXMLWellFormed) super.clone(); return je; } public String getXML() { StringBuilder retval = new StringBuilder( 300 ); retval.append( super.getXML() ); retval.append( " " ).append( XMLHandler.addTagValue( "arg_from_previous", arg_from_previous ) ); retval.append( " " ).append( XMLHandler.addTagValue( "include_subfolders", include_subfolders ) ); retval.append( " " ).append( XMLHandler.addTagValue( "nr_errors_less_than", nr_errors_less_than ) ); retval.append( " " ).append( XMLHandler.addTagValue( "success_condition", success_condition ) ); retval.append( " " ).append( XMLHandler.addTagValue( "resultfilenames", resultfilenames ) ); retval.append( " " ).append( XMLHandler.openTag( "fields" ) ).append( Const.CR ); if ( source_filefolder != null ) { for ( int i = 0; i < source_filefolder.length; i++ ) { retval.append( " " ).append( XMLHandler.openTag( "field" ) ).append( Const.CR ); retval.append( " " ).append( XMLHandler.addTagValue( "source_filefolder", source_filefolder[i] ) ); retval.append( " " ).append( XMLHandler.addTagValue( "wildcard", wildcard[i] ) ); retval.append( " " ).append( XMLHandler.closeTag( "field" ) ).append( Const.CR ); } } retval.append( " " ).append( XMLHandler.closeTag( "fields" ) ).append( Const.CR ); return retval.toString(); } public void loadXML( Node entrynode, List<DatabaseMeta> databases, List<SlaveServer> slaveServers, Repository rep, IMetaStore metaStore ) throws KettleXMLException { try { super.loadXML( entrynode, databases, slaveServers ); arg_from_previous = "Y".equalsIgnoreCase( XMLHandler.getTagValue( entrynode, "arg_from_previous" ) ); include_subfolders = "Y".equalsIgnoreCase( XMLHandler.getTagValue( entrynode, "include_subfolders" ) ); nr_errors_less_than = XMLHandler.getTagValue( entrynode, "nr_errors_less_than" ); success_condition = XMLHandler.getTagValue( entrynode, "success_condition" ); resultfilenames = XMLHandler.getTagValue( entrynode, "resultfilenames" ); Node fields = XMLHandler.getSubNode( entrynode, "fields" ); // How many field arguments? int nrFields = XMLHandler.countNodes( fields, "field" ); source_filefolder = new String[nrFields]; wildcard = new String[nrFields]; // Read them all... for ( int i = 0; i < nrFields; i++ ) { Node fnode = XMLHandler.getSubNodeByNr( fields, "field", i ); source_filefolder[i] = XMLHandler.getTagValue( fnode, "source_filefolder" ); wildcard[i] = XMLHandler.getTagValue( fnode, "wildcard" ); } } catch ( KettleXMLException xe ) { throw new KettleXMLException( BaseMessages.getString( PKG, "JobXMLWellFormed.Error.Exception.UnableLoadXML" ), xe ); } } public void loadRep( Repository rep, IMetaStore metaStore, ObjectId id_jobentry, List<DatabaseMeta> databases, List<SlaveServer> slaveServers ) throws KettleException { try { arg_from_previous = rep.getJobEntryAttributeBoolean( id_jobentry, "arg_from_previous" ); include_subfolders = rep.getJobEntryAttributeBoolean( id_jobentry, "include_subfolders" ); nr_errors_less_than = rep.getJobEntryAttributeString( id_jobentry, "nr_errors_less_than" ); success_condition = rep.getJobEntryAttributeString( id_jobentry, "success_condition" ); resultfilenames = rep.getJobEntryAttributeString( id_jobentry, "resultfilenames" ); // How many arguments? int argnr = rep.countNrJobEntryAttributes( id_jobentry, "source_filefolder" ); source_filefolder = new String[argnr]; wildcard = new String[argnr]; // Read them all... for ( int a = 0; a < argnr; a++ ) { source_filefolder[a] = rep.getJobEntryAttributeString( id_jobentry, a, "source_filefolder" ); wildcard[a] = rep.getJobEntryAttributeString( id_jobentry, a, "wildcard" ); } } catch ( KettleException dbe ) { throw new KettleException( BaseMessages.getString( PKG, "JobXMLWellFormed.Error.Exception.UnableLoadRep" ) + id_jobentry, dbe ); } } public void saveRep( Repository rep, IMetaStore metaStore, ObjectId id_job ) throws KettleException { try { rep.saveJobEntryAttribute( id_job, getObjectId(), "arg_from_previous", arg_from_previous ); rep.saveJobEntryAttribute( id_job, getObjectId(), "include_subfolders", include_subfolders ); rep.saveJobEntryAttribute( id_job, getObjectId(), "nr_errors_less_than", nr_errors_less_than ); rep.saveJobEntryAttribute( id_job, getObjectId(), "success_condition", success_condition ); rep.saveJobEntryAttribute( id_job, getObjectId(), "resultfilenames", resultfilenames ); // save the arguments... if ( source_filefolder != null ) { for ( int i = 0; i < source_filefolder.length; i++ ) { rep.saveJobEntryAttribute( id_job, getObjectId(), i, "source_filefolder", source_filefolder[i] ); rep.saveJobEntryAttribute( id_job, getObjectId(), i, "wildcard", wildcard[i] ); } } } catch ( KettleDatabaseException dbe ) { throw new KettleException( BaseMessages.getString( PKG, "JobXMLWellFormed.Error.Exception.UnableSaveRep" ) + id_job, dbe ); } } public Result execute( Result previousResult, int nr ) throws KettleException { Result result = previousResult; result.setNrErrors( 1 ); result.setResult( false ); List<RowMetaAndData> rows = result.getRows(); RowMetaAndData resultRow = null; NrErrors = 0; NrWellFormed = 0; NrBadFormed = 0; limitFiles = Const.toInt( environmentSubstitute( getNrErrorsLessThan() ), 10 ); successConditionBroken = false; successConditionBrokenExit = false; // Get source and destination files, also wildcard String[] vsourcefilefolder = source_filefolder; String[] vwildcard = wildcard; if ( arg_from_previous ) { if ( log.isDetailed() ) { logDetailed( BaseMessages.getString( PKG, "JobXMLWellFormed.Log.ArgFromPrevious.Found", ( rows != null ? rows .size() : 0 ) + "" ) ); } } if ( arg_from_previous && rows != null ) { // Copy the input row to the (command line) arguments for ( int iteration = 0; iteration < rows.size() && !parentJob.isStopped(); iteration++ ) { if ( successConditionBroken ) { if ( !successConditionBrokenExit ) { logError( BaseMessages.getString( PKG, "JobXMLWellFormed.Error.SuccessConditionbroken", "" + NrAllErrors ) ); successConditionBrokenExit = true; } result.setEntryNr( NrAllErrors ); result.setNrLinesRejected( NrBadFormed ); result.setNrLinesWritten( NrWellFormed ); return result; } resultRow = rows.get( iteration ); // Get source and destination file names, also wildcard String vsourcefilefolder_previous = resultRow.getString( 0, null ); String vwildcard_previous = resultRow.getString( 1, null ); if ( log.isDetailed() ) { logDetailed( BaseMessages.getString( PKG, "JobXMLWellFormed.Log.ProcessingRow", vsourcefilefolder_previous, vwildcard_previous ) ); } processFileFolder( vsourcefilefolder_previous, vwildcard_previous, parentJob, result ); } } else if ( vsourcefilefolder != null ) { for ( int i = 0; i < vsourcefilefolder.length && !parentJob.isStopped(); i++ ) { if ( successConditionBroken ) { if ( !successConditionBrokenExit ) { logError( BaseMessages.getString( PKG, "JobXMLWellFormed.Error.SuccessConditionbroken", "" + NrAllErrors ) ); successConditionBrokenExit = true; } result.setEntryNr( NrAllErrors ); result.setNrLinesRejected( NrBadFormed ); result.setNrLinesWritten( NrWellFormed ); return result; } if ( log.isDetailed() ) { logDetailed( BaseMessages.getString( PKG, "JobXMLWellFormed.Log.ProcessingRow", vsourcefilefolder[i], vwildcard[i] ) ); } processFileFolder( vsourcefilefolder[i], vwildcard[i], parentJob, result ); } } // Success Condition result.setNrErrors( NrAllErrors ); result.setNrLinesRejected( NrBadFormed ); result.setNrLinesWritten( NrWellFormed ); if ( getSuccessStatus() ) { result.setNrErrors( 0 ); result.setResult( true ); } displayResults(); return result; } private void displayResults() { if ( log.isDetailed() ) { logDetailed( "=======================================" ); logDetailed( BaseMessages.getString( PKG, "JobXMLWellFormed.Log.Info.FilesInError", "" + NrErrors ) ); logDetailed( BaseMessages.getString( PKG, "JobXMLWellFormed.Log.Info.FilesInBadFormed", "" + NrBadFormed ) ); logDetailed( BaseMessages.getString( PKG, "JobXMLWellFormed.Log.Info.FilesInWellFormed", "" + NrWellFormed ) ); logDetailed( "=======================================" ); } } private boolean checkIfSuccessConditionBroken() { boolean retval = false; if ( ( NrAllErrors > 0 && getSuccessCondition().equals( SUCCESS_IF_NO_ERRORS ) ) || ( NrBadFormed >= limitFiles && getSuccessCondition().equals( SUCCESS_IF_BAD_FORMED_FILES_LESS ) ) ) { retval = true; } return retval; } private boolean getSuccessStatus() { boolean retval = false; if ( ( NrAllErrors == 0 && getSuccessCondition().equals( SUCCESS_IF_NO_ERRORS ) ) || ( NrWellFormed >= limitFiles && getSuccessCondition().equals( SUCCESS_IF_AT_LEAST_X_FILES_WELL_FORMED ) ) || ( NrBadFormed < limitFiles && getSuccessCondition().equals( SUCCESS_IF_BAD_FORMED_FILES_LESS ) ) ) { retval = true; } return retval; } private void updateErrors() { NrErrors++; updateAllErrors(); if ( checkIfSuccessConditionBroken() ) { // Success condition was broken successConditionBroken = true; } } private void updateAllErrors() { NrAllErrors = NrErrors + NrBadFormed; } public static class XMLTreeHandler extends DefaultHandler { } private boolean CheckFile( FileObject file ) { boolean retval = false; try { retval = XMLCheck.isXMLFileWellFormed( file ); } catch ( Exception e ) { logError( BaseMessages.getString( PKG, "JobXMLWellFormed.Log.ErrorCheckingFile", file.toString(), e.getMessage() ) ); } return retval; } private boolean processFileFolder( String sourcefilefoldername, String wildcard, Job parentJob, Result result ) { boolean entrystatus = false; FileObject sourcefilefolder = null; FileObject CurrentFile = null; // Get real source file and wilcard String realSourceFilefoldername = environmentSubstitute( sourcefilefoldername ); if ( Utils.isEmpty( realSourceFilefoldername ) ) { logError( BaseMessages.getString( PKG, "JobXMLWellFormed.log.FileFolderEmpty", sourcefilefoldername ) ); // Update Errors updateErrors(); return entrystatus; } String realWildcard = environmentSubstitute( wildcard ); try { sourcefilefolder = KettleVFS.getFileObject( realSourceFilefoldername, this ); if ( sourcefilefolder.exists() ) { if ( log.isDetailed() ) { logDetailed( BaseMessages.getString( PKG, "JobXMLWellFormed.Log.FileExists", sourcefilefolder.toString() ) ); } if ( sourcefilefolder.getType() == FileType.FILE ) { entrystatus = checkOneFile( sourcefilefolder, result, parentJob ); } else if ( sourcefilefolder.getType() == FileType.FOLDER ) { FileObject[] fileObjects = sourcefilefolder.findFiles( new AllFileSelector() { public boolean traverseDescendents( FileSelectInfo info ) { return true; } public boolean includeFile( FileSelectInfo info ) { FileObject fileObject = info.getFile(); try { if ( fileObject == null ) { return false; } if ( fileObject.getType() != FileType.FILE ) { return false; } } catch ( Exception ex ) { // Upon error don't process the file. return false; } finally { if ( fileObject != null ) { try { fileObject.close(); } catch ( IOException ex ) { /* Ignore */ } } } return true; } } ); if ( fileObjects != null ) { for ( int j = 0; j < fileObjects.length && !parentJob.isStopped(); j++ ) { if ( successConditionBroken ) { if ( !successConditionBrokenExit ) { logError( BaseMessages.getString( PKG, "JobXMLWellFormed.Error.SuccessConditionbroken", "" + NrAllErrors ) ); successConditionBrokenExit = true; } return false; } // Fetch files in list one after one ... CurrentFile = fileObjects[j]; if ( !CurrentFile.getParent().toString().equals( sourcefilefolder.toString() ) ) { // Not in the Base Folder..Only if include sub folders if ( include_subfolders ) { if ( GetFileWildcard( CurrentFile.toString(), realWildcard ) ) { checkOneFile( CurrentFile, result, parentJob ); } } } else { // In the base folder if ( GetFileWildcard( CurrentFile.toString(), realWildcard ) ) { checkOneFile( CurrentFile, result, parentJob ); } } } } } else { logError( BaseMessages .getString( PKG, "JobXMLWellFormed.Error.UnknowFileFormat", sourcefilefolder.toString() ) ); // Update Errors updateErrors(); } } else { logError( BaseMessages.getString( PKG, "JobXMLWellFormed.Error.SourceFileNotExists", realSourceFilefoldername ) ); // Update Errors updateErrors(); } } catch ( Exception e ) { logError( BaseMessages.getString( PKG, "JobXMLWellFormed.Error.Exception.Processing", realSourceFilefoldername .toString(), e ) ); // Update Errors updateErrors(); } finally { if ( sourcefilefolder != null ) { try { sourcefilefolder.close(); } catch ( IOException ex ) { /* Ignore */ } } if ( CurrentFile != null ) { try { CurrentFile.close(); } catch ( IOException ex ) { /* Ignore */ } } } return entrystatus; } private boolean checkOneFile( FileObject file, Result result, Job parentJob ) throws KettleException { boolean retval = false; try { // We deal with a file..so let's check if it's well formed boolean retformed = CheckFile( file ); if ( !retformed ) { logError( BaseMessages.getString( PKG, "JobXMLWellFormed.Error.FileBadFormed", file.toString() ) ); // Update Bad formed files number updateBadFormed(); if ( resultfilenames.equals( ADD_ALL_FILENAMES ) || resultfilenames.equals( ADD_BAD_FORMED_FILES_ONLY ) ) { addFileToResultFilenames( KettleVFS.getFilename( file ), result, parentJob ); } } else { if ( log.isDetailed() ) { logDetailed( "---------------------------" ); logDetailed( BaseMessages.getString( PKG, "JobXMLWellFormed.Error.FileWellFormed", file.toString() ) ); } // Update Well formed files number updateWellFormed(); if ( resultfilenames.equals( ADD_ALL_FILENAMES ) || resultfilenames.equals( ADD_WELL_FORMED_FILES_ONLY ) ) { addFileToResultFilenames( KettleVFS.getFilename( file ), result, parentJob ); } } } catch ( Exception e ) { throw new KettleException( "Unable to verify file '" + file + "'", e ); } return retval; } private void updateWellFormed() { NrWellFormed++; } private void updateBadFormed() { NrBadFormed++; updateAllErrors(); } private void addFileToResultFilenames( String fileaddentry, Result result, Job parentJob ) { try { ResultFile resultFile = new ResultFile( ResultFile.FILE_TYPE_GENERAL, KettleVFS.getFileObject( fileaddentry, this ), parentJob .getJobname(), toString() ); result.getResultFiles().put( resultFile.getFile().toString(), resultFile ); if ( log.isDetailed() ) { logDetailed( BaseMessages.getString( PKG, "JobXMLWellFormed.Log.FileAddedToResultFilesName", fileaddentry ) ); } } catch ( Exception e ) { logError( BaseMessages.getString( PKG, "JobXMLWellFormed.Error.AddingToFilenameResult", fileaddentry, e .getMessage() ) ); } } /********************************************************** * * @param selectedfile * @param wildcard * @return True if the selectedfile matches the wildcard **********************************************************/ private boolean GetFileWildcard( String selectedfile, String wildcard ) { Pattern pattern = null; boolean getIt = true; if ( !Utils.isEmpty( wildcard ) ) { pattern = Pattern.compile( wildcard ); // First see if the file matches the regular expression! if ( pattern != null ) { Matcher matcher = pattern.matcher( selectedfile ); getIt = matcher.matches(); } } return getIt; } public boolean isIncludeSubfolders() { return include_subfolders; } public void setIncludeSubfolders( boolean include_subfoldersin ) { this.include_subfolders = include_subfoldersin; } public boolean isArgFromPrevious() { return arg_from_previous; } public void setArgFromPrevious( boolean argfrompreviousin ) { this.arg_from_previous = argfrompreviousin; } public void setNrErrorsLessThan( String nr_errors_less_than ) { this.nr_errors_less_than = nr_errors_less_than; } public String[] getSourceFileFolders() { return source_filefolder; } public void setSourceFileFolders( String[] filefolders ) { this.source_filefolder = filefolders; } public String[] getSourceWildcards() { return wildcard; } public void setSourceWildcards( String[] wildcards ) { this.wildcard = wildcards; } public String getNrErrorsLessThan() { return nr_errors_less_than; } public void setSuccessCondition( String success_condition ) { this.success_condition = success_condition; } public String getSuccessCondition() { return success_condition; } public void setResultFilenames( String resultfilenames ) { this.resultfilenames = resultfilenames; } public String getResultFilenames() { return resultfilenames; } public boolean evaluates() { return true; } public void check( List<CheckResultInterface> remarks, JobMeta jobMeta, VariableSpace space, Repository repository, IMetaStore metaStore ) { boolean res = andValidator().validate( this, "arguments", remarks, putValidators( notNullValidator() ) ); if ( res == false ) { return; } ValidatorContext ctx = new ValidatorContext(); putVariableSpace( ctx, getVariables() ); putValidators( ctx, notNullValidator(), fileExistsValidator() ); for ( int i = 0; i < source_filefolder.length; i++ ) { andValidator().validate( this, "arguments[" + i + "]", remarks, ctx ); } } }