/*! ****************************************************************************** * * Pentaho Data Integration * * Copyright (C) 2002-2016 by Pentaho : http://www.pentaho.com * ******************************************************************************* * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * ******************************************************************************/ package org.pentaho.di.job.entries.msaccessbulkload; import static org.pentaho.di.job.entry.validator.AbstractFileValidator.putVariableSpace; import static org.pentaho.di.job.entry.validator.AndValidator.putValidators; import static org.pentaho.di.job.entry.validator.JobEntryValidatorUtils.andValidator; import static org.pentaho.di.job.entry.validator.JobEntryValidatorUtils.fileExistsValidator; import static org.pentaho.di.job.entry.validator.JobEntryValidatorUtils.notNullValidator; import java.io.File; import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.pentaho.di.cluster.SlaveServer; import org.pentaho.di.core.CheckResultInterface; import org.pentaho.di.core.Const; import org.pentaho.di.core.util.Utils; import org.pentaho.di.core.Result; import org.pentaho.di.core.ResultFile; import org.pentaho.di.core.RowMetaAndData; import org.pentaho.di.core.database.DatabaseMeta; import org.pentaho.di.core.exception.KettleDatabaseException; import org.pentaho.di.core.exception.KettleException; import org.pentaho.di.core.exception.KettleXMLException; import org.pentaho.di.core.variables.VariableSpace; import org.pentaho.di.core.vfs.KettleVFS; import org.pentaho.di.core.xml.XMLHandler; import org.pentaho.di.i18n.BaseMessages; import org.pentaho.di.job.Job; import org.pentaho.di.job.JobMeta; import org.pentaho.di.job.entry.JobEntryBase; import org.pentaho.di.job.entry.JobEntryInterface; import org.pentaho.di.job.entry.validator.ValidatorContext; import org.pentaho.di.repository.ObjectId; import org.pentaho.di.repository.Repository; import org.pentaho.metastore.api.IMetaStore; import org.w3c.dom.Node; import com.healthmarketscience.jackcess.Database; /** * This defines a 'MS Access Bulk Load' job entry. It will compare to load data from files into Microsoft Access files * * @author Samatar Hassan * @since 24-07-2008 * */ @org.pentaho.di.core.annotations.JobEntry( id = "MS_ACCESS_BULK_LOAD", categoryDescription = "i18n:org.pentaho.di.job:JobCategory.Category.Deprecated", i18nPackageName = "org.pentaho.di.job.entries.msaccessbulkload", image = "BLKMSACC.svg", name = "JobEntryMSAccessBulkLoad.Name.Default", description = "JobEntryMSAccessBulkLoad.Tooltip" ) public class JobEntryMSAccessBulkLoad extends JobEntryBase implements Cloneable, JobEntryInterface { private static Class<?> PKG = JobEntryMSAccessBulkLoad.class; // for i18n purposes, needed by Translator2!! private boolean add_result_filenames; private boolean include_subfolders; private boolean is_args_from_previous; public String[] source_filefolder; public String[] source_wildcard; public String[] delimiter; public String[] target_Db; public String[] target_table; private String limit; private String success_condition; public String SUCCESS_IF_AT_LEAST = "success_when_at_least"; public String SUCCESS_IF_ERRORS_LESS = "success_if_errors_less"; public String SUCCESS_IF_NO_ERRORS = "success_if_no_errors"; private int NrErrors = 0; private int NrSuccess = 0; private int NrFilesToProcess = 0; private boolean continueProcessing = true; int limitFiles = 0; public JobEntryMSAccessBulkLoad( String n ) { super( n, "" ); limit = "10"; success_condition = SUCCESS_IF_NO_ERRORS; add_result_filenames = false; include_subfolders = false; source_filefolder = null; source_wildcard = null; delimiter = null; target_Db = null; target_table = null; } public JobEntryMSAccessBulkLoad() { this( "" ); } public Object clone() { JobEntryMSAccessBulkLoad je = (JobEntryMSAccessBulkLoad) super.clone(); return je; } public void setAddResultFilenames( boolean addtoresultfilenames ) { this.add_result_filenames = addtoresultfilenames; } public boolean isAddResultFilename() { return add_result_filenames; } public void setIncludeSubFoders( boolean includeSubfolders ) { this.include_subfolders = includeSubfolders; } public boolean isIncludeSubFoders() { return include_subfolders; } public void setArgsFromPrevious( boolean isargsfromprevious ) { this.is_args_from_previous = isargsfromprevious; } public boolean isArgsFromPrevious() { return is_args_from_previous; } public String getXML() { StringBuffer retval = new StringBuffer( 50 ); retval.append( super.getXML() ); retval.append( " " ).append( XMLHandler.addTagValue( "include_subfolders", include_subfolders ) ); retval.append( " " ).append( XMLHandler.addTagValue( "is_args_from_previous", is_args_from_previous ) ); retval.append( " " ).append( XMLHandler.addTagValue( "add_result_filenames", add_result_filenames ) ); retval.append( " " ).append( XMLHandler.addTagValue( "limit", limit ) ); retval.append( " " ).append( XMLHandler.addTagValue( "success_condition", success_condition ) ); retval.append( " <fields>" ).append( Const.CR ); if ( source_filefolder != null ) { for ( int i = 0; i < source_filefolder.length; i++ ) { retval.append( " <field>" ).append( Const.CR ); retval.append( " " ).append( XMLHandler.addTagValue( "source_filefolder", source_filefolder[i] ) ); retval.append( " " ).append( XMLHandler.addTagValue( "source_wildcard", source_wildcard[i] ) ); retval.append( " " ).append( XMLHandler.addTagValue( "delimiter", delimiter[i] ) ); retval.append( " " ).append( XMLHandler.addTagValue( "target_db", target_Db[i] ) ); retval.append( " " ).append( XMLHandler.addTagValue( "target_table", target_table[i] ) ); retval.append( " </field>" ).append( Const.CR ); } } retval.append( " </fields>" ).append( Const.CR ); return retval.toString(); } public void loadXML( Node entrynode, List<DatabaseMeta> databases, List<SlaveServer> slaveServers, Repository rep, IMetaStore metaStore ) throws KettleXMLException { try { super.loadXML( entrynode, databases, slaveServers ); include_subfolders = "Y".equalsIgnoreCase( XMLHandler.getTagValue( entrynode, "include_subfolders" ) ); add_result_filenames = "Y".equalsIgnoreCase( XMLHandler.getTagValue( entrynode, "add_result_filenames" ) ); is_args_from_previous = "Y".equalsIgnoreCase( XMLHandler.getTagValue( entrynode, "is_args_from_previous" ) ); limit = XMLHandler.getTagValue( entrynode, "limit" ); success_condition = XMLHandler.getTagValue( entrynode, "success_condition" ); Node fields = XMLHandler.getSubNode( entrynode, "fields" ); // How many field arguments? int nrFields = XMLHandler.countNodes( fields, "field" ); source_filefolder = new String[nrFields]; delimiter = new String[nrFields]; source_wildcard = new String[nrFields]; target_Db = new String[nrFields]; target_table = new String[nrFields]; // Read them all... for ( int i = 0; i < nrFields; i++ ) { Node fnode = XMLHandler.getSubNodeByNr( fields, "field", i ); source_filefolder[i] = XMLHandler.getTagValue( fnode, "source_filefolder" ); source_wildcard[i] = XMLHandler.getTagValue( fnode, "source_wildcard" ); delimiter[i] = XMLHandler.getTagValue( fnode, "delimiter" ); target_Db[i] = XMLHandler.getTagValue( fnode, "target_db" ); target_table[i] = XMLHandler.getTagValue( fnode, "target_table" ); } } catch ( KettleXMLException xe ) { throw new KettleXMLException( BaseMessages.getString( PKG, "JobEntryMSAccessBulkLoad.Meta.UnableLoadXML", xe .getMessage() ), xe ); } } public void loadRep( Repository rep, IMetaStore metaStore, ObjectId id_jobentry, List<DatabaseMeta> databases, List<SlaveServer> slaveServers ) throws KettleException { try { include_subfolders = rep.getJobEntryAttributeBoolean( id_jobentry, "include_subfolders" ); add_result_filenames = rep.getJobEntryAttributeBoolean( id_jobentry, "add_result_filenames" ); is_args_from_previous = rep.getJobEntryAttributeBoolean( id_jobentry, "is_args_from_previous" ); limit = rep.getJobEntryAttributeString( id_jobentry, "limit" ); success_condition = rep.getJobEntryAttributeString( id_jobentry, "success_condition" ); // How many arguments? int argnr = rep.countNrJobEntryAttributes( id_jobentry, "source_filefolder" ); source_filefolder = new String[argnr]; source_wildcard = new String[argnr]; delimiter = new String[argnr]; target_Db = new String[argnr]; target_table = new String[argnr]; // Read them all... for ( int a = 0; a < argnr; a++ ) { source_filefolder[a] = rep.getJobEntryAttributeString( id_jobentry, a, "source_filefolder" ); source_wildcard[a] = rep.getJobEntryAttributeString( id_jobentry, a, "source_wildcard" ); delimiter[a] = rep.getJobEntryAttributeString( id_jobentry, a, "delimiter" ); target_Db[a] = rep.getJobEntryAttributeString( id_jobentry, a, "target_db" ); target_table[a] = rep.getJobEntryAttributeString( id_jobentry, a, "target_table" ); } } catch ( KettleException dbe ) { throw new KettleException( BaseMessages.getString( PKG, "JobEntryMSAccessBulkLoad.Meta.UnableLoadRep", "" + id_jobentry, dbe.getMessage() ), dbe ); } } private void displayResults() { if ( log.isDetailed() ) { logDetailed( "=======================================" ); logDetailed( BaseMessages.getString( PKG, "JobEntryMSAccessBulkLoad.Log.Info.FilesToLoad", "" + NrFilesToProcess ) ); logDetailed( BaseMessages.getString( PKG, "JobEntryMSAccessBulkLoad.Log.Info.FilesLoaded", "" + NrSuccess ) ); logDetailed( BaseMessages.getString( PKG, "JobEntryMSAccessBulkLoad.Log.Info.NrErrors", "" + NrErrors ) ); logDetailed( "=======================================" ); } } public void setLimit( String limit ) { this.limit = limit; } public String getLimit() { return limit; } public void setSuccessCondition( String success_condition ) { this.success_condition = success_condition; } public String getSuccessCondition() { return success_condition; } private void addFileToResultFilenames( String fileaddentry, Result result, Job parentJob ) { try { ResultFile resultFile = new ResultFile( ResultFile.FILE_TYPE_GENERAL, KettleVFS.getFileObject( fileaddentry, this ), parentJob .getJobname(), toString() ); result.getResultFiles().put( resultFile.getFile().toString(), resultFile ); if ( log.isDebug() ) { logDebug( " ------ " ); logDebug( BaseMessages.getString( PKG, "JobEntryMSAccessBulkLoad.Log.FileAddedToResultFilesName", fileaddentry ) ); } } catch ( Exception e ) { log.logError( BaseMessages.getString( PKG, "JobEntryMSAccessBulkLoad.Error.AddingToFilenameResult" ), fileaddentry + "" + e.getMessage() ); } } public void saveRep( Repository rep, IMetaStore metaStore, ObjectId id_job ) throws KettleException { try { rep.saveJobEntryAttribute( id_job, getObjectId(), "include_subfolders", include_subfolders ); rep.saveJobEntryAttribute( id_job, getObjectId(), "add_result_filenames", add_result_filenames ); rep.saveJobEntryAttribute( id_job, getObjectId(), "is_args_from_previous", is_args_from_previous ); rep.saveJobEntryAttribute( id_job, getObjectId(), "limit", limit ); rep.saveJobEntryAttribute( id_job, getObjectId(), "success_condition", success_condition ); // save the arguments... if ( source_filefolder != null ) { for ( int i = 0; i < source_filefolder.length; i++ ) { rep.saveJobEntryAttribute( id_job, getObjectId(), i, "source_filefolder", source_filefolder[i] ); rep.saveJobEntryAttribute( id_job, getObjectId(), i, "source_wildcard", source_wildcard[i] ); rep.saveJobEntryAttribute( id_job, getObjectId(), i, "delimiter", delimiter[i] ); rep.saveJobEntryAttribute( id_job, getObjectId(), i, "target_Db", target_Db[i] ); rep.saveJobEntryAttribute( id_job, getObjectId(), i, "target_table", target_table[i] ); } } } catch ( KettleDatabaseException dbe ) { throw new KettleException( BaseMessages.getString( PKG, "JobEntryMSAccessBulkLoad.Meta.UnableSave", "" + id_job, dbe.getMessage() ), dbe ); } } /********************************************************** * * @param selectedfile * @param wildcard * @return True if the selectedfile matches the wildcard **********************************************************/ private boolean GetFileWildcard( String selectedfile, String wildcard ) { Pattern pattern = null; boolean getIt = true; if ( !Utils.isEmpty( wildcard ) ) { pattern = Pattern.compile( wildcard ); // First see if the file matches the regular expression! if ( pattern != null ) { Matcher matcher = pattern.matcher( selectedfile ); getIt = matcher.matches(); } } return getIt; } private boolean processOneRow( String sourceFileFolder, String SourceWildcard, String Delimiter, String targetDb, String targetTable, Job parentJob, Result result ) { boolean retval = false; try { File sourcefilefolder = new File( sourceFileFolder ); if ( !sourcefilefolder.exists() ) { logError( BaseMessages.getString( PKG, "JobEntryMSAccessBulkLoad.Error.CanNotFindFile", sourceFileFolder ) ); return retval; } if ( sourcefilefolder.isFile() ) { // source is a file retval = importFile( sourceFileFolder, Delimiter, targetDb, targetTable, result, parentJob ); } else if ( sourcefilefolder.isDirectory() ) { // source is a folder File[] listFiles = sourcefilefolder.listFiles(); int nrFiles = listFiles.length; if ( nrFiles > 0 ) { // let's fetch children... for ( int i = 0; i < nrFiles && !parentJob.isStopped() && continueProcessing; i++ ) { File child = listFiles[i]; String childFullName = child.getAbsolutePath(); if ( child.isFile() ) { if ( Utils.isEmpty( SourceWildcard ) ) { retval = importFile( childFullName, Delimiter, targetDb, targetTable, result, parentJob ); } else { if ( GetFileWildcard( childFullName, SourceWildcard ) ) { retval = importFile( childFullName, Delimiter, targetDb, targetTable, result, parentJob ); } } } else { // let's run process for this folder if ( include_subfolders ) { processOneRow( childFullName, SourceWildcard, Delimiter, targetDb, targetTable, parentJob, result ); } } } } else { logBasic( BaseMessages.getString( PKG, "JobEntryMSAccessBulkLoad.Log.FolderEmpty", sourceFileFolder ) ); } } else { logError( BaseMessages.getString( PKG, "JobEntryMSAccessBulkLoad.Log.UnknowType", sourceFileFolder ) ); } } catch ( Exception e ) { logError( e.getMessage() ); incrErrors(); } return retval; } private boolean importFile( String sourceFilename, String delimiter, String targetFilename, String tablename, Result result, Job parentJob ) { boolean retval = false; try { incrFilesToProcess(); File sourceDataFile = new File( sourceFilename ); File targetDbFile = new File( targetFilename ); // create database if needed if ( !targetDbFile.exists() ) { Database.create( targetDbFile ); logBasic( BaseMessages.getString( PKG, "JobEntryMSAccessBulkLoad.Log.DbCreated", targetFilename ) ); } else { // Database exists Database db = Database.open( targetDbFile ); logBasic( BaseMessages.getString( PKG, "JobEntryMSAccessBulkLoad.Log.DbOpened", targetFilename ) ); // Let's check table if ( db.getTable( tablename ) != null ) { logBasic( BaseMessages.getString( PKG, "JobEntryMSAccessBulkLoad.Log.TableExists", tablename ) ); } // close database if ( db != null ) { db.close(); } logBasic( BaseMessages.getString( PKG, "JobEntryMSAccessBulkLoad.Log.DbCosed", targetFilename ) ); } // load data from file Database.open( targetDbFile ).importFile( tablename, sourceDataFile, delimiter ); logBasic( BaseMessages.getString( PKG, "JobEntryMSAccessBulkLoad.Log.FileImported", sourceFilename, tablename, targetFilename ) ); // add filename to result filename if ( add_result_filenames ) { addFileToResultFilenames( sourceFilename, result, parentJob ); } retval = true; } catch ( Exception e ) { logError( BaseMessages.getString( PKG, "JobEntryMSAccessBulkLoad.Error.LoadingDataToFile", sourceFilename, targetFilename, e.getMessage() ) ); } if ( retval ) { incrSuccess(); } else { incrErrors(); } return retval; } private void incrErrors() { NrErrors++; if ( checkIfSuccessConditionBroken() ) { // Success condition was broken continueProcessing = true; } } private boolean checkIfSuccessConditionBroken() { boolean retval = false; if ( ( NrErrors > 0 && getSuccessCondition().equals( SUCCESS_IF_NO_ERRORS ) ) || ( NrErrors >= limitFiles && getSuccessCondition().equals( SUCCESS_IF_ERRORS_LESS ) ) ) { retval = true; } return retval; } private void incrSuccess() { NrSuccess++; } private void incrFilesToProcess() { NrFilesToProcess++; } public Result execute( Result previousResult, int nr ) { Result result = previousResult; List<RowMetaAndData> rows = result.getRows(); RowMetaAndData resultRow = null; result.setResult( false ); NrErrors = 0; NrSuccess = 0; NrFilesToProcess = 0; continueProcessing = true; limitFiles = Const.toInt( environmentSubstitute( getLimit() ), 10 ); // Get source and destination files, also wildcard String[] vsourceFilefolder = source_filefolder; String[] vsourceWildcard = source_wildcard; String[] vsourceDelimiter = delimiter; String[] targetDb = target_Db; String[] targetTable = target_table; try { if ( is_args_from_previous ) { if ( log.isDetailed() ) { logDetailed( BaseMessages.getString( PKG, "JobEntryMSAccessBulkLoad.Log.ArgFromPrevious.Found", ( rows != null ? rows.size() : 0 ) + "" ) ); } } if ( is_args_from_previous && rows != null ) { for ( int iteration = 0; iteration < rows.size() && !parentJob.isStopped() && continueProcessing; iteration++ ) { resultRow = rows.get( iteration ); // Get source and destination file names, also wildcard String vSourceFileFolder_previous = resultRow.getString( 0, null ); String vSourceWildcard_previous = resultRow.getString( 1, null ); String vDelimiter_previous = resultRow.getString( 2, null ); String vTargetDb_previous = resultRow.getString( 3, null ); String vTargetTable_previous = resultRow.getString( 4, null ); processOneRow( vSourceFileFolder_previous, vSourceWildcard_previous, vDelimiter_previous, vTargetDb_previous, vTargetTable_previous, parentJob, result ); } } else if ( vsourceFilefolder != null && targetDb != null && targetTable != null ) { for ( int i = 0; i < vsourceFilefolder.length && !parentJob.isStopped() && continueProcessing; i++ ) { // get real values String realSourceFileFolder = environmentSubstitute( vsourceFilefolder[i] ); String realSourceWildcard = environmentSubstitute( vsourceWildcard[i] ); String realSourceDelimiter = environmentSubstitute( vsourceDelimiter[i] ); String realTargetDb = environmentSubstitute( targetDb[i] ); String realTargetTable = environmentSubstitute( targetTable[i] ); processOneRow( realSourceFileFolder, realSourceWildcard, realSourceDelimiter, realTargetDb, realTargetTable, parentJob, result ); } } } catch ( Exception e ) { incrErrors(); logError( BaseMessages.getString( PKG, "JobEntryMSAccessBulkLoad.UnexpectedError", e.getMessage() ) ); } // Success Condition result.setNrErrors( NrErrors ); result.setNrLinesInput( NrFilesToProcess ); result.setNrLinesWritten( NrSuccess ); if ( getSuccessStatus() ) { result.setResult( true ); } displayResults(); return result; } private boolean getSuccessStatus() { boolean retval = false; if ( ( NrErrors == 0 && getSuccessCondition().equals( SUCCESS_IF_NO_ERRORS ) ) || ( NrSuccess >= limitFiles && getSuccessCondition().equals( SUCCESS_IF_AT_LEAST ) ) || ( NrErrors <= limitFiles && getSuccessCondition().equals( SUCCESS_IF_ERRORS_LESS ) ) ) { retval = true; } return retval; } public boolean evaluates() { return true; } public void check( List<CheckResultInterface> remarks, JobMeta jobMeta, VariableSpace space, Repository repository, IMetaStore metaStore ) { boolean res = andValidator().validate( this, "arguments", remarks, putValidators( notNullValidator() ) ); if ( res == false ) { return; } ValidatorContext ctx = new ValidatorContext(); putVariableSpace( ctx, getVariables() ); putValidators( ctx, notNullValidator(), fileExistsValidator() ); for ( int i = 0; i < source_filefolder.length; i++ ) { andValidator().validate( this, "arguments[" + i + "]", remarks, ctx ); } } }