/*! ****************************************************************************** * * Pentaho Data Integration * * Copyright (C) 2002-2016 by Pentaho : http://www.pentaho.com * ******************************************************************************* * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * ******************************************************************************/ package org.pentaho.di.job.entries.unzip; import org.pentaho.di.job.entry.validator.AbstractFileValidator; import org.pentaho.di.job.entry.validator.AndValidator; import org.pentaho.di.job.entry.validator.JobEntryValidatorUtils; import java.io.File; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; import java.text.SimpleDateFormat; import java.util.Date; import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.apache.commons.vfs2.AllFileSelector; import org.apache.commons.vfs2.FileObject; import org.apache.commons.vfs2.FileSelectInfo; import org.apache.commons.vfs2.FileSystemException; import org.apache.commons.vfs2.FileType; import org.pentaho.di.cluster.SlaveServer; import org.pentaho.di.core.CheckResultInterface; import org.pentaho.di.core.Const; import org.pentaho.di.core.util.Utils; import org.pentaho.di.core.Result; import org.pentaho.di.core.ResultFile; import org.pentaho.di.core.RowMetaAndData; import org.pentaho.di.core.database.DatabaseMeta; import org.pentaho.di.core.exception.KettleDatabaseException; import org.pentaho.di.core.exception.KettleException; import org.pentaho.di.core.exception.KettleXMLException; import org.pentaho.di.core.util.StringUtil; import org.pentaho.di.core.variables.VariableSpace; import org.pentaho.di.core.vfs.KettleVFS; import org.pentaho.di.core.xml.XMLHandler; import org.pentaho.di.i18n.BaseMessages; import org.pentaho.di.job.Job; import org.pentaho.di.job.JobMeta; import org.pentaho.di.job.entry.JobEntryBase; import org.pentaho.di.job.entry.JobEntryInterface; import org.pentaho.di.job.entry.validator.ValidatorContext; import org.pentaho.di.repository.ObjectId; import org.pentaho.di.repository.Repository; import org.pentaho.metastore.api.IMetaStore; import org.w3c.dom.Node; /** * This defines a 'unzip' job entry. Its main use would be to unzip files in a directory * * @author Samatar Hassan * @since 25-09-2007 * */ public class JobEntryUnZip extends JobEntryBase implements Cloneable, JobEntryInterface { private static Class<?> PKG = JobEntryUnZip.class; // for i18n purposes, needed by Translator2!! private String zipFilename; public int afterunzip; private String wildcard; private String wildcardexclude; private String sourcedirectory; // targetdirectory on screen, renamed because of PDI-7761 private String movetodirectory; private boolean addfiletoresult; private boolean isfromprevious; private boolean adddate; private boolean addtime; private boolean SpecifyFormat; private String date_time_format; private boolean rootzip; private boolean createfolder; private String nr_limit; private String wildcardSource; private int iffileexist; private boolean createMoveToDirectory; private boolean addOriginalTimestamp; private boolean setOriginalModificationDate; public String SUCCESS_IF_AT_LEAST_X_FILES_UN_ZIPPED = "success_when_at_least"; public String SUCCESS_IF_ERRORS_LESS = "success_if_errors_less"; public String SUCCESS_IF_NO_ERRORS = "success_if_no_errors"; private String success_condition; public static final int IF_FILE_EXISTS_SKIP = 0; public static final int IF_FILE_EXISTS_OVERWRITE = 1; public static final int IF_FILE_EXISTS_UNIQ = 2; public static final int IF_FILE_EXISTS_FAIL = 3; public static final int IF_FILE_EXISTS_OVERWRITE_DIFF_SIZE = 4; public static final int IF_FILE_EXISTS_OVERWRITE_EQUAL_SIZE = 5; public static final int IF_FILE_EXISTS_OVERWRITE_ZIP_BIG = 6; public static final int IF_FILE_EXISTS_OVERWRITE_ZIP_BIG_EQUAL = 7; public static final int IF_FILE_EXISTS_OVERWRITE_ZIP_SMALL = 8; public static final int IF_FILE_EXISTS_OVERWRITE_ZIP_SMALL_EQUAL = 9; public static final String[] typeIfFileExistsCode = /* WARNING: DO NOT TRANSLATE THIS. */ { "SKIP", "OVERWRITE", "UNIQ", "FAIL", "OVERWRITE_DIFF_SIZE", "OVERWRITE_EQUAL_SIZE", "OVERWRITE_ZIP_BIG", "OVERWRITE_ZIP_BIG_EQUAL", "OVERWRITE_ZIP_BIG_SMALL", "OVERWRITE_ZIP_BIG_SMALL_EQUAL", }; public static final String[] typeIfFileExistsDesc = { BaseMessages.getString( PKG, "JobUnZip.Skip.Label" ), BaseMessages.getString( PKG, "JobUnZip.Overwrite.Label" ), BaseMessages.getString( PKG, "JobUnZip.Give_Unique_Name.Label" ), BaseMessages.getString( PKG, "JobUnZip.Fail.Label" ), BaseMessages.getString( PKG, "JobUnZip.OverwriteIfSizeDifferent.Label" ), BaseMessages.getString( PKG, "JobUnZip.OverwriteIfSizeEquals.Label" ), BaseMessages.getString( PKG, "JobUnZip.OverwriteIfZipBigger.Label" ), BaseMessages.getString( PKG, "JobUnZip.OverwriteIfZipBiggerOrEqual.Label" ), BaseMessages.getString( PKG, "JobUnZip.OverwriteIfZipSmaller.Label" ), BaseMessages.getString( PKG, "JobUnZip.OverwriteIfZipSmallerOrEqual.Label" ), }; private int NrErrors = 0; private int NrSuccess = 0; boolean successConditionBroken = false; boolean successConditionBrokenExit = false; int limitFiles = 0; private static SimpleDateFormat daf; private boolean dateFormatSet = false; public JobEntryUnZip( String n ) { super( n, "" ); zipFilename = null; afterunzip = 0; wildcard = null; wildcardexclude = null; sourcedirectory = null; movetodirectory = null; addfiletoresult = false; isfromprevious = false; adddate = false; addtime = false; SpecifyFormat = false; rootzip = false; createfolder = false; nr_limit = "10"; wildcardSource = null; iffileexist = IF_FILE_EXISTS_SKIP; success_condition = SUCCESS_IF_NO_ERRORS; createMoveToDirectory = false; addOriginalTimestamp = false; setOriginalModificationDate = false; } public JobEntryUnZip() { this( "" ); } public Object clone() { JobEntryUnZip je = (JobEntryUnZip) super.clone(); return je; } public String getXML() { StringBuilder retval = new StringBuilder( 550 ); // 450 chars in just spaces and tag names alone retval.append( super.getXML() ); retval.append( " " ).append( XMLHandler.addTagValue( "zipfilename", zipFilename ) ); retval.append( " " ).append( XMLHandler.addTagValue( "wildcard", wildcard ) ); retval.append( " " ).append( XMLHandler.addTagValue( "wildcardexclude", wildcardexclude ) ); retval.append( " " ).append( XMLHandler.addTagValue( "targetdirectory", sourcedirectory ) ); retval.append( " " ).append( XMLHandler.addTagValue( "movetodirectory", movetodirectory ) ); retval.append( " " ).append( XMLHandler.addTagValue( "afterunzip", afterunzip ) ); retval.append( " " ).append( XMLHandler.addTagValue( "addfiletoresult", addfiletoresult ) ); retval.append( " " ).append( XMLHandler.addTagValue( "isfromprevious", isfromprevious ) ); retval.append( " " ).append( XMLHandler.addTagValue( "adddate", adddate ) ); retval.append( " " ).append( XMLHandler.addTagValue( "addtime", addtime ) ); retval.append( " " ).append( XMLHandler.addTagValue( "addOriginalTimestamp", addOriginalTimestamp ) ); retval.append( " " ).append( XMLHandler.addTagValue( "SpecifyFormat", SpecifyFormat ) ); retval.append( " " ).append( XMLHandler.addTagValue( "date_time_format", date_time_format ) ); retval.append( " " ).append( XMLHandler.addTagValue( "rootzip", rootzip ) ); retval.append( " " ).append( XMLHandler.addTagValue( "createfolder", createfolder ) ); retval.append( " " ).append( XMLHandler.addTagValue( "nr_limit", nr_limit ) ); retval.append( " " ).append( XMLHandler.addTagValue( "wildcardSource", wildcardSource ) ); retval.append( " " ).append( XMLHandler.addTagValue( "success_condition", success_condition ) ); retval .append( " " ).append( XMLHandler.addTagValue( "iffileexists", getIfFileExistsCode( iffileexist ) ) ); retval.append( " " ).append( XMLHandler.addTagValue( "create_move_to_directory", createMoveToDirectory ) ); retval.append( " " ).append( XMLHandler.addTagValue( "setOriginalModificationDate", setOriginalModificationDate ) ); return retval.toString(); } public void loadXML( Node entrynode, List<DatabaseMeta> databases, List<SlaveServer> slaveServers, Repository rep, IMetaStore metaStore ) throws KettleXMLException { try { super.loadXML( entrynode, databases, slaveServers ); zipFilename = XMLHandler.getTagValue( entrynode, "zipfilename" ); afterunzip = Const.toInt( XMLHandler.getTagValue( entrynode, "afterunzip" ), -1 ); wildcard = XMLHandler.getTagValue( entrynode, "wildcard" ); wildcardexclude = XMLHandler.getTagValue( entrynode, "wildcardexclude" ); sourcedirectory = XMLHandler.getTagValue( entrynode, "targetdirectory" ); movetodirectory = XMLHandler.getTagValue( entrynode, "movetodirectory" ); addfiletoresult = "Y".equalsIgnoreCase( XMLHandler.getTagValue( entrynode, "addfiletoresult" ) ); isfromprevious = "Y".equalsIgnoreCase( XMLHandler.getTagValue( entrynode, "isfromprevious" ) ); adddate = "Y".equalsIgnoreCase( XMLHandler.getTagValue( entrynode, "adddate" ) ); addtime = "Y".equalsIgnoreCase( XMLHandler.getTagValue( entrynode, "addtime" ) ); addOriginalTimestamp = "Y".equalsIgnoreCase( XMLHandler.getTagValue( entrynode, "addOriginalTimestamp" ) ); SpecifyFormat = "Y".equalsIgnoreCase( XMLHandler.getTagValue( entrynode, "SpecifyFormat" ) ); date_time_format = XMLHandler.getTagValue( entrynode, "date_time_format" ); rootzip = "Y".equalsIgnoreCase( XMLHandler.getTagValue( entrynode, "rootzip" ) ); createfolder = "Y".equalsIgnoreCase( XMLHandler.getTagValue( entrynode, "createfolder" ) ); nr_limit = XMLHandler.getTagValue( entrynode, "nr_limit" ); wildcardSource = XMLHandler.getTagValue( entrynode, "wildcardSource" ); success_condition = XMLHandler.getTagValue( entrynode, "success_condition" ); if ( Utils.isEmpty( success_condition ) ) { success_condition = SUCCESS_IF_NO_ERRORS; } iffileexist = getIfFileExistsInt( XMLHandler.getTagValue( entrynode, "iffileexists" ) ); createMoveToDirectory = "Y".equalsIgnoreCase( XMLHandler.getTagValue( entrynode, "create_move_to_directory" ) ); setOriginalModificationDate = "Y".equalsIgnoreCase( XMLHandler.getTagValue( entrynode, "setOriginalModificationDate" ) ); } catch ( KettleXMLException xe ) { throw new KettleXMLException( "Unable to load job entry of type 'unzip' from XML node", xe ); } } public void loadRep( Repository rep, IMetaStore metaStore, ObjectId id_jobentry, List<DatabaseMeta> databases, List<SlaveServer> slaveServers ) throws KettleException { try { zipFilename = rep.getJobEntryAttributeString( id_jobentry, "zipfilename" ); afterunzip = (int) rep.getJobEntryAttributeInteger( id_jobentry, "afterunzip" ); wildcard = rep.getJobEntryAttributeString( id_jobentry, "wildcard" ); wildcardexclude = rep.getJobEntryAttributeString( id_jobentry, "wildcardexclude" ); sourcedirectory = rep.getJobEntryAttributeString( id_jobentry, "targetdirectory" ); movetodirectory = rep.getJobEntryAttributeString( id_jobentry, "movetodirectory" ); addfiletoresult = rep.getJobEntryAttributeBoolean( id_jobentry, "addfiletoresult" ); isfromprevious = rep.getJobEntryAttributeBoolean( id_jobentry, "isfromprevious" ); adddate = rep.getJobEntryAttributeBoolean( id_jobentry, "adddate" ); addtime = rep.getJobEntryAttributeBoolean( id_jobentry, "addtime" ); addOriginalTimestamp = rep.getJobEntryAttributeBoolean( id_jobentry, "addOriginalTimestamp" ); SpecifyFormat = rep.getJobEntryAttributeBoolean( id_jobentry, "SpecifyFormat" ); date_time_format = rep.getJobEntryAttributeString( id_jobentry, "date_time_format" ); rootzip = rep.getJobEntryAttributeBoolean( id_jobentry, "rootzip" ); createfolder = rep.getJobEntryAttributeBoolean( id_jobentry, "createfolder" ); nr_limit = rep.getJobEntryAttributeString( id_jobentry, "nr_limit" ); wildcardSource = rep.getJobEntryAttributeString( id_jobentry, "wildcardSource" ); success_condition = rep.getJobEntryAttributeString( id_jobentry, "success_condition" ); if ( Utils.isEmpty( success_condition ) ) { success_condition = SUCCESS_IF_NO_ERRORS; } iffileexist = getIfFileExistsInt( rep.getJobEntryAttributeString( id_jobentry, "iffileexists" ) ); createMoveToDirectory = rep.getJobEntryAttributeBoolean( id_jobentry, "create_move_to_directory" ); setOriginalModificationDate = rep.getJobEntryAttributeBoolean( id_jobentry, "setOriginalModificationDate" ); } catch ( KettleException dbe ) { throw new KettleException( "Unable to load job entry of type 'unzip' from the repository for id_jobentry=" + id_jobentry, dbe ); } } public void saveRep( Repository rep, IMetaStore metaStore, ObjectId id_job ) throws KettleException { try { rep.saveJobEntryAttribute( id_job, getObjectId(), "zipfilename", zipFilename ); rep.saveJobEntryAttribute( id_job, getObjectId(), "afterunzip", afterunzip ); rep.saveJobEntryAttribute( id_job, getObjectId(), "wildcard", wildcard ); rep.saveJobEntryAttribute( id_job, getObjectId(), "wildcardexclude", wildcardexclude ); rep.saveJobEntryAttribute( id_job, getObjectId(), "targetdirectory", sourcedirectory ); rep.saveJobEntryAttribute( id_job, getObjectId(), "movetodirectory", movetodirectory ); rep.saveJobEntryAttribute( id_job, getObjectId(), "addfiletoresult", addfiletoresult ); rep.saveJobEntryAttribute( id_job, getObjectId(), "isfromprevious", isfromprevious ); rep.saveJobEntryAttribute( id_job, getObjectId(), "addtime", addtime ); rep.saveJobEntryAttribute( id_job, getObjectId(), "adddate", adddate ); rep.saveJobEntryAttribute( id_job, getObjectId(), "addOriginalTimestamp", addOriginalTimestamp ); rep.saveJobEntryAttribute( id_job, getObjectId(), "SpecifyFormat", SpecifyFormat ); rep.saveJobEntryAttribute( id_job, getObjectId(), "date_time_format", date_time_format ); rep.saveJobEntryAttribute( id_job, getObjectId(), "rootzip", rootzip ); rep.saveJobEntryAttribute( id_job, getObjectId(), "createfolder", createfolder ); rep.saveJobEntryAttribute( id_job, getObjectId(), "nr_limit", nr_limit ); rep.saveJobEntryAttribute( id_job, getObjectId(), "wildcardSource", wildcardSource ); rep.saveJobEntryAttribute( id_job, getObjectId(), "success_condition", success_condition ); rep.saveJobEntryAttribute( id_job, getObjectId(), "iffileexists", getIfFileExistsCode( iffileexist ) ); rep.saveJobEntryAttribute( id_job, getObjectId(), "create_move_to_directory", createMoveToDirectory ); rep .saveJobEntryAttribute( id_job, getObjectId(), "setOriginalModificationDate", setOriginalModificationDate ); } catch ( KettleDatabaseException dbe ) { throw new KettleException( "Unable to save job entry of type 'unzip' to the repository for id_job=" + id_job, dbe ); } } public Result execute( Result previousResult, int nr ) { Result result = previousResult; result.setResult( false ); result.setNrErrors( 1 ); List<RowMetaAndData> rows = result.getRows(); RowMetaAndData resultRow = null; String realFilenameSource = environmentSubstitute( zipFilename ); String realWildcardSource = environmentSubstitute( wildcardSource ); String realWildcard = environmentSubstitute( wildcard ); String realWildcardExclude = environmentSubstitute( wildcardexclude ); String realTargetdirectory = environmentSubstitute( sourcedirectory ); String realMovetodirectory = environmentSubstitute( movetodirectory ); limitFiles = Const.toInt( environmentSubstitute( getLimit() ), 10 ); NrErrors = 0; NrSuccess = 0; successConditionBroken = false; successConditionBrokenExit = false; if ( isfromprevious ) { if ( log.isDetailed() ) { logDetailed( BaseMessages.getString( PKG, "JobUnZip.Log.ArgFromPrevious.Found", ( rows != null ? rows .size() : 0 ) + "" ) ); } if ( rows.size() == 0 ) { return result; } } else { if ( Utils.isEmpty( zipFilename ) ) { // Zip file/folder is missing logError( BaseMessages.getString( PKG, "JobUnZip.No_ZipFile_Defined.Label" ) ); return result; } } FileObject fileObject = null; FileObject targetdir = null; FileObject movetodir = null; try { // Let's make some checks here, before running job entry ... if ( Utils.isEmpty( realTargetdirectory ) ) { logError( BaseMessages.getString( PKG, "JobUnZip.Error.TargetFolderMissing" ) ); return result; } boolean exitjobentry = false; // Target folder targetdir = KettleVFS.getFileObject( realTargetdirectory, this ); if ( !targetdir.exists() ) { if ( createfolder ) { targetdir.createFolder(); if ( log.isDetailed() ) { logDetailed( BaseMessages.getString( PKG, "JobUnZip.Log.TargetFolderCreated", realTargetdirectory ) ); } } else { log.logError( BaseMessages.getString( PKG, "JobUnZip.TargetFolderNotFound.Label" ) ); exitjobentry = true; } } else { if ( !( targetdir.getType() == FileType.FOLDER ) ) { log.logError( BaseMessages.getString( PKG, "JobUnZip.TargetFolderNotFolder.Label", realTargetdirectory ) ); exitjobentry = true; } else { if ( log.isDetailed() ) { logDetailed( BaseMessages.getString( PKG, "JobUnZip.TargetFolderExists.Label", realTargetdirectory ) ); } } } // If user want to move zip files after process // movetodirectory must be provided if ( afterunzip == 2 ) { if ( Utils.isEmpty( movetodirectory ) ) { log.logError( BaseMessages.getString( PKG, "JobUnZip.MoveToDirectoryEmpty.Label" ) ); exitjobentry = true; } else { movetodir = KettleVFS.getFileObject( realMovetodirectory, this ); if ( !( movetodir.exists() ) || !( movetodir.getType() == FileType.FOLDER ) ) { if ( createMoveToDirectory ) { movetodir.createFolder(); if ( log.isDetailed() ) { logDetailed( BaseMessages.getString( PKG, "JobUnZip.Log.MoveToFolderCreated", realMovetodirectory ) ); } } else { log.logError( BaseMessages.getString( PKG, "JobUnZip.MoveToDirectoryNotExists.Label" ) ); exitjobentry = true; } } } } // We found errors...now exit if ( exitjobentry ) { return result; } if ( isfromprevious ) { if ( rows != null ) { // Copy the input row to the (command line) arguments for ( int iteration = 0; iteration < rows.size() && !parentJob.isStopped(); iteration++ ) { if ( successConditionBroken ) { if ( !successConditionBrokenExit ) { logError( BaseMessages.getString( PKG, "JobUnZip.Error.SuccessConditionbroken", "" + NrErrors ) ); successConditionBrokenExit = true; } result.setNrErrors( NrErrors ); return result; } resultRow = rows.get( iteration ); // Get sourcefile/folder and wildcard realFilenameSource = resultRow.getString( 0, null ); realWildcardSource = resultRow.getString( 1, null ); fileObject = KettleVFS.getFileObject( realFilenameSource, this ); if ( fileObject.exists() ) { processOneFile( result, parentJob, fileObject, realTargetdirectory, realWildcard, realWildcardExclude, movetodir, realMovetodirectory, realWildcardSource ); } else { updateErrors(); logError( BaseMessages.getString( PKG, "JobUnZip.Error.CanNotFindFile", realFilenameSource ) ); } } } } else { fileObject = KettleVFS.getFileObject( realFilenameSource, this ); if ( !fileObject.exists() ) { log.logError( BaseMessages.getString( PKG, "JobUnZip.ZipFile.NotExists.Label", realFilenameSource ) ); return result; } if ( log.isDetailed() ) { logDetailed( BaseMessages.getString( PKG, "JobUnZip.Zip_FileExists.Label", realFilenameSource ) ); } if ( Utils.isEmpty( sourcedirectory ) ) { log.logError( BaseMessages.getString( PKG, "JobUnZip.SourceFolderNotFound.Label" ) ); return result; } processOneFile( result, parentJob, fileObject, realTargetdirectory, realWildcard, realWildcardExclude, movetodir, realMovetodirectory, realWildcardSource ); } } catch ( Exception e ) { log.logError( BaseMessages.getString( PKG, "JobUnZip.ErrorUnzip.Label", realFilenameSource, e.getMessage() ) ); updateErrors(); } finally { if ( fileObject != null ) { try { fileObject.close(); } catch ( IOException ex ) { /* Ignore */ } } if ( targetdir != null ) { try { targetdir.close(); } catch ( IOException ex ) { /* Ignore */ } } if ( movetodir != null ) { try { movetodir.close(); } catch ( IOException ex ) { /* Ignore */ } } } result.setNrErrors( NrErrors ); result.setNrLinesWritten( NrSuccess ); if ( getSuccessStatus() ) { result.setResult( true ); } displayResults(); return result; } private void displayResults() { if ( log.isDetailed() ) { logDetailed( "=======================================" ); logDetailed( BaseMessages.getString( PKG, "JobUnZip.Log.Info.FilesInError", "" + NrErrors ) ); logDetailed( BaseMessages.getString( PKG, "JobUnZip.Log.Info.FilesInSuccess", "" + NrSuccess ) ); logDetailed( "=======================================" ); } } private boolean processOneFile( Result result, Job parentJob, FileObject fileObject, String realTargetdirectory, String realWildcard, String realWildcardExclude, FileObject movetodir, String realMovetodirectory, String realWildcardSource ) { boolean retval = false; try { if ( fileObject.getType().equals( FileType.FILE ) ) { // We have to unzip one zip file if ( !unzipFile( fileObject, realTargetdirectory, realWildcard, realWildcardExclude, result, parentJob, fileObject, movetodir, realMovetodirectory ) ) { updateErrors(); } else { updateSuccess(); } } else { // Folder..let's see wildcard FileObject[] children = fileObject.getChildren(); for ( int i = 0; i < children.length && !parentJob.isStopped(); i++ ) { if ( successConditionBroken ) { if ( !successConditionBrokenExit ) { logError( BaseMessages.getString( PKG, "JobUnZip.Error.SuccessConditionbroken", "" + NrErrors ) ); successConditionBrokenExit = true; } return false; } // Get only file! if ( !children[i].getType().equals( FileType.FOLDER ) ) { boolean unzip = true; String filename = children[i].getName().getPath(); Pattern patternSource = null; if ( !Utils.isEmpty( realWildcardSource ) ) { patternSource = Pattern.compile( realWildcardSource ); } // First see if the file matches the regular expression! if ( patternSource != null ) { Matcher matcher = patternSource.matcher( filename ); unzip = matcher.matches(); } if ( unzip ) { if ( !unzipFile( children[i], realTargetdirectory, realWildcard, realWildcardExclude, result, parentJob, fileObject, movetodir, realMovetodirectory ) ) { updateErrors(); } else { updateSuccess(); } } } } } } catch ( Exception e ) { updateErrors(); logError( BaseMessages.getString( PKG, "JobUnZip.Error.Label", e.getMessage() ) ); } finally { if ( fileObject != null ) { try { fileObject.close(); } catch ( IOException ex ) { /* Ignore */ } } } return retval; } private boolean unzipFile( FileObject sourceFileObject, String realTargetdirectory, String realWildcard, String realWildcardExclude, Result result, Job parentJob, FileObject fileObject, FileObject movetodir, String realMovetodirectory ) { boolean retval = false; String unzipToFolder = realTargetdirectory; try { if ( log.isDetailed() ) { logDetailed( BaseMessages.getString( PKG, "JobUnZip.Log.ProcessingFile", sourceFileObject.toString() ) ); } // Do you create a root folder? // if ( rootzip ) { String shortSourceFilename = sourceFileObject.getName().getBaseName(); int lenstring = shortSourceFilename.length(); int lastindexOfDot = shortSourceFilename.lastIndexOf( '.' ); if ( lastindexOfDot == -1 ) { lastindexOfDot = lenstring; } String foldername = realTargetdirectory + "/" + shortSourceFilename.substring( 0, lastindexOfDot ); FileObject rootfolder = KettleVFS.getFileObject( foldername, this ); if ( !rootfolder.exists() ) { try { rootfolder.createFolder(); if ( log.isDetailed() ) { logDetailed( BaseMessages.getString( PKG, "JobUnZip.Log.RootFolderCreated", foldername ) ); } } catch ( Exception e ) { throw new Exception( BaseMessages.getString( PKG, "JobUnZip.Error.CanNotCreateRootFolder", foldername ), e ); } } unzipToFolder = foldername; } // Try to read the entries from the VFS object... // String zipFilename = "zip:" + sourceFileObject.getName().getFriendlyURI(); FileObject zipFile = KettleVFS.getFileObject( zipFilename, this ); FileObject[] items = zipFile.findFiles( new AllFileSelector() { public boolean traverseDescendents( FileSelectInfo info ) { return true; } public boolean includeFile( FileSelectInfo info ) { // Never return the parent directory of a file list. if ( info.getDepth() == 0 ) { return false; } FileObject fileObject = info.getFile(); return fileObject != null; } } ); Pattern pattern = null; if ( !Utils.isEmpty( realWildcard ) ) { pattern = Pattern.compile( realWildcard ); } Pattern patternexclude = null; if ( !Utils.isEmpty( realWildcardExclude ) ) { patternexclude = Pattern.compile( realWildcardExclude ); } for ( FileObject item : items ) { if ( successConditionBroken ) { if ( !successConditionBrokenExit ) { logError( BaseMessages.getString( PKG, "JobUnZip.Error.SuccessConditionbroken", "" + NrErrors ) ); successConditionBrokenExit = true; } return false; } synchronized ( KettleVFS.getInstance().getFileSystemManager() ) { FileObject newFileObject = null; try { if ( log.isDetailed() ) { logDetailed( BaseMessages.getString( PKG, "JobUnZip.Log.ProcessingZipEntry", item.getName().getURI(), sourceFileObject.toString() ) ); } // get real destination filename // String newFileName = unzipToFolder + Const.FILE_SEPARATOR + getTargetFilename( item ); newFileObject = KettleVFS.getFileObject( newFileName, this ); if ( item.getType().equals( FileType.FOLDER ) ) { // Directory // if ( log.isDetailed() ) { logDetailed( BaseMessages.getString( PKG, "JobUnZip.CreatingDirectory.Label", newFileName ) ); } // Create Directory if necessary ... // if ( !newFileObject.exists() ) { newFileObject.createFolder(); } } else { // File // boolean getIt = true; boolean getItexclude = false; // First see if the file matches the regular expression! // if ( pattern != null ) { Matcher matcher = pattern.matcher( item.getName().getURI() ); getIt = matcher.matches(); } if ( patternexclude != null ) { Matcher matcherexclude = patternexclude.matcher( item.getName().getURI() ); getItexclude = matcherexclude.matches(); } boolean take = takeThisFile( item, newFileName ); if ( getIt && !getItexclude && take ) { if ( log.isDetailed() ) { logDetailed( BaseMessages.getString( PKG, "JobUnZip.ExtractingEntry.Label", item .getName().getURI(), newFileName ) ); } if ( iffileexist == IF_FILE_EXISTS_UNIQ ) { // Create file with unique name int lenstring = newFileName.length(); int lastindexOfDot = newFileName.lastIndexOf( '.' ); if ( lastindexOfDot == -1 ) { lastindexOfDot = lenstring; } newFileName = newFileName.substring( 0, lastindexOfDot ) + StringUtil.getFormattedDateTimeNow( true ) + newFileName.substring( lastindexOfDot, lenstring ); if ( log.isDebug() ) { logDebug( BaseMessages.getString( PKG, "JobUnZip.Log.CreatingUniqFile", newFileName ) ); } } // See if the folder to the target file exists... // if ( !newFileObject.getParent().exists() ) { newFileObject.getParent().createFolder(); // creates the whole path. } InputStream is = null; OutputStream os = null; try { is = KettleVFS.getInputStream( item ); os = KettleVFS.getOutputStream( newFileObject, false ); if ( is != null ) { byte[] buff = new byte[2048]; int len; while ( ( len = is.read( buff ) ) > 0 ) { os.write( buff, 0, len ); } // Add filename to result filenames addFilenameToResultFilenames( result, parentJob, newFileName ); } } finally { if ( is != null ) { is.close(); } if ( os != null ) { os.close(); } } } // end if take } } catch ( Exception e ) { updateErrors(); logError( BaseMessages.getString( PKG, "JobUnZip.Error.CanNotProcessZipEntry", item.getName().getURI(), sourceFileObject .toString() ), e ); } finally { if ( newFileObject != null ) { try { newFileObject.close(); if ( setOriginalModificationDate ) { // Change last modification date newFileObject.getContent().setLastModifiedTime( item.getContent().getLastModifiedTime() ); } } catch ( Exception e ) { /* Ignore */ } // ignore this } // Close file object // close() does not release resources! KettleVFS.getInstance().getFileSystemManager().closeFileSystem( item.getFileSystem() ); if ( items != null ) { items = null; } } } // Synchronized block on KettleVFS.getInstance().getFileSystemManager() } // End for // Here gc() is explicitly called if e.g. createfile is used in the same // job for the same file. The problem is that after creating the file the // file object is not properly garbaged collected and thus the file cannot // be deleted anymore. This is a known problem in the JVM. // System.gc(); // Unzip done... if ( afterunzip == 1 ) { // delete zip file boolean deleted = fileObject.delete(); if ( !deleted ) { updateErrors(); logError( BaseMessages.getString( PKG, "JobUnZip.Cant_Delete_File.Label", sourceFileObject.toString() ) ); } // File deleted if ( log.isDebug() ) { logDebug( BaseMessages.getString( PKG, "JobUnZip.File_Deleted.Label", sourceFileObject.toString() ) ); } } else if ( afterunzip == 2 ) { FileObject destFile = null; // Move File try { String destinationFilename = movetodir + Const.FILE_SEPARATOR + fileObject.getName().getBaseName(); destFile = KettleVFS.getFileObject( destinationFilename, this ); fileObject.moveTo( destFile ); // File moved if ( log.isDetailed() ) { logDetailed( BaseMessages.getString( PKG, "JobUnZip.Log.FileMovedTo", sourceFileObject.toString(), realMovetodirectory ) ); } } catch ( Exception e ) { updateErrors(); logError( BaseMessages.getString( PKG, "JobUnZip.Cant_Move_File.Label", sourceFileObject.toString(), realMovetodirectory, e .getMessage() ) ); } finally { if ( destFile != null ) { try { destFile.close(); } catch ( IOException ex ) { /* Ignore */ } } } } retval = true; } catch ( Exception e ) { updateErrors(); log.logError( BaseMessages.getString( PKG, "JobUnZip.ErrorUnzip.Label", sourceFileObject.toString(), e.getMessage() ), e ); } return retval; } private void addFilenameToResultFilenames( Result result, Job parentJob, String newfile ) throws Exception { if ( addfiletoresult ) { // Add file to result files name ResultFile resultFile = new ResultFile( ResultFile.FILE_TYPE_GENERAL, KettleVFS.getFileObject( newfile, this ), parentJob .getJobname(), toString() ); result.getResultFiles().put( resultFile.getFile().toString(), resultFile ); } } private void updateErrors() { NrErrors++; if ( checkIfSuccessConditionBroken() ) { // Success condition was broken successConditionBroken = true; } } private void updateSuccess() { NrSuccess++; } private boolean checkIfSuccessConditionBroken() { boolean retval = false; if ( ( NrErrors > 0 && getSuccessCondition().equals( SUCCESS_IF_NO_ERRORS ) ) || ( NrErrors >= limitFiles && getSuccessCondition().equals( SUCCESS_IF_ERRORS_LESS ) ) ) { retval = true; } return retval; } private boolean getSuccessStatus() { boolean retval = false; if ( ( NrErrors == 0 && getSuccessCondition().equals( SUCCESS_IF_NO_ERRORS ) ) || ( NrSuccess >= limitFiles && getSuccessCondition().equals( SUCCESS_IF_AT_LEAST_X_FILES_UN_ZIPPED ) ) || ( NrErrors <= limitFiles && getSuccessCondition().equals( SUCCESS_IF_ERRORS_LESS ) ) ) { retval = true; } return retval; } private boolean takeThisFile( FileObject sourceFile, String destinationFile ) throws FileSystemException { boolean retval = false; File destination = new File( destinationFile ); if ( !destination.exists() ) { if ( log.isDebug() ) { logDebug( BaseMessages.getString( PKG, "JobUnZip.Log.CanNotFindFile", destinationFile ) ); } return true; } if ( log.isDebug() ) { logDebug( BaseMessages.getString( PKG, "JobUnZip.Log.FileExists", destinationFile ) ); } if ( iffileexist == IF_FILE_EXISTS_SKIP ) { if ( log.isDebug() ) { logDebug( BaseMessages.getString( PKG, "JobUnZip.Log.FileSkip", destinationFile ) ); } return false; } if ( iffileexist == IF_FILE_EXISTS_FAIL ) { updateErrors(); logError( BaseMessages.getString( PKG, "JobUnZip.Log.FileError", destinationFile, "" + NrErrors ) ); return false; } if ( iffileexist == IF_FILE_EXISTS_OVERWRITE ) { if ( log.isDebug() ) { logDebug( BaseMessages.getString( PKG, "JobUnZip.Log.FileOverwrite", destinationFile ) ); } return true; } Long entrySize = sourceFile.getContent().getSize(); Long destinationSize = destination.length(); if ( iffileexist == IF_FILE_EXISTS_OVERWRITE_DIFF_SIZE ) { if ( entrySize != destinationSize ) { if ( log.isDebug() ) { logDebug( BaseMessages.getString( PKG, "JobUnZip.Log.FileDiffSize.Diff", sourceFile.getName().getURI(), "" + entrySize, destinationFile, "" + destinationSize ) ); } return true; } else { if ( log.isDebug() ) { logDebug( BaseMessages.getString( PKG, "JobUnZip.Log.FileDiffSize.Same", sourceFile.getName().getURI(), "" + entrySize, destinationFile, "" + destinationSize ) ); } return false; } } if ( iffileexist == IF_FILE_EXISTS_OVERWRITE_EQUAL_SIZE ) { if ( entrySize == destinationSize ) { if ( log.isDebug() ) { logDebug( BaseMessages.getString( PKG, "JobUnZip.Log.FileEqualSize.Same", sourceFile.getName().getURI(), "" + entrySize, destinationFile, "" + destinationSize ) ); } return true; } else { if ( log.isDebug() ) { logDebug( BaseMessages.getString( PKG, "JobUnZip.Log.FileEqualSize.Diff", sourceFile.getName().getURI(), "" + entrySize, destinationFile, "" + destinationSize ) ); } return false; } } if ( iffileexist == IF_FILE_EXISTS_OVERWRITE_ZIP_BIG ) { if ( entrySize > destinationSize ) { if ( log.isDebug() ) { logDebug( BaseMessages.getString( PKG, "JobUnZip.Log.FileBigSize.Big", sourceFile.getName().getURI(), "" + entrySize, destinationFile, "" + destinationSize ) ); } return true; } else { if ( log.isDebug() ) { logDebug( BaseMessages.getString( PKG, "JobUnZip.Log.FileBigSize.Small", sourceFile.getName().getURI(), "" + entrySize, destinationFile, "" + destinationSize ) ); } return false; } } if ( iffileexist == IF_FILE_EXISTS_OVERWRITE_ZIP_BIG_EQUAL ) { if ( entrySize >= destinationSize ) { if ( log.isDebug() ) { logDebug( BaseMessages.getString( PKG, "JobUnZip.Log.FileBigEqualSize.Big", sourceFile .getName().getURI(), "" + entrySize, destinationFile, "" + destinationSize ) ); } return true; } else { if ( log.isDebug() ) { logDebug( BaseMessages.getString( PKG, "JobUnZip.Log.FileBigEqualSize.Small", sourceFile .getName().getURI(), "" + entrySize, destinationFile, "" + destinationSize ) ); } return false; } } if ( iffileexist == IF_FILE_EXISTS_OVERWRITE_ZIP_SMALL ) { if ( entrySize < destinationSize ) { if ( log.isDebug() ) { logDebug( BaseMessages.getString( PKG, "JobUnZip.Log.FileSmallSize.Small", sourceFile.getName().getURI(), "" + entrySize, destinationFile, "" + destinationSize ) ); } return true; } else { if ( log.isDebug() ) { logDebug( BaseMessages.getString( PKG, "JobUnZip.Log.FileSmallSize.Big", sourceFile.getName().getURI(), "" + entrySize, destinationFile, "" + destinationSize ) ); } return false; } } if ( iffileexist == IF_FILE_EXISTS_OVERWRITE_ZIP_SMALL_EQUAL ) { if ( entrySize <= destinationSize ) { if ( log.isDebug() ) { logDebug( BaseMessages.getString( PKG, "JobUnZip.Log.FileSmallEqualSize.Small", sourceFile .getName().getURI(), "" + entrySize, destinationFile, "" + destinationSize ) ); } return true; } else { if ( log.isDebug() ) { logDebug( BaseMessages.getString( PKG, "JobUnZip.Log.FileSmallEqualSize.Big", sourceFile .getName().getURI(), "" + entrySize, destinationFile, "" + destinationSize ) ); } return false; } } if ( iffileexist == IF_FILE_EXISTS_UNIQ ) { // Create file with unique name return true; } return retval; } public boolean evaluates() { return true; } public static final int getIfFileExistsInt( String desc ) { for ( int i = 0; i < typeIfFileExistsCode.length; i++ ) { if ( typeIfFileExistsCode[i].equalsIgnoreCase( desc ) ) { return i; } } return 0; } public static final String getIfFileExistsCode( int i ) { if ( i < 0 || i >= typeIfFileExistsCode.length ) { return null; } return typeIfFileExistsCode[i]; } /** * @return Returns the iffileexist. */ public int getIfFileExist() { return iffileexist; } /** * @param setIfFileExist * The iffileexist to set. */ public void setIfFileExists( int iffileexist ) { this.iffileexist = iffileexist; } public boolean isCreateMoveToDirectory() { return createMoveToDirectory; } public void setCreateMoveToDirectory( boolean createMoveToDirectory ) { this.createMoveToDirectory = createMoveToDirectory; } public void setZipFilename( String zipFilename ) { this.zipFilename = zipFilename; } public void setWildcard( String wildcard ) { this.wildcard = wildcard; } public void setWildcardExclude( String wildcardexclude ) { this.wildcardexclude = wildcardexclude; } public void setSourceDirectory( String targetdirectoryin ) { this.sourcedirectory = targetdirectoryin; } public void setMoveToDirectory( String movetodirectory ) { this.movetodirectory = movetodirectory; } public String getSourceDirectory() { return sourcedirectory; } public String getMoveToDirectory() { return movetodirectory; } public String getZipFilename() { return zipFilename; } public String getWildcardSource() { return wildcardSource; } public void setWildcardSource( String wildcardSource ) { this.wildcardSource = wildcardSource; } public String getWildcard() { return wildcard; } public String getWildcardExclude() { return wildcardexclude; } public void setAddFileToResult( boolean addfiletoresultin ) { this.addfiletoresult = addfiletoresultin; } public boolean isAddFileToResult() { return addfiletoresult; } public void setDateInFilename( boolean adddate ) { this.adddate = adddate; } public void setAddOriginalTimestamp( boolean addOriginalTimestamp ) { this.addOriginalTimestamp = addOriginalTimestamp; } public boolean isOriginalTimestamp() { return addOriginalTimestamp; } public void setOriginalModificationDate( boolean setOriginalModificationDate ) { this.setOriginalModificationDate = setOriginalModificationDate; } public boolean isOriginalModificationDate() { return setOriginalModificationDate; } public boolean isDateInFilename() { return adddate; } public void setTimeInFilename( boolean addtime ) { this.addtime = addtime; } public boolean isTimeInFilename() { return addtime; } public boolean isSpecifyFormat() { return SpecifyFormat; } public void setSpecifyFormat( boolean SpecifyFormat ) { this.SpecifyFormat = SpecifyFormat; } public String getDateTimeFormat() { return date_time_format; } public void setDateTimeFormat( String date_time_format ) { this.date_time_format = date_time_format; } public void setDatafromprevious( boolean isfromprevious ) { this.isfromprevious = isfromprevious; } public boolean getDatafromprevious() { return isfromprevious; } public void setCreateRootFolder( boolean rootzip ) { this.rootzip = rootzip; } public boolean isCreateRootFolder() { return rootzip; } public void setCreateFolder( boolean createfolder ) { this.createfolder = createfolder; } public boolean isCreateFolder() { return createfolder; } public void setLimit( String nr_limitin ) { this.nr_limit = nr_limitin; } public String getLimit() { return nr_limit; } public void setSuccessCondition( String success_condition ) { this.success_condition = success_condition; } public String getSuccessCondition() { return success_condition; } /** * @param string * the filename from * * @return the calculated target filename */ protected String getTargetFilename( FileObject file ) throws FileSystemException { String retval = ""; String filename = file.getName().getPath(); // Replace possible environment variables... if ( filename != null ) { retval = filename; } if ( file.getType() != FileType.FILE ) { return retval; } if ( !SpecifyFormat && !adddate && !addtime ) { return retval; } int lenstring = retval.length(); int lastindexOfDot = retval.lastIndexOf( '.' ); if ( lastindexOfDot == -1 ) { lastindexOfDot = lenstring; } retval = retval.substring( 0, lastindexOfDot ); if ( daf == null ) { daf = new SimpleDateFormat(); } Date timestamp = new Date(); if ( addOriginalTimestamp ) { timestamp = new Date( file.getContent().getLastModifiedTime() ); } if ( SpecifyFormat && !Utils.isEmpty( date_time_format ) ) { if ( !dateFormatSet ) { daf.applyPattern( date_time_format ); } String dt = daf.format( timestamp ); retval += dt; } else { if ( adddate ) { if ( !dateFormatSet ) { daf.applyPattern( "yyyyMMdd" ); } String d = daf.format( timestamp ); retval += "_" + d; } if ( addtime ) { if ( !dateFormatSet ) { daf.applyPattern( "HHmmssSSS" ); } String t = daf.format( timestamp ); retval += "_" + t; } } if ( daf != null ) { dateFormatSet = true; } retval += filename.substring( lastindexOfDot, lenstring ); return retval; } @Override public void check( List<CheckResultInterface> remarks, JobMeta jobMeta, VariableSpace space, Repository repository, IMetaStore metaStore ) { ValidatorContext ctx1 = new ValidatorContext(); AbstractFileValidator.putVariableSpace( ctx1, getVariables() ); AndValidator.putValidators( ctx1, JobEntryValidatorUtils.notBlankValidator(), JobEntryValidatorUtils.fileDoesNotExistValidator() ); JobEntryValidatorUtils.andValidator().validate( this, "zipFilename", remarks, ctx1 ); if ( 2 == afterunzip ) { // setting says to move JobEntryValidatorUtils.andValidator().validate( this, "moveToDirectory", remarks, AndValidator.putValidators( JobEntryValidatorUtils.notBlankValidator() ) ); } JobEntryValidatorUtils.andValidator().validate( this, "sourceDirectory", remarks, AndValidator.putValidators( JobEntryValidatorUtils.notBlankValidator() ) ); } }