/*! ******************************************************************************
*
* Pentaho Data Integration
*
* Copyright (C) 2002-2017 by Pentaho : http://www.pentaho.com
*
*******************************************************************************
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
******************************************************************************/
package org.pentaho.di.core.fileinput;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.regex.Pattern;
import org.apache.commons.vfs2.AllFileSelector;
import org.apache.commons.vfs2.FileObject;
import org.apache.commons.vfs2.FileSelectInfo;
import org.apache.commons.vfs2.FileType;
import org.apache.commons.vfs2.provider.sftp.SftpFileObject;
import org.pentaho.di.core.Const;
import org.pentaho.di.core.util.Utils;
import org.pentaho.di.core.logging.LogChannel;
import org.pentaho.di.core.logging.LogChannelInterface;
import org.pentaho.di.core.variables.VariableSpace;
import org.pentaho.di.core.vfs.KettleVFS;
import org.pentaho.di.core.vfs.SftpFileObjectWithWindowsSupport;
import org.pentaho.di.core.vfs.SftpFileSystemWindowsProvider;
public class FileInputList {
// Entries accepted by the list builders; returned as-is by getFiles().
private List<FileObject> files = new ArrayList<FileObject>();
// Entries recorded through addNonExistantFile(); the builders in this class use it for
// required entries whose exists() check returned false.
private List<FileObject> nonExistantFiles = new ArrayList<FileObject>( 1 );
// Entries recorded through addNonAccessibleFile(); the builders in this class use it for
// required entries that are unreadable, yield no matches, or fail with an exception.
private List<FileObject> nonAccessibleFiles = new ArrayList<FileObject>( 1 );
// Log channel named "FileInputList"; receives the stack traces of exceptions caught while building lists.
private static LogChannelInterface log = new LogChannel( "FileInputList" );
/**
 * Selects which VFS entry types ({@link FileType#FILE}, {@link FileType#FOLDER}) a search may return.
 * Each constant carries a textual name that is used by {@link #toString()} and {@link #getByName(String)}.
 */
public enum FileTypeFilter {
  FILES_AND_FOLDERS( "all_files", FileType.FILE, FileType.FOLDER ),
  ONLY_FILES( "only_files", FileType.FILE ),
  ONLY_FOLDERS( "only_folders", FileType.FOLDER );

  private final String name;
  private final Collection<FileType> allowedFileTypes;

  FileTypeFilter( String name, FileType... allowedFileTypes ) {
    this.allowedFileTypes = Collections.unmodifiableCollection( Arrays.asList( allowedFileTypes ) );
    this.name = name;
  }

  /**
   * @param fileType the type to test
   * @return {@code true} when this filter lists the given type as allowed
   */
  public boolean isFileTypeAllowed( FileType fileType ) {
    return allowedFileTypes.contains( fileType );
  }

  /**
   * @return the textual name given to this constant (for example {@code "only_files"})
   */
  @Override
  public String toString() {
    return name;
  }

  /**
   * Looks a filter up by its ordinal position.
   *
   * @param ordinal the ordinal to look for
   * @return the constant with that ordinal, or {@link #ONLY_FILES} when no constant has it
   */
  public static FileTypeFilter getByOrdinal( int ordinal ) {
    // values() is in ordinal order, so a bounds-checked index is the same as a linear search.
    final FileTypeFilter[] all = values();
    final boolean inRange = ordinal >= 0 && ordinal < all.length;
    return inRange ? all[ordinal] : ONLY_FILES;
  }

  /**
   * Looks a filter up by its textual name.
   *
   * @param name the textual name to look for; {@code null} matches nothing
   * @return the constant with that name, or {@link #ONLY_FILES} when none matches
   */
  public static FileTypeFilter getByName( String name ) {
    FileTypeFilter match = ONLY_FILES;
    for ( FileTypeFilter candidate : values() ) {
      if ( candidate.name.equals( name ) ) {
        match = candidate;
        break;
      }
    }
    return match;
  }
}
// Value (compared case-insensitively) that marks an entry of a "required" array as required.
private static final String YES = "Y";
/**
 * Renders the given files as text: the URI of each file followed by {@code Const.CR}.
 *
 * @param nonExistantFiles the files to describe
 * @return the concatenated URIs, one per line; an empty string for an empty list
 */
public static String getRequiredFilesDescription( List<FileObject> nonExistantFiles ) {
  StringBuilder description = new StringBuilder();
  for ( FileObject missing : nonExistantFiles ) {
    description.append( missing.getName().getURI() ).append( Const.CR );
  }
  return description.toString();
}
/**
 * Creates the "do not descend into sub-directories" flags used by the overloads that take no
 * {@code includeSubdirs} argument.
 *
 * @param iLength the number of flags to create
 * @return a new array of {@code iLength} elements, all {@code false}
 */
private static boolean[] includeSubdirsFalse( int iLength ) {
  // A freshly allocated boolean[] is already all false, so no explicit fill loop is needed.
  return new boolean[iLength];
}
/**
 * Convenience overload: never descends into sub-directories and applies no explicit file type filters.
 *
 * @param space variable space used to resolve the names and masks
 * @param fileName file or folder names
 * @param fileMask include masks, indexed like {@code fileName}
 * @param excludeFileMask exclude masks, indexed like {@code fileName}
 * @param fileRequired "required" flags, indexed like {@code fileName}
 * @return the URIs of the files that were found
 */
public static String[] createFilePathList( VariableSpace space, String[] fileName, String[] fileMask,
  String[] excludeFileMask, String[] fileRequired ) {
  final boolean[] noSubdirs = includeSubdirsFalse( fileName.length );
  final FileTypeFilter[] noFilters = null;
  return createFilePathList( space, fileName, fileMask, excludeFileMask, fileRequired, noSubdirs, noFilters );
}
/**
 * Convenience overload that applies no explicit file type filters.
 *
 * @param space variable space used to resolve the names and masks
 * @param fileName file or folder names
 * @param fileMask include masks, indexed like {@code fileName}
 * @param excludeFileMask exclude masks, indexed like {@code fileName}
 * @param fileRequired "required" flags, indexed like {@code fileName}
 * @param includeSubdirs per-entry flag telling whether sub-directories are searched
 * @return the URIs of the files that were found
 */
public static String[] createFilePathList( VariableSpace space, String[] fileName, String[] fileMask,
  String[] excludeFileMask, String[] fileRequired, boolean[] includeSubdirs ) {
  final FileTypeFilter[] noFilters = null;
  return createFilePathList( space, fileName, fileMask, excludeFileMask, fileRequired, includeSubdirs, noFilters );
}
/**
 * Builds the file list with {@code createFileList} and returns the URI of every file found.
 *
 * @param space variable space used to resolve the names and masks
 * @param fileName file or folder names
 * @param fileMask include masks, indexed like {@code fileName}
 * @param excludeFileMask exclude masks, indexed like {@code fileName}
 * @param fileRequired "required" flags, indexed like {@code fileName}
 * @param includeSubdirs per-entry flag telling whether sub-directories are searched
 * @param filters per-entry file type filters; may be {@code null}
 * @return the URIs of the files that were found, in list order
 */
public static String[] createFilePathList( VariableSpace space, String[] fileName, String[] fileMask,
  String[] excludeFileMask, String[] fileRequired, boolean[] includeSubdirs, FileTypeFilter[] filters ) {
  FileInputList built =
    createFileList( space, fileName, fileMask, excludeFileMask, fileRequired, includeSubdirs, filters );
  List<FileObject> matched = built.getFiles();

  String[] uris = new String[matched.size()];
  int pos = 0;
  for ( FileObject file : matched ) {
    uris[pos++] = file.getName().getURI();
  }
  return uris;
}
/**
 * Convenience overload: never descends into sub-directories and applies no explicit file type filters.
 *
 * @param space variable space used to resolve the names and masks
 * @param fileName file or folder names
 * @param fileMask include masks, indexed like {@code fileName}
 * @param excludeFileMask exclude masks, indexed like {@code fileName}
 * @param fileRequired "required" flags, indexed like {@code fileName}
 * @return the populated list
 */
public static FileInputList createFileList( VariableSpace space, String[] fileName, String[] fileMask,
  String[] excludeFileMask, String[] fileRequired ) {
  final boolean[] noSubdirs = includeSubdirsFalse( fileName.length );
  final FileTypeFilter[] noFilters = null;
  return createFileList( space, fileName, fileMask, excludeFileMask, fileRequired, noSubdirs, noFilters );
}
/**
 * Convenience overload that applies no explicit file type filters.
 *
 * @param space variable space used to resolve the names and masks
 * @param fileName file or folder names
 * @param fileMask include masks, indexed like {@code fileName}
 * @param excludeFileMask exclude masks, indexed like {@code fileName}
 * @param fileRequired "required" flags, indexed like {@code fileName}
 * @param includeSubdirs per-entry flag telling whether sub-directories are searched
 * @return the populated list
 */
public static FileInputList createFileList( VariableSpace space, String[] fileName, String[] fileMask,
  String[] excludeFileMask, String[] fileRequired, boolean[] includeSubdirs ) {
  final FileTypeFilter[] noFilters = null;
  return createFileList( space, fileName, fileMask, excludeFileMask, fileRequired, includeSubdirs, noFilters );
}
/**
 * Builds a {@link FileInputList} from parallel arrays that describe the entries to look up.
 *
 * <p>The name and mask arrays are first passed through {@code space.environmentSubstitute}. Entry
 * {@code i} is skipped when its resolved name is empty. When either of its masks is non-empty the
 * name is searched as a container for matching children; otherwise the name is handled as a single
 * file. Exceptions raised while handling one entry are logged and do not stop the remaining entries.
 *
 * @param space variable space used to resolve the names and masks
 * @param fileName file or folder names
 * @param fileMask include masks (regular expressions), indexed like {@code fileName}
 * @param excludeFileMask exclude masks (regular expressions), indexed like {@code fileName}
 * @param fileRequired "required" flags, indexed like {@code fileName}; "Y" in any case means required
 * @param includeSubdirs per-entry flag telling whether sub-directories are searched
 * @param fileTypeFilters per-entry file type filters; a {@code null} array or element means
 *          {@link FileTypeFilter#ONLY_FILES}
 * @return the populated list
 */
public static FileInputList createFileList( VariableSpace space, String[] fileName, String[] fileMask,
  String[] excludeFileMask, String[] fileRequired, boolean[] includeSubdirs, FileTypeFilter[] fileTypeFilters ) {
  final FileInputList result = new FileInputList();

  // Resolve variables in the names and in both mask arrays up front.
  final String[] resolvedNames = space.environmentSubstitute( fileName );
  final String[] resolvedMasks = space.environmentSubstitute( fileMask );
  final String[] resolvedExcludeMasks = space.environmentSubstitute( excludeFileMask );

  for ( int idx = 0; idx < resolvedNames.length; idx++ ) {
    // All per-entry settings are read before the empty-name check, as the arrays are indexed in parallel.
    final String path = resolvedNames[idx];
    final String includeMask = resolvedMasks[idx];
    final String excludeMask = resolvedExcludeMasks[idx];
    final boolean required = YES.equalsIgnoreCase( fileRequired[idx] );
    final boolean recurse = includeSubdirs[idx];
    FileTypeFilter typeFilter = FileTypeFilter.ONLY_FILES;
    if ( fileTypeFilters != null && fileTypeFilters[idx] != null ) {
      typeFilter = fileTypeFilters[idx];
    }

    if ( Utils.isEmpty( path ) ) {
      continue;
    }

    final boolean noMaskAtAll = Utils.isEmpty( includeMask ) && Utils.isEmpty( excludeMask );
    if ( noMaskAtAll ) {
      collectPlainFile( result, space, path, required );
    } else {
      collectMaskedEntries( result, space, path, includeMask, excludeMask, required, recurse, typeFilter );
    }
  }
  return result;
}

/**
 * Handles one entry that has at least one mask: searches {@code path} and adds every child whose base
 * name passes the masks. Any exception is logged; for a required entry the path is then recorded as
 * non-accessible.
 */
private static void collectMaskedEntries( FileInputList target, VariableSpace space, final String path,
  final String includeMask, final String excludeMask, final boolean required, final boolean recurse,
  final FileTypeFilter typeFilter ) {
  try {
    final FileObject root = KettleVFS.getFileObject( path, space );

    if ( required ) {
      if ( !root.exists() ) {
        // A required folder that is missing: record it, nothing to search.
        target.addNonExistantFile( root );
        return;
      }
      if ( !root.isReadable() ) {
        target.addNonAccessibleFile( root );
        return;
      }
    }

    final boolean isFolder = root != null && root.getType() == FileType.FOLDER;
    if ( !isFolder ) {
      // Not a folder: match against the direct children and keep the order in which they are returned.
      for ( FileObject child : root.getChildren() ) {
        if ( nameMatchesMasks( child.getName().getBaseName(), includeMask, excludeMask ) ) {
          target.addFile( child );
        }
      }
      return;
    }

    final FileObject[] found = root.findFiles( new AllFileSelector() {
      @Override
      public boolean traverseDescendents( FileSelectInfo info ) {
        return recurse || info.getDepth() == 0;
      }

      @Override
      public boolean includeFile( FileSelectInfo info ) {
        if ( info.getDepth() == 0 ) {
          // Depth 0 is the searched folder itself; it is never part of the result.
          return false;
        }
        final FileObject candidate = info.getFile();
        try {
          if ( candidate == null || !typeFilter.isFileTypeAllowed( candidate.getType() ) ) {
            return false;
          }
          return nameMatchesMasks( info.getFile().getName().getBaseName(), includeMask, excludeMask );
        } catch ( IOException ex ) {
          // The type could not be determined: leave the file out.
          return false;
        }
      }
    } );

    if ( found != null ) {
      for ( FileObject hit : found ) {
        FileObject entry = hit;
        if ( entry instanceof SftpFileObject ) {
          final SftpFileObject sftp = (SftpFileObject) entry;
          entry = new SftpFileObjectWithWindowsSupport( sftp,
            SftpFileSystemWindowsProvider.getSftpFileSystemWindows( sftp ) );
        }
        if ( entry.exists() ) {
          target.addFile( entry );
        }
      }
    }
    if ( required && Utils.isEmpty( found ) ) {
      // A required folder that produced no match is recorded as non-accessible.
      target.addNonAccessibleFile( root );
    }
    target.sortFiles();
  } catch ( Exception e ) {
    if ( required ) {
      target.addNonAccessibleFile( new NonAccessibleFileObject( path ) );
    }
    log.logError( Const.getStackTracker( e ) );
  }
}

/**
 * Handles one entry without masks: adds the file when it exists and is readable; when it is required
 * and missing or unreadable it is recorded in the matching list instead. Any exception is logged; for
 * a required entry the path is then recorded as non-accessible.
 */
private static void collectPlainFile( FileInputList target, VariableSpace space, String path, boolean required ) {
  try {
    final FileObject file = KettleVFS.getFileObject( path, space );
    if ( !file.exists() ) {
      if ( required ) {
        target.addNonExistantFile( file );
      }
    } else if ( file.isReadable() ) {
      target.addFile( file );
    } else if ( required ) {
      target.addNonAccessibleFile( file );
    }
  } catch ( Exception e ) {
    if ( required ) {
      target.addNonAccessibleFile( new NonAccessibleFileObject( path ) );
    }
    log.logError( Const.getStackTracker( e ) );
  }
}

/**
 * Tells whether a base name passes the include mask and is not hit by the exclude mask. An empty
 * include mask accepts every name; an empty exclude mask rejects none. Both non-empty masks are
 * always evaluated.
 */
private static boolean nameMatchesMasks( String baseName, String includeMask, String excludeMask ) {
  final boolean included = Utils.isEmpty( includeMask ) || Pattern.matches( includeMask, baseName );
  final boolean excluded = !Utils.isEmpty( excludeMask ) && Pattern.matches( excludeMask, baseName );
  return included && !excluded;
}
/**
 * Builds a {@link FileInputList} holding the sub-folders, at any depth, of each given folder.
 *
 * <p>The names are first passed through {@code space.environmentSubstitute}; entries whose resolved
 * name is empty are skipped. A required folder that yields no sub-folder is recorded as
 * non-accessible; a required name that is not a folder and does not exist is recorded as
 * non-existent. Exceptions are logged, and the looked-up folder object is closed after each entry.
 *
 * @param space variable space used to resolve the folder names
 * @param folderName the folders to search
 * @param folderRequired "required" flags, indexed like {@code folderName}; "Y" in any case means required
 * @return the populated list
 */
public static FileInputList createFolderList( VariableSpace space, String[] folderName, String[] folderRequired ) {
  final FileInputList result = new FileInputList();
  final String[] resolvedFolders = space.environmentSubstitute( folderName );

  for ( int idx = 0; idx < resolvedFolders.length; idx++ ) {
    final String path = resolvedFolders[idx];
    final boolean required = YES.equalsIgnoreCase( folderRequired[idx] );
    final boolean descendIntoSubfolders = true;
    final FileTypeFilter foldersOnly = FileTypeFilter.ONLY_FOLDERS;

    if ( Utils.isEmpty( path ) ) {
      continue;
    }

    FileObject root = null;
    try {
      root = KettleVFS.getFileObject( path, space );
      final boolean isFolder = root != null && root.getType() == FileType.FOLDER;

      if ( !isFolder ) {
        if ( required && !root.exists() ) {
          result.addNonExistantFile( root );
        }
      } else {
        final FileObject[] found = root.findFiles( new AllFileSelector() {
          @Override
          public boolean traverseDescendents( FileSelectInfo info ) {
            return info.getDepth() == 0 || descendIntoSubfolders;
          }

          @Override
          public boolean includeFile( FileSelectInfo info ) {
            if ( info.getDepth() == 0 ) {
              // Depth 0 is the searched folder itself; it is never part of the result.
              return false;
            }
            final FileObject candidate = info.getFile();
            try {
              return candidate != null && foldersOnly.isFileTypeAllowed( candidate.getType() );
            } catch ( IOException ex ) {
              // The type could not be determined: leave the entry out.
              return false;
            }
          }
        } );

        if ( found != null ) {
          for ( FileObject hit : found ) {
            if ( hit.exists() ) {
              result.addFile( hit );
            }
          }
        }
        if ( required && Utils.isEmpty( found ) ) {
          result.addNonAccessibleFile( root );
        }
        result.sortFiles();
      }
    } catch ( Exception e ) {
      log.logError( Const.getStackTracker( e ) );
    } finally {
      try {
        if ( root != null ) {
          root.close();
        }
      } catch ( Exception ignored ) {
        // A failure to close is deliberately ignored.
      }
    }
  }
  return result;
}
/**
 * @return the internal list of accepted files (the list itself, not a copy)
 */
public List<FileObject> getFiles() {
return files;
}
/**
 * @return the result of {@code KettleVFS.getFilename} for every accepted file, in list order
 */
public String[] getFileStrings() {
  final String[] names = new String[files.size()];
  int pos = 0;
  for ( FileObject file : files ) {
    names[pos++] = KettleVFS.getFilename( file );
  }
  return names;
}
/**
 * @return the internal list of entries recorded as non-accessible (the list itself, not a copy)
 */
public List<FileObject> getNonAccessibleFiles() {
return nonAccessibleFiles;
}
/**
 * @return the internal list of entries recorded as non-existent (the list itself, not a copy)
 */
public List<FileObject> getNonExistantFiles() {
return nonExistantFiles;
}
/**
 * Appends a file to the list of accepted files.
 *
 * @param file the file to add
 */
public void addFile( FileObject file ) {
files.add( file );
}
/**
 * Appends a file to the list of non-accessible entries.
 *
 * @param file the file to record
 */
public void addNonAccessibleFile( FileObject file ) {
nonAccessibleFiles.add( file );
}
/**
 * Appends a file to the list of non-existent entries.
 *
 * @param file the file to record
 */
public void addNonExistantFile( FileObject file ) {
nonExistantFiles.add( file );
}
/**
 * Sorts the accepted, non-accessible and non-existent lists, in that order, each with the comparator
 * returned by {@code KettleVFS.getComparator()}.
 */
public void sortFiles() {
  for ( List<FileObject> bucket : Arrays.asList( files, nonAccessibleFiles, nonExistantFiles ) ) {
    Collections.sort( bucket, KettleVFS.getComparator() );
  }
}
/*
* private boolean containsComparable(List list) { if (list == null || list.size() == 0) return false;
*
* return (list.get(0) instanceof Comparable); }
*/
/**
 * @param i zero-based index into the list of accepted files
 * @return the accepted file at that index
 */
public FileObject getFile( int i ) {
return files.get( i );
}
/**
 * @return the number of accepted files
 */
public int nrOfFiles() {
return files.size();
}
/**
 * @return the number of non-accessible entries plus the number of non-existent entries
 */
public int nrOfMissingFiles() {
  final int unreadable = nonAccessibleFiles.size();
  final int absent = nonExistantFiles.size();
  return unreadable + absent;
}
/**
 * Convenience overload without exclude masks: passes an exclude-mask array of {@code null} elements,
 * sized like {@code fileName}, and no explicit file type filters.
 *
 * @param space variable space used to resolve the names and masks
 * @param fileName file or folder names
 * @param fileMask include masks, indexed like {@code fileName}
 * @param fileRequired "required" flags, indexed like {@code fileName}
 * @param includeSubdirs per-entry flag telling whether sub-directories are searched
 * @return the populated list
 */
public static FileInputList createFileList( VariableSpace space, String[] fileName, String[] fileMask,
  String[] fileRequired, boolean[] includeSubdirs ) {
  final String[] noExcludeMasks = new String[fileName.length];
  final FileTypeFilter[] noFilters = null;
  return createFileList( space, fileName, fileMask, noExcludeMasks, fileRequired, includeSubdirs, noFilters );
}
/**
 * Convenience overload without exclude masks: passes an exclude-mask array of {@code null} elements,
 * sized like {@code fileName}, never descends into sub-directories and applies no explicit file
 * type filters.
 *
 * @param space variable space used to resolve the names and masks
 * @param fileName file or folder names
 * @param fileMask include masks, indexed like {@code fileName}
 * @param fileRequired "required" flags, indexed like {@code fileName}
 * @return the URIs of the files that were found
 */
public static String[] createFilePathList( VariableSpace space, String[] fileName, String[] fileMask,
  String[] fileRequired ) {
  final boolean[] noSubdirs = includeSubdirsFalse( fileName.length );
  final String[] noExcludeMasks = new String[fileName.length];
  final FileTypeFilter[] noFilters = null;
  return createFilePathList( space, fileName, fileMask, noExcludeMasks, fileRequired, noSubdirs, noFilters );
}
}