package edu.stanford.nlp.io; import edu.stanford.nlp.util.logging.Redwood; import java.io.File; import java.io.FileFilter; import java.util.*; /** * A <code>FileSequentialCollection</code> maintains a read-only * collection of <code>Files</code>. (It's a list, but we don't * make it a List or else one needs an iterator that can go backwards.) * It is built from a Collection of paths, or just from a single path. * Optionally one can also provide a <code>FileFilter</code> which is * applied over the files in a recursive traversal, or else * an extension and whether to do recursive traversal, which are used to * construct a filter. * Note that the Collection argument constructor will behave 'normally' * iff none of the Collection elements are directories. If they are * directories they will be recursed and files in them added. To get the * behavior of putting just directories in the collection one needs to * use the constructor * <code>FileSequentialCollection(c, failFilt, true)</code>, * where <code>failFilt</code> * is a user-supplied <code>FileFilter</code> that accepts no files. * The <code>FileSequentialCollection</code> builds from these * constructor arguments a collection of <code>Files</code>, which can be * iterated over, etc. This class does runtime expansion of paths. * That is, it is optimized for iteration and not for random access. * It is also an unmodifiable Collection. * <p/> * The class provides some additional constructors beyond the two recommended * by the Collections package, to allow specifying a <code>FileFilter</code> * and similar options. Nevertheless, so as to avoid overburdening the * the API, not every possibly useful constructor has been provided where * these can be easily synthesized using standard Collections package * facilities. Useful idioms to know are: * <ul> * <li>To make a <code>FileSequentialCollection</code> from an array of * <code>Files</code> or <code>Strings</code> <code>arr</code>:<br> * <code>FileSequentialCollection fcollect = new FileSequentialCollection(Arrays.asList(arr)); * </code></li> * <li>To make a <code>FileSequentialCollection</code> from a single * <code>File</code> or <code>String</code> fi:<br> * <code>FileSequentialCollection fcollect = * new FileSequentialCollection(Collections.singletonList(fi));</code></li> * </ul> * This class will throw an <code>IllegalArgumentException</code> if there * are things that are not existing Files or String paths to existing files * in the input collection (from the Iterator). * * @author Christopher Manning * @version 1.0, August 2002 * @see FileArrayList */ public class FileSequentialCollection extends AbstractCollection<File> { /** A logger for this class */ private static Redwood.RedwoodChannels log = Redwood.channels(FileSequentialCollection.class); /** * Stores the input collection over which we work. This is * commonly a brief summary of a full set of files. */ private Collection<?> coll; /** * A filter for files to match. */ private FileFilter filt; private boolean includeDirs; /** * Creates an empty <code>FileSequentialCollection</code>, with no Files * in it. Since a <code>FileSequentialCollection</code> is not * modifiable, this is * largely useless (except if you want an empty one). */ public FileSequentialCollection() { this(null); } /** * Creates a <code>FileSequentialCollection</code> from the passed in * <code>Collection</code>. The constructor iterates through the * collection. For each element, if it is a <code>File</code> or * <code>String</code>, then this file path is traversed for addition * to the collection. If the argument is of some other type, an * <code>IllegalArgumentException</code> is thrown. * For each <code>File</code> or <code>String</code>, if they * do not correspond to directories, then they are added to the * collection; if they do, they are recursively explored and all * non-directories within them are added to the collection. * * @param c The collection to build the * <code>FileSequentialCollection</code> from */ public FileSequentialCollection(Collection<?> c) { this(c, null); } /** * Creates a <code>FileSequentialCollection</code> from the passed in * <code>File</code> path. If the <code>File</code> * does not correspond to a directory, then it is added to the * collection; if it does, it is explored. Files * that match the extension, and files in subfolders that match, if * appropriate, are added to the collection. * This is an additional convenience constructor. * * @param path file or directory to load from * @param suffix suffix (normally "File extension") of files to load * @param recursively true means descend into subdirectories as well */ public FileSequentialCollection(File path, String suffix, boolean recursively) { this(Collections.singletonList(path), suffix, recursively); } /** * Creates a <code>FileSequentialCollection</code> from the passed in * <code>Collection</code>. The constructor iterates through the * collection. For each element, if it is a <code>File</code>, then the * <code>File</code> is added to the collection, if it is a * <code>String</code>, then a <code>File</code> corresponding to this * <code>String</code> as a file path is added to the collection, and * if the argument is of some other type, an * <code>IllegalArgumentException</code> is thrown. For the files * thus specified, they are included in the collection only if they * match an extension filter as specified by the other arguments. * * @param c Collection of files or directories as Files or Strings * @param suffix suffix (normally "File extension") of files to load * @param recursively true means descend into subdirectories as well */ public FileSequentialCollection(Collection<?> c, String suffix, boolean recursively) { this(c, new ExtensionFileFilter(suffix, recursively), false); } /** * Creates a <code>FileSequentialCollection</code> from the passed in * <code>Collection</code>. The constructor iterates through the * collection. For each element, if it is a <code>File</code> or * <code>String</code> then these file paths are processed as * explained below. * If the argument is of some other type, an * <code>IllegalArgumentException</code> is thrown. For the files * specified, if they are not directories, they are included in the * collection. If they are directories, files inside them are * included iff they match the <code>FileFilter</code>. This will * include recursive directory descent iff the <code>FileFilter</code> * accepts directories. * If the path is a directory then only * files within the directory (perhaps recursively) that satisfy the * filter are processed. If the <code>path</code>is a file, then * that file is processed regardless of whether it satisfies the * filter. (This semantics was adopted, since otherwise there was no * easy way to go through all the files in a directory without * descending recursively via the specification of a * <code>FileFilter</code>.) * * @param c The collection of file or directory to load from * @param filt A FileFilter of files to load. This may be * <code>null</code>, in which case all files are accepted. */ public FileSequentialCollection(Collection<?> c, FileFilter filt) { this(c, filt, false); } public FileSequentialCollection(String filename, FileFilter filt) { this(Collections.singletonList(filename), filt); } /** * Creates a <code>FileSequentialCollection</code> from the passed in * <code>Collection</code>. The constructor iterates through the * collection. For each element, if it is a <code>File</code> or * <code>String</code> then these file paths are processed as * explained below. * If the argument is of some other type, an * <code>IllegalArgumentException</code> is thrown. For the files * specified, if they are not directories, they are included in the * collection. If they are directories, files inside them are * included iff they match the <code>FileFilter</code>. This will * include recursive directory descent iff the <code>FileFilter</code> * accepts directories. * If the path is a directory then only * files within the directory (perhaps recursively) that satisfy the * filter are processed. If the <code>path</code>is a file, then * that file is processed regardless of whether it satisfies the * filter. (This semantics was adopted, since otherwise there was no * easy way to go through all the files in a directory without * descending recursively via the specification of a * <code>FileFilter</code>.) * * @param c The collection of file or directory to load from. An * argument of <code>null</code> is interpreted like an * empty collection. * @param filt A FileFilter of files to load. This may be * <code>null</code>, in which case all files are accepted * @param includeDirs Whether to include directory names in the file list */ public FileSequentialCollection(Collection<?> c, FileFilter filt, boolean includeDirs) { super(); // store the arguments. They are expanded by the iterator if (c == null) { coll = new ArrayList<>(); } else { coll = c; } this.filt = filt; this.includeDirs = includeDirs; } /** * Returns the size of the FileSequentialCollection. * * @return size How many files are in the collection */ @SuppressWarnings({"UnusedDeclaration","unused"}) @Override public int size() { int counter = 0; for (File f : this) { counter++; } return counter; } /** * Return an Iterator over files in the collection. * This version lazily works its way down directories. */ @Override public Iterator<File> iterator() { return new FileSequentialCollectionIterator(); } /** * This is the iterator that gets returned */ private final class FileSequentialCollectionIterator implements Iterator<File> { // current state is a rootsIterator, a position in a recursion // under a directory listing, and a pointer in the current // directory. private Object[] roots; // these may be of type File or String private int rootsIndex; // these next two simulate a list of pairs, but I was too lazy to // make an extra class private Stack<Object> fileArrayStack; private Stack<Integer> fileArrayStackIndices; private File next; public FileSequentialCollectionIterator() { // log.info("Coll is " + coll); roots = coll.toArray(); rootsIndex = 0; fileArrayStack = new Stack<>(); fileArrayStackIndices = new Stack<>(); if (roots.length > 0) { fileArrayStack.add(roots[rootsIndex]); fileArrayStackIndices.push(Integer.valueOf(0)); } next = primeNextFile(); } public boolean hasNext() { return next != null; } /** * Returns the next element in the iteration. */ public File next() { if (next == null) { throw new NoSuchElementException("FileSequentialCollection exhausted"); } File ret = next; next = primeNextFile(); return ret; } /** * Not supported */ public void remove() { throw new UnsupportedOperationException(); } /** * Returns the next file to be accessed, or <code>null</code> if * there are none left. This is all quite hairy to write as an * iterator.... * * @return The next file */ private File primeNextFile() { while (rootsIndex < roots.length) { while (!fileArrayStack.empty()) { // log.info("fileArrayStack: " + fileArrayStack); Object obj = fileArrayStack.peek(); if (obj instanceof File[]) { // log.info("Got a File[]"); File[] files = (File[]) obj; Integer index = fileArrayStackIndices.pop(); int ind = index.intValue(); if (ind < files.length) { index = Integer.valueOf(ind + 1); fileArrayStackIndices.push(index); fileArrayStack.push(files[ind]); // loop around to process this new file } else { // this directory is finished and we pop up fileArrayStack.pop(); } } else { // take it off the stack: tail recursion optimization fileArrayStack.pop(); if (obj instanceof String) { obj = new File((String) obj); } if (!(obj instanceof File)) { throw new IllegalArgumentException("Collection elements must be Files or Strings"); } File path = (File) obj; if (path.isDirectory()) { // log.info("Got directory " + path); // if path is a directory, look into it File[] directoryListing = path.listFiles(filt); if (directoryListing == null) { throw new IllegalArgumentException("Directory access problem for: " + path); } // log.info(" with " + // directoryListing.length + " files in it."); if (includeDirs) { // log.info("Include dir as answer"); if (directoryListing.length > 0) { fileArrayStack.push(directoryListing); fileArrayStackIndices.push(Integer.valueOf(0)); } return path; } else { // we don't include the dir, so we'll push // the directory and loop around again ... if (directoryListing.length > 0) { fileArrayStack.push(directoryListing); fileArrayStackIndices.push(Integer.valueOf(0)); } // otherwise there was nothing in the // directory; we will pop back up } } else { // it's just a fixed file // log.info("Got a plain file " + path); if (!path.exists()) { throw new IllegalArgumentException("File doesn't exist: " + path); } return path; } } // go through loop again. we've pushed or popped as needed } // finished this root entry; go on to the next rootsIndex++; if (rootsIndex < roots.length) { fileArrayStack.add(roots[rootsIndex]); fileArrayStackIndices.push(Integer.valueOf(0)); } } // finished everything return null; } } /** * This is simply a debugging aid that tests the functionality of * the class. The supplied arguments are put in a * <code>Collection</code>, and passed to the * <code>FileSequentialCollection</code> constructor. * An iterator is then used to print the names of all the files * (but not directories) in the collection. * * @param args A list of file paths */ public static void main(String[] args) { FileSequentialCollection fcollect = new FileSequentialCollection(Arrays.asList(args)); for (File fi: fcollect) { System.out.println(fi); } // test the other constructors System.out.println("Above was Collection constructor"); System.out.println("Empty constructor"); FileSequentialCollection fcollect2 = new FileSequentialCollection(); for (File fi : fcollect2) { System.out.println(fi); } System.out.println("File String(mrg) boolean(true) constructor"); FileSequentialCollection fcollect3 = new FileSequentialCollection(new File(args[0]), "mrg", true); for (File fi : fcollect3) { System.out.println(fi); } System.out.println("Collection String(mrg) boolean constructor"); FileSequentialCollection fcollect4 = new FileSequentialCollection(Arrays.asList(args), "mrg", true); for (File fi: fcollect4) { System.out.println(fi); } System.out.println("Testing number range file filter"); FileSequentialCollection fcollect5 = new FileSequentialCollection(Arrays.asList(args), new NumberRangeFileFilter(320, 410, true)); for (File fi: fcollect5) { System.out.println(fi); } System.out.println("Testing null filter but include dirs"); FileSequentialCollection fcollect6 = new FileSequentialCollection(Arrays.asList(args), (FileFilter) null, true); for (File fi : fcollect6) { System.out.println(fi); } } }