/*
 * Copyright 2007 T-Rank AS
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package no.trank.openpipe.reader;

import java.io.File;
import java.io.FileFilter;
import java.util.ArrayDeque;
import java.util.Arrays;
import java.util.Comparator;
import java.util.Deque;
import java.util.Iterator;
import java.util.NoSuchElementException;

import no.trank.openpipe.api.document.Document;
import no.trank.openpipe.api.document.DocumentOperation;
import no.trank.openpipe.api.document.DocumentProducer;
import no.trank.openpipe.api.document.FileRawData;
import no.trank.openpipe.util.AcceptAllFileFilter;
import no.trank.openpipe.util.FilesFirstComparator;
import no.trank.openpipe.util.RegexFileFilter;

/**
 * A {@link DocumentProducer} that walks a directory tree and produces one {@link Document} per file found.
 * <p>
 * The walk starts at the configured {@link #setDirectory(String) directory}, descends at most
 * {@link #setMaxDepth(int) maxDepth} levels, and only visits entries accepted by the configured
 * {@link #setFilter(FileFilter) filter} (or by a filter built from {@link #setRegexPattern(String) regexPattern}
 * when no filter is set explicitly).
 *
 * @version $Revision$
 */
public class FileDocumentReader implements DocumentProducer {
   // Used when neither a filter nor a regex pattern has been configured.
   private static final FileFilter ACCEPT_ALL = new AcceptAllFileFilter();
   // NOTE(review): presumably orders plain files before directories - confirm against FilesFirstComparator.
   private static final Comparator<File> FILES_FIRST_COMPARATOR = new FilesFirstComparator();
   private FileDocReader reader;
   private File directory;
   private String fileNameField = "fileName";
   private String pathField = "pathName";
   private int maxDepth = 1;
   private FileFilter filter;
   private String regexPattern;

   /**
    * Validates the configuration and creates the reader used to turn files into documents.
    *
    * @throws IllegalArgumentException if no directory has been set, or the configured directory is not a
    *         directory or can not be read.
    */
   @Override
   public final void init() {
      if (directory == null) {
         // Fail with a descriptive message rather than a bare NullPointerException.
         throw new IllegalArgumentException("No directory set");
      }
      if (!directory.isDirectory()) {
         throw new IllegalArgumentException("'" + directory + "' is not a directory");
      }
      if (!directory.canRead()) {
         throw new IllegalArgumentException("Can not read directory '" + directory + "'");
      }
      // An explicitly configured filter takes precedence over the regex pattern.
      if (filter == null && regexPattern != null) {
         filter = new RegexFileFilter(regexPattern);
      }
      reader = createReader();
   }

   /**
    * Releases the reader created by {@link #init()}.
    */
   @Override
   public void close() {
      reader = null;
   }

   /**
    * Handles failure the same way as a normal {@link #close()}.
    */
   @Override
   public void fail() {
      close();
   }

   /**
    * Creates the reader that converts a file into a document. The default implementation wraps the file in a
    * {@link FileRawData}. Subclasses may override this to build documents differently.
    *
    * @return the reader used by {@link #iterator()}.
    */
   protected FileDocReader createReader() {
      return new FileDocReader() {
         @Override
         public Document getDocument(File file) {
            return new Document(new FileRawData(file));
         }
      };
   }

   /**
    * Creates an iterator over the documents for all accepted files below the configured directory.
    *
    * @return a new iterator starting at the configured directory.
    */
   @Override
   public Iterator<Document> iterator() {
      // A negative max depth means "no limit".
      final int depth = maxDepth < 0 ? Integer.MAX_VALUE : maxDepth;
      return new FileIterator(directory, depth, new DocReader(reader, fileNameField, pathField), filter);
   }

   /**
    * Sets the directory to read files from. The path is converted to an absolute path.
    *
    * @param directory the path of the directory to read.
    */
   public void setDirectory(String directory) {
      this.directory = new File(directory).getAbsoluteFile();
   }

   /**
    * Gets the absolute path of the directory to read files from.
    *
    * @return the directory path, or <tt>null</tt> if no directory has been set.
    */
   public String getDirectory() {
      return directory == null ? null : directory.getPath();
   }

   public String getFileNameField() {
      return fileNameField;
   }

   /**
    * Sets the name of the document field that receives the file name. Default value is <tt>"fileName"</tt>.
    *
    * @param fileNameField the field name, or <tt>null</tt> to not set the file name on the document.
    */
   public void setFileNameField(String fileNameField) {
      this.fileNameField = fileNameField;
   }

   public String getPathField() {
      return pathField;
   }

   /**
    * Sets the name of the document field that receives the file path. Default value is <tt>"pathName"</tt>.
    *
    * @param pathField the field name, or <tt>null</tt> to not set the path on the document.
    */
   public void setPathField(String pathField) {
      this.pathField = pathField;
   }

   public int getMaxDepth() {
      return maxDepth;
   }

   /**
    * Sets the max depth to go into directory structure. Setting {@code maxDepth < 0} means no limit. Default value
    * is <tt>1</tt>, which reads only the files directly inside the configured directory.
    *
    * @param maxDepth the max depth to go into directory structure.
    */
   public void setMaxDepth(int maxDepth) {
      this.maxDepth = maxDepth;
   }

   public FileFilter getFilter() {
      return filter;
   }

   /**
    * Sets the filter deciding which directory entries are visited. When set, it takes precedence over
    * {@link #setRegexPattern(String) regexPattern}.
    *
    * @param filter the filter to use, or <tt>null</tt> for none.
    */
   public void setFilter(FileFilter filter) {
      this.filter = filter;
   }

   public String getRegexPattern() {
      return regexPattern;
   }

   /**
    * Sets a regular expression that will be run against the filename of each file found in the specified
    * directory(s).
    *
    * If you set this, only filenames that match the regular expression will be included. The pattern is only
    * used when no {@link #setFilter(FileFilter) filter} has been set.
    *
    * @param regexPattern the pattern to match against
    */
   public void setRegexPattern(String regexPattern) {
      this.regexPattern = regexPattern;
   }

   /**
    * Converts a file into a document.
    */
   public static interface FileDocReader {
      Document getDocument(File file);
   }

   /**
    * Decorates another {@link FileDocReader}, adding file name and path fields and a default operation to the
    * documents it produces.
    */
   private static class DocReader implements FileDocReader {
      private final FileDocReader reader;
      private final String fileNameField;
      private final String pathField;

      public DocReader(FileDocReader reader, String fileNameField, String pathField) {
         this.reader = reader;
         this.fileNameField = fileNameField;
         this.pathField = pathField;
      }

      @Override
      public Document getDocument(File file) {
         final Document doc = reader.getDocument(file);
         if (fileNameField != null) {
            doc.setFieldValue(fileNameField, file.getName());
         }
         if (pathField != null) {
            doc.setFieldValue(pathField, file.getPath());
         }
         // Only supply an operation when the wrapped reader did not choose one.
         if (doc.getOperation() == null) {
            doc.setOperation(DocumentOperation.ADD_VALUE);
         }
         return doc;
      }
   }

   /**
    * Depth-first iterator over the files below a directory. The iterators of the directories currently being
    * descended through are kept on a stack, so the stack size equals the depth of the directory being listed.
    */
   private static class FileIterator implements Iterator<Document> {
      private static final File[] NO_FILES = new File[0];
      private final FileDocReader reader;
      private final FileFilter fileFilter;
      private final Deque<Iterator<File>> stack;
      private final int maxDepth;
      private Iterator<File> fileIt;
      // The next file to hand out, or null if it has not been looked up yet (or there is none).
      private File file;
      private final FileOnlyFileFilter onlyFileFilter;

      public FileIterator(File file, int maxDepth, FileDocReader reader, FileFilter fileFilter) {
         this.reader = reader;
         this.fileFilter = fileFilter != null ? fileFilter : ACCEPT_ALL;
         // Stored zero-based so it can be compared directly with the stack size.
         this.maxDepth = maxDepth - 1;
         stack = new ArrayDeque<Iterator<File>>();
         onlyFileFilter = new FileOnlyFileFilter(this.fileFilter);
         fileIt = getFiles(file);
      }

      /**
       * Lists the accepted entries of a directory in sorted order. At the maximum depth directories are left
       * out, so the walk does not descend any further.
       *
       * @param file the directory to list.
       * @return an iterator over the accepted entries; empty if the directory could not be listed.
       */
      private Iterator<File> getFiles(File file) {
         final boolean atMaxDepth = stack.size() >= maxDepth;
         final File[] listed = file.listFiles(atMaxDepth ? onlyFileFilter : fileFilter);
         // listFiles() returns null if the path is not a directory or an I/O error occurs (e.g. the directory
         // is unreadable or was removed during the walk). Skip such directories instead of failing with a
         // NullPointerException in the middle of the iteration.
         final File[] files = listed != null ? listed : NO_FILES;
         if (atMaxDepth) {
            Arrays.sort(files);
         } else {
            Arrays.sort(files, FILES_FIRST_COMPARATOR);
         }
         return Arrays.asList(files).iterator();
      }

      @Override
      public boolean hasNext() {
         findNextFile();
         return file != null;
      }

      /**
       * Advances to the next regular file unless one is already pending, descending into directories on the way.
       * Leaves {@link #file} as <tt>null</tt> when the walk is exhausted.
       */
      private void findNextFile() {
         if (file != null || !findFileIt()) {
            return;
         }
         file = fileIt.next();
         while (file != null && file.isDirectory()) {
            // Remember where we were in the parent before listing the sub-directory.
            stack.push(fileIt);
            fileIt = getFiles(file);
            file = findFileIt() ? fileIt.next() : null;
         }
      }

      /**
       * Pops exhausted iterators off the stack until one with remaining entries is current.
       *
       * @return <tt>true</tt> if the current iterator has more entries.
       */
      private boolean findFileIt() {
         while (!fileIt.hasNext() && !stack.isEmpty()) {
            fileIt = stack.pop();
         }
         return fileIt.hasNext();
      }

      @Override
      public Document next() {
         if (!hasNext()) {
            throw new NoSuchElementException();
         }
         try {
            return reader.getDocument(file);
         } finally {
            // Consume the file even if reading it failed, so the iteration can move on.
            file = null;
         }
      }

      @Override
      public void remove() {
         throw new UnsupportedOperationException();
      }
   }

   /**
    * Accepts only entries that are not directories and that the wrapped filter accepts.
    */
   private static class FileOnlyFileFilter implements FileFilter {
      private final FileFilter fileFilter;

      public FileOnlyFileFilter(FileFilter fileFilter) {
         this.fileFilter = fileFilter;
      }

      @Override
      public boolean accept(File file) {
         return !file.isDirectory() && fileFilter.accept(file);
      }
   }
}