/**
* Copyright (C) 2010 Orbeon, Inc.
*
* This program is free software; you can redistribute it and/or modify it under the terms of the
* GNU Lesser General Public License as published by the Free Software Foundation; either version
* 2.1 of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
* See the GNU Lesser General Public License for more details.
*
* The full text of the license is available at http://www.gnu.org/copyleft/lesser.html
*/
package org.orbeon.oxf.processor.generator;
import com.drew.imaging.jpeg.JpegSegmentData;
import com.drew.imaging.jpeg.JpegSegmentReader;
import com.drew.imaging.jpeg.JpegSegmentType;
import com.drew.lang.StreamReader;
import com.drew.metadata.Directory;
import com.drew.metadata.Metadata;
import com.drew.metadata.MetadataException;
import com.drew.metadata.Tag;
import com.drew.metadata.exif.ExifReader;
import com.drew.metadata.iptc.IptcReader;
import org.orbeon.dom.Document;
import org.orbeon.dom.Node;
import org.orbeon.oxf.common.Defaults;
import org.orbeon.oxf.common.OXFException;
import org.orbeon.oxf.http.Headers;
import org.orbeon.oxf.pipeline.api.PipelineContext;
import org.orbeon.oxf.processor.*;
import org.orbeon.oxf.util.DateUtils;
import org.orbeon.oxf.util.NetUtils;
import org.orbeon.oxf.util.NumberUtils;
import org.orbeon.oxf.util.StringUtils;
import org.orbeon.oxf.xml.XMLReceiver;
import org.orbeon.oxf.xml.XMLReceiverHelper;
import org.orbeon.oxf.xml.XPathUtils;
import org.orbeon.oxf.xml.dom4j.LocationData;
import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
import java.net.URLConnection;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
/**
* This processor scans a filesystem hierarchy and retrieves information about each file. The
* implementation is based on a modified version of the Ant directory scanner.
*
* Ideas for improvements:
*
* o componentize so that file identification and enhanced file information (such as image
* metadata) can be plugged in
*
* o for XML files, content could be extracted with XPath expressions
*/
public class DirectoryScannerProcessor extends ProcessorImpl {
// Namespace URI passed along with the declaration of the "config" input in the constructor
public static final String DIRECTORY_GENERATOR_NAMESPACE_URI = "http://www.orbeon.org/oxf/directory-generator";
// Names of the elements produced on the data output
private static final String DIRECTORY_ELEMENT = "directory";
private static final String FILE_ELEMENT = "file";
// Element names used for each Exif / IPTC metadata directory, and for each tag within a directory
private static final String EXIF_ELEMENT = "exif-info";
private static final String IPTC_ELEMENT = "iptc-info";
private static final String TAG_ELEMENT = "param";
// Fallback values handed to ProcessorUtils.selectBooleanValue() when reading the configuration
private static final boolean DEFAULT_CASE_SENSITIVE = true;
private static final boolean DEFAULT_DEFAULT_EXCLUDES = false;
private static final boolean DEFAULT_BASIC_INFO = false;
private static final boolean DEFAULT_EXIF_INFO = false;
private static final boolean DEFAULT_IPTC_INFO = false;
public DirectoryScannerProcessor() {
addInputInfo(new ProcessorInputOutputInfo(INPUT_CONFIG, DIRECTORY_GENERATOR_NAMESPACE_URI));
addOutputInfo(new ProcessorInputOutputInfo(OUTPUT_DATA));
}
/**
 * Creates the output which, when read, scans the configured base directory and streams the
 * result as nested {@code directory} and {@code file} elements.
 *
 * Reading the output:
 *
 * 1. reads (and caches) the configuration from the config input;
 * 2. configures a {@link DirectoryScanner} with the base directory, include/exclude patterns,
 *    case sensitivity and default excludes;
 * 3. runs the scan, turning scanner events into XML events through an event listener.
 *
 * @param name name of the output to create
 * @return the new output, which has also been registered with {@code addOutput(name, output)}
 */
@Override
public ProcessorOutput createOutput(String name) {
    final ProcessorOutput output = new ProcessorOutputImpl(DirectoryScannerProcessor.this, name) {
        public void readImpl(PipelineContext context, XMLReceiver xmlReceiver) {
            // Read config
            final Config config = readCacheInputAsObject(context, getInputByName(INPUT_CONFIG), new CacheableInputReader<Config>() {
                public Config read(PipelineContext context, ProcessorInput input) {
                    final Document configNode = readInputAsOrbeonDom(context, input);
                    final Config config = new Config();

                    final String baseDirectoryURLString = StringUtils.trimAllToEmpty(XPathUtils.selectStringValueNormalize(configNode, "/config/base-directory"));
                    // Use location data if present so that relative URLs can be supported
                    final LocationData locationData = getLocationData();
                    final String realPath = NetUtils.getRealPath(baseDirectoryURLString, locationData);
                    config.setBaseDirectory(realPath);

                    // Include patterns
                    for (Iterator<?> i = XPathUtils.selectNodeIterator(configNode, "/config/include"); i.hasNext();) {
                        final Node node = (Node) i.next();
                        final String value = XPathUtils.selectStringValueNormalize(node, ".");
                        if (value != null)
                            config.addInclude(value);
                    }

                    // Exclude patterns
                    for (Iterator<?> i = XPathUtils.selectNodeIterator(configNode, "/config/exclude"); i.hasNext();) {
                        final Node node = (Node) i.next();
                        final String value = XPathUtils.selectStringValueNormalize(node, ".");
                        if (value != null)
                            config.addExclude(value);
                    }

                    final boolean caseSensitive = ProcessorUtils.selectBooleanValue(configNode, "/config/case-sensitive", DEFAULT_CASE_SENSITIVE);
                    config.setCaseSensitive(caseSensitive);

                    final boolean defaultExcludes = ProcessorUtils.selectBooleanValue(configNode, "/config/default-excludes", DEFAULT_DEFAULT_EXCLUDES);
                    config.setDefaultExcludes(defaultExcludes);

                    final boolean basicInfo = ProcessorUtils.selectBooleanValue(configNode, "/config/image-metadata/basic-info", DEFAULT_BASIC_INFO);
                    config.setBasicInfo(basicInfo);

                    final boolean exifInfo = ProcessorUtils.selectBooleanValue(configNode, "/config/image-metadata/exif-info", DEFAULT_EXIF_INFO);
                    config.setExifInfo(exifInfo);

                    final boolean iptcInfo = ProcessorUtils.selectBooleanValue(configNode, "/config/image-metadata/iptc-info", DEFAULT_IPTC_INFO);
                    config.setIptcInfo(iptcInfo);

                    // TODO: sorting
                    // TODO: use-ant-patterns (default and only currently supported), follow-symlinks
                    // TODO: more generalized content-type detection
                    // TODO: WebDAV support, and/or integration with resource manager
                    // TODO: option to list excluded and not-included?
                    return config;
                }
            });

            // Create and configure directory scanner
            final DirectoryScanner ds = new DirectoryScanner();
            if (config.getIncludes() != null)
                ds.setIncludes(config.getIncludes());
            if (config.getExcludes() != null)
                ds.setExcludes(config.getExcludes());
            // The default excludes must be added AFTER the explicit excludes are set. In the Ant
            // scanner this class is modelled on, setExcludes() replaces the exclude list whereas
            // addDefaultExcludes() appends to it, so the opposite order silently drops the
            // defaults. NOTE(review): assumes the modified scanner kept these semantics.
            if (config.isDefaultExcludes())
                ds.addDefaultExcludes();
            ds.setBasedir(config.getBaseDirectory());
            ds.setCaseSensitive(config.isCaseSensitive());

            // Set the event listener
            final XMLReceiverHelper helper = new XMLReceiverHelper(xmlReceiver);
            ds.setEventListener(new DirectoryScanner.EventListener() {

                // Names of the directories entered so far, outermost first
                private final List<String> pathNames = new ArrayList<String>();
                // For each entry of pathNames, the path that was reported with it
                private final List<String> paths = new ArrayList<String>();
                // Number of leading entries of pathNames for which a directory element is open
                private int pathLevel = 0;

                // Records the directory; only an included directory gets its element opened right
                // away, other directories are output lazily by outputPath() if they turn out to
                // contain something to output.
                private void enterDirectory(String path, String name, boolean included) {
                    // An empty name is ignored here and in exitDirectory()
                    if (name.equals(""))
                        return;
                    try {
                        if (included) {
                            outputPath();
                            pathLevel++;
                            helper.startElement(DIRECTORY_ELEMENT, new String[] {"name", name, "path", path + name, "included", "true"});
                        }
                        pathNames.add(name);
                        paths.add(path);
                    } catch (Exception e) {
                        throw new OXFException(e);
                    }
                }

                // Forgets the innermost directory, closing its element if one was opened
                private void exitDirectory(String name) {
                    if (name.equals(""))
                        return;
                    try {
                        if (pathNames.size() <= pathLevel) {
                            helper.endElement();
                            pathLevel--;
                        }
                        pathNames.remove(pathNames.size() - 1);
                        paths.remove(paths.size() - 1);
                    } catch (Exception e) {
                        throw new OXFException(e);
                    }
                }

                // Opens the elements of all entered directories which have not been output yet
                private void outputPath() {
                    try {
                        for (int i = pathLevel; i < pathNames.size(); i++) {
                            final String name = pathNames.get(i);
                            final String path = paths.get(i);
                            helper.startElement(DIRECTORY_ELEMENT, new String[] {"name", name, "path", path + name});
                        }
                        pathLevel = pathNames.size();
                    } catch (Exception e) {
                        throw new OXFException(e);
                    }
                }

                public void deselectedFile(String path, String name) {
                }

                public void excludedFile(String path, String name) {
                }

                public void includedFile(String path, String name) {
                    // Make sure all the ancestor directories are output first
                    outputPath();
                    try {
                        final String filePath = path + name;
                        final File file = new File(config.getBaseDirectory(), filePath);
                        final long lastModified = file.lastModified();
                        final String lastModifiedDate = DateUtils.DateTime().print(lastModified);
                        final long fileSize = file.length();
                        helper.startElement(FILE_ELEMENT, new String[]{"last-modified-ms", Long.toString(lastModified),
                                "last-modified-date", lastModifiedDate,
                                "size", Long.toString(fileSize),
                                "path", filePath,
                                "name", name});
                        if (config.isImageMetadata()) {
                            outputImageMetadata(helper, config, file);
                        }
                        helper.endElement();
                    } catch (Exception e) {
                        throw new OXFException(e);
                    }
                }

                public void notIncludedFile(String path, String name) {
                }

                public void endDeselectedDir(String path, String name) {
                    exitDirectory(name);
                }

                public void endExcludedDir(String path, String name) {
                    exitDirectory(name);
                }

                public void endIncludedDir(String path, String name) {
                    exitDirectory(name);
                }

                public void endNotIncludedDir(String path, String name) {
                    exitDirectory(name);
                }

                public void startDeselectedDir(String path, String name) {
                    enterDirectory(path, name, false);
                }

                public void startExcludedDir(String path, String name) {
                    enterDirectory(path, name, false);
                }

                public void startIncludedDir(String path, String name) {
                    enterDirectory(path, name, true);
                }

                public void startNotIncludedDir(String path, String name) {
                    enterDirectory(path, name, false);
                }
            });

            // Output elements
            try {
                final String baseDirectoryString = config.getBaseDirectory();
                final File baseDirectoryFile = new File(baseDirectoryString);
                if (!baseDirectoryFile.isDirectory())
                    throw new OXFException("base-directory element does not point to an existing directory: " + baseDirectoryString);
                final String baseDirectoryName = baseDirectoryFile.getCanonicalFile().getName();

                helper.startDocument();
                // Root element, representing the base directory itself
                helper.startElement(DIRECTORY_ELEMENT, new String[] { "name", baseDirectoryName, "path", baseDirectoryString });

                // Do the scan: the event listener outputs the content of the root element
                // TODO: sorted output could be built from ds.getIncludedDirectories() and ds.getIncludedFiles()
                ds.scan();

                helper.endElement();
                helper.endDocument();
            } catch (Exception e) {
                throw new OXFException(e);
            }
        }
    };
    addOutput(name, output);
    return output;
}
/**
 * Outputs one element per metadata directory, each containing one {@code param} element per tag.
 *
 * @param helper      helper the elements are sent to
 * @param metadata    metadata whose directories are output
 * @param elementName name of the element created for each directory
 * @throws MetadataException declared for errors raised while reading the metadata
 */
private static void outputMetadata(XMLReceiverHelper helper, Metadata metadata, String elementName) throws MetadataException {
    // Nothing to output when there is no directory at all
    if (metadata.getDirectoryCount() <= 0) {
        return;
    }
    for (final Directory currentDirectory : metadata.getDirectories()) {
        final String[] directoryAttributes = {"name", currentDirectory.getName()};
        helper.startElement(elementName, directoryAttributes);
        for (final Tag currentTag : currentDirectory.getTags()) {
            outputMetadataTag(helper, currentTag);
        }
        // TODO: Should do something with this?
        if (currentDirectory.hasErrors()) {
            for (final String ignoredError : currentDirectory.getErrors()) {
                // Errors are iterated over but currently not reported anywhere
            }
        }
        helper.endElement();
    }
}

/**
 * Outputs a single tag as a {@code param} element with {@code id}, {@code name} and
 * {@code value} children.
 */
private static void outputMetadataTag(XMLReceiverHelper helper, Tag tag) {
    helper.startElement(TAG_ELEMENT);
    helper.element("id", tag.getTagType());
    helper.element("name", tag.getTagName());
    helper.element("value", tag.getDescription());
    helper.endElement();
}
/**
 * Mutable holder for the processor configuration: base directory, include and exclude
 * patterns, scanning options and the kinds of image metadata to output.
 */
private static class Config {

    // Pattern lists are created lazily and remain null as long as no pattern has been added
    private List<String> includes;
    private List<String> excludes;

    private String baseDirectory;
    private boolean caseSensitive;
    private boolean defaultExcludes;

    private boolean basicInfo;
    private boolean exifInfo;
    private boolean iptcInfo;

    // ---- Include / exclude patterns

    public void addInclude(String pattern) {
        if (includes == null) {
            includes = new ArrayList<String>();
        }
        includes.add(pattern);
    }

    public void addExclude(String pattern) {
        if (excludes == null) {
            excludes = new ArrayList<String>();
        }
        excludes.add(pattern);
    }

    /** Returns the include patterns, or null if none has been added. */
    public String[] getIncludes() {
        return toArrayOrNull(includes);
    }

    /** Returns the exclude patterns, or null if none has been added. */
    public String[] getExcludes() {
        return toArrayOrNull(excludes);
    }

    // Copies the patterns to a new array, preserving "no list" as null
    private static String[] toArrayOrNull(List<String> patterns) {
        return (patterns == null) ? null : patterns.toArray(new String[patterns.size()]);
    }

    // ---- Scanning options

    public String getBaseDirectory() {
        return baseDirectory;
    }

    public void setBaseDirectory(String baseDirectory) {
        this.baseDirectory = baseDirectory;
    }

    public boolean isCaseSensitive() {
        return caseSensitive;
    }

    public void setCaseSensitive(boolean caseSensitive) {
        this.caseSensitive = caseSensitive;
    }

    public boolean isDefaultExcludes() {
        return defaultExcludes;
    }

    public void setDefaultExcludes(boolean defaultExcludes) {
        this.defaultExcludes = defaultExcludes;
    }

    // ---- Image metadata

    /** Returns true if at least one kind of image metadata has been requested. */
    public boolean isImageMetadata() {
        return isBasicInfo() || isExifInfo() || isIptcInfo();
    }

    public boolean isBasicInfo() {
        return basicInfo;
    }

    public void setBasicInfo(boolean basicInfo) {
        this.basicInfo = basicInfo;
    }

    public boolean isExifInfo() {
        return exifInfo;
    }

    public void setExifInfo(boolean exifInfo) {
        this.exifInfo = exifInfo;
    }

    public boolean isIptcInfo() {
        return iptcInfo;
    }

    public void setIptcInfo(boolean iptcInfo) {
        this.iptcInfo = iptcInfo;
    }
}
/**
 * Outputs an {@code image-metadata} element for the given file.
 *
 * The content type is guessed from the first bytes of the file. The element remains empty
 * unless the guessed type starts with {@code image/}. For JPEG, GIF and PNG files the basic
 * information includes the image dimensions; Exif and IPTC information is only output for
 * JPEG files.
 *
 * @param helper helper the elements are sent to
 * @param config configuration telling which kinds of information (basic, Exif, IPTC) to output
 * @param file   file to examine
 * @throws Exception if the file cannot be read or its metadata cannot be extracted
 */
private static void outputImageMetadata(XMLReceiverHelper helper, Config config, File file) throws Exception {
    helper.startElement("image-metadata");
    // guessContentTypeFromStream() requires a stream supporting marks, hence the buffering
    final InputStream is = new BufferedInputStream(new FileInputStream(file));
    try {
        final String contentType = URLConnection.guessContentTypeFromStream(is);
        if (contentType != null && contentType.startsWith("image/")) {
            if (contentType.equals("image/jpeg")) {
                final JpegSegmentData segmentData = JpegSegmentReader.readSegments(new StreamReader(is), null);
                // Basic info: content-type, size and comment
                if (config.isBasicInfo()) {
                    helper.startElement("basic-info");
                    helper.element(Headers.ContentTypeLower(), contentType);
                    final byte[] startOfFrameSegment = segmentData.getSegment(JpegSegmentType.SOF0);
                    // The segment starts with 1 byte of sample precision followed by the height
                    // and then the width; skip the dimensions if the segment is truncated
                    if (startOfFrameSegment != null && startOfFrameSegment.length >= 5) {
                        // Big-endian, unsigned encoding
                        final int width = NumberUtils.readShortBigEndian(startOfFrameSegment, 3) & 0xffff;
                        final int height = NumberUtils.readShortBigEndian(startOfFrameSegment, 1) & 0xffff;
                        helper.element("width", width);
                        helper.element("height", height);
                    }
                    final byte[] commentSegment = segmentData.getSegment(JpegSegmentType.COM);
                    if (commentSegment != null) {
                        // The charset is an argument of the String constructor, so that the bytes
                        // are decoded explicitly rather than with the platform default charset
                        helper.element("comment", new String(commentSegment, Defaults.DefaultEncodingForServletCompatibility())); // probably just ASCII
                    }
                    helper.endElement();
                }
                // Exif info
                if (config.isExifInfo()) {
                    final byte[] exifSegment = segmentData.getSegment(JpegSegmentType.APP1);
                    if (exifSegment != null) {
                        final Metadata metadata = new Metadata();
                        new ExifReader().extract(exifSegment, metadata, JpegSegmentType.APP1);
                        outputMetadata(helper, metadata, EXIF_ELEMENT);
                    }
                }
                // IPTC info
                if (config.isIptcInfo()) {
                    final byte[] iptcSegment = segmentData.getSegment(JpegSegmentType.APPD);
                    if (iptcSegment != null) {
                        final Metadata metadata = new Metadata();
                        new IptcReader().extract(iptcSegment, metadata, JpegSegmentType.APPD);
                        outputMetadata(helper, metadata, IPTC_ELEMENT);
                    }
                }
            } else if (contentType.equals("image/gif")) {
                // Basic info: content-type and size
                if (config.isBasicInfo()) {
                    helper.startElement("basic-info");
                    helper.element(Headers.ContentTypeLower(), contentType);
                    // 6 bytes of signature and version, then the width and the height
                    final byte[] bytes = new byte[10];
                    final int count = readFully(is, bytes);
                    if (count == bytes.length) {
                        // Little-endian, unsigned encoding
                        final int width = NumberUtils.readShortLittleEndian(bytes, 6) & 0xffff;
                        final int height = NumberUtils.readShortLittleEndian(bytes, 8) & 0xffff;
                        helper.element("width", width);
                        helper.element("height", height);
                    }
                    helper.endElement();
                }
            } else if (contentType.equals("image/png")) {
                // Basic info: content-type and size
                if (config.isBasicInfo()) {
                    helper.startElement("basic-info");
                    helper.element(Headers.ContentTypeLower(), contentType);
                    // See http://www.libpng.org/pub/png/spec/1.2/
                    final byte[] bytes = new byte[8 + 4 + 4 + 13];// header + chunk length + chunk type + IHDR content
                    final int count = readFully(is, bytes);
                    if (count == bytes.length) {
                        if (bytes[12] == 'I' && bytes[13] == 'H' && bytes[14] == 'D' && bytes[15] == 'R') {
                            // The IHDR content starts with the width and the height; the bit depth,
                            // color type, compression, filter and interlace methods follow
                            final int width = NumberUtils.readIntBigEndian(bytes, 16);
                            final int height = NumberUtils.readIntBigEndian(bytes, 20);
                            helper.element("width", width);
                            helper.element("height", height);
                        }
                    }
                    helper.endElement();
                }
            } else {
                // Basic info: just content-type
                if (config.isBasicInfo()) {
                    helper.startElement("basic-info");
                    helper.element(Headers.ContentTypeLower(), contentType);
                    helper.endElement();
                }
            }
        }
    } finally {
        is.close();
    }
    helper.endElement();
}

/**
 * Reads from the stream until the buffer is full or the end of the stream is reached. A single
 * call to {@code read()} is allowed to return fewer bytes than requested even when more follow.
 *
 * @return the number of bytes stored into the buffer
 */
private static int readFully(InputStream is, byte[] buffer) throws java.io.IOException {
    int total = 0;
    while (total < buffer.length) {
        final int count = is.read(buffer, total, buffer.length - total);
        if (count < 0)
            break;
        total += count;
    }
    return total;
}
}