/*
* Copyright (C) 2011 Jan Pokorsky
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package cz.cas.lib.proarc.common.imports;
import cz.cas.lib.proarc.common.config.AppConfigurationException;
import cz.cas.lib.proarc.common.dao.BatchItem.ObjectState;
import cz.cas.lib.proarc.common.export.mets.JhoveContext;
import cz.cas.lib.proarc.common.fedora.BinaryEditor;
import cz.cas.lib.proarc.common.fedora.DigitalObjectException;
import cz.cas.lib.proarc.common.fedora.FedoraObject;
import cz.cas.lib.proarc.common.fedora.LocalStorage;
import cz.cas.lib.proarc.common.fedora.LocalStorage.LocalObject;
import cz.cas.lib.proarc.common.fedora.MixEditor;
import cz.cas.lib.proarc.common.fedora.PageView.PageViewHandler;
import cz.cas.lib.proarc.common.fedora.PageView.PageViewItem;
import cz.cas.lib.proarc.common.fedora.StringEditor;
import cz.cas.lib.proarc.common.fedora.XmlStreamEditor;
import cz.cas.lib.proarc.common.fedora.relation.RelationEditor;
import cz.cas.lib.proarc.common.imports.FileSet.FileEntry;
import cz.cas.lib.proarc.common.imports.ImportBatchManager.BatchItemObject;
import cz.cas.lib.proarc.common.imports.ImportProcess.ImportOptions;
import cz.cas.lib.proarc.common.object.DigitalObjectHandler;
import cz.cas.lib.proarc.common.object.DigitalObjectManager;
import cz.cas.lib.proarc.common.object.MetadataHandler;
import cz.cas.lib.proarc.common.ocr.AltoDatastream;
import cz.cas.lib.proarc.common.process.ExternalProcess;
import cz.cas.lib.proarc.common.process.KakaduCompress;
import cz.incad.imgsupport.ImageMimeType;
import cz.incad.imgsupport.ImageSupport;
import cz.incad.imgsupport.ImageSupport.ScalingMethod;
import java.awt.image.BufferedImage;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.net.URI;
import java.util.List;
import java.util.Locale;
import java.util.logging.Level;
import java.util.logging.Logger;
import javax.imageio.stream.FileImageOutputStream;
import javax.ws.rs.core.MediaType;
import org.apache.commons.configuration.Configuration;
/**
* Requires Java Advanced Imaging support.
* See http://www.oracle.com/technetwork/java/current-142188.html and
* http://download.java.net/media/jai/builds/release/1_1_3/
* jai-1_1_3-lib.zip is a platform independent version
*
* http://download.java.net/media/jai-imageio/builds/release/1.1/ for jai_imageio-1.1.jar
*
* For maven, try to depend just on com.sun.media.jai_imageio.1.1 as kramerius common.
* How to properly depend in pom see http://sahits.ch/blog/?p=1038
*
* @author Jan Pokorsky
*/
public final class TiffImporter {

    private static final Logger LOG = Logger.getLogger(TiffImporter.class.getName());

    /** Registers imported objects in the batch and persists their state. */
    private final ImportBatchManager ibm;

    /**
     * @param ibm batch manager used to register and update imported items
     */
    public TiffImporter(ImportBatchManager ibm) {
        this.ibm = ibm;
    }

    /**
     * Tells whether this importer can handle the file set.
     *
     * @param fileSet set of files to inspect
     * @return {@code true} if at least one entry has the TIFF MIME type
     */
    public boolean accept(FileSet fileSet) {
        return isTiff(fileSet);
    }

    /**
     * Imports the TIFF of the file set as a new local digital object together
     * with its derived images, JP2 copies, OCR and technical metadata.
     * <p>Any failure raised while building the object is logged and recorded
     * on the returned batch item instead of being propagated.
     *
     * @param fileSet set of files to import
     * @param ctx import options
     * @return {@code null} if the set contains no TIFF entry; otherwise the
     *      batch item in state {@code LOADED} or {@code LOADING_FAILED}
     */
    public BatchItemObject consume(FileSet fileSet, ImportOptions ctx) {
        FileEntry tiffEntry = findTiff(fileSet);
        // check tiff file
        if (tiffEntry == null) {
            return null;
        }

        ImportProfile config = ctx.getConfig();
        File f = tiffEntry.getFile();
        String originalFilename = fileSet.getName();

        // creates FOXML and metadata
        LocalObject localObj = createObject(originalFilename, ctx);
        BatchItemObject batchLocalObject = ibm.addLocalObject(ctx.getBatch(), localObj);
        try {
            // the MIME type of the entry is not trusted; verify the content itself
            if (!InputUtils.isTiff(f)) {
                throw new IllegalStateException("Not a TIFF content: " + f);
            }
            DigitalObjectHandler dobjHandler = DigitalObjectManager.getDefault().createHandler(localObj);
            createRelsExt(dobjHandler, f, ctx);
            createMetadata(dobjHandler, ctx);
            createImages(ctx.getTargetFolder(), f, originalFilename, localObj, config);
            importArchivalCopy(fileSet, f, localObj, ctx);
            importUserCopy(fileSet, f, localObj, ctx);
            importOcr(fileSet, localObj, ctx);
            createTechnicalMetadata(localObj, ctx);

            // writes FOXML
            dobjHandler.commit();
            ibm.addChildRelation(ctx.getBatch(), null, localObj.getPid());
            batchLocalObject.setState(ObjectState.LOADED);
        } catch (Throwable ex) {
            // deliberate boundary: a failed item must be marked as failed
            // and must not break processing in the caller
            LOG.log(Level.SEVERE, f.toString(), ex);
            batchLocalObject.setState(ObjectState.LOADING_FAILED);
            batchLocalObject.setLog(ImportBatchManager.toString(ex));
        }
        ibm.update(batchLocalObject);
        return batchLocalObject;
    }

    /**
     * Creates a new local object backed by {@code <originalFilename>.foxml}
     * in the target folder of the import and assigns it to the importing user.
     */
    private LocalObject createObject(String originalFilename, ImportOptions ctx) {
        File tempBatchFolder = ctx.getTargetFolder();
        LocalStorage storage = new LocalStorage();
        File foxml = new File(tempBatchFolder, originalFilename + ".foxml");
        LocalObject localObj = storage.create(foxml);
        localObj.setOwner(ctx.getUsername());
        return localObj;
    }

    /**
     * Writes the page description through the metadata handler of the object.
     * The page index is the 1-based position of the consumed file when index
     * generation is enabled, otherwise it is left unset.
     *
     * @throws IllegalStateException if the handler is not a {@link PageViewHandler}
     */
    private void createMetadata(DigitalObjectHandler objHandler, ImportOptions ctx) throws DigitalObjectException {
        MetadataHandler<Object> mHandler = objHandler.metadata();
        if (!(mHandler instanceof PageViewHandler)) {
            throw new IllegalStateException("Unsupported metadata handler: " + mHandler);
        }
        // requires RELS-EXT model in place
        // creates MODS + DC + LABEL
        PageViewHandler pvHandler = (PageViewHandler) mHandler;
        String pageIndex = ctx.isGenerateIndices()
                ? String.valueOf(ctx.getConsumedFileCounter() + 1)
                : null;
        PageViewItem page = new PageViewItem();
        page.setPageIndex(pageIndex);
        pvHandler.setPage(page, null);
    }

    /**
     * Writes relations of the object: its model, the device and the name of
     * the imported file.
     */
    private void createRelsExt(DigitalObjectHandler objHandler, File f, ImportOptions ctx) throws DigitalObjectException {
        String fedoraModel = ctx.getModel();
        RelationEditor relEditor = objHandler.relations();
        relEditor.setModel(fedoraModel);
        relEditor.setDevice(ctx.getDevice());
        relEditor.setImportFile(f.getName());
        relEditor.write(0, null);
        // XXX use fedora-model:downloadFilename in RELS-INT or label of datastream to specify filename
    }

    /** Tells whether the file set contains an entry with the TIFF MIME type. */
    private boolean isTiff(FileSet fileSet) {
        return findTiff(fileSet) != null;
    }

    /**
     * Finds the first entry of the file set with the TIFF MIME type.
     *
     * @return the entry or {@code null} if there is none
     */
    private FileEntry findTiff(FileSet fileSet) {
        String tiffMime = ImageMimeType.TIFF.getMimeType();
        for (FileEntry entry : fileSet.getFiles()) {
            if (tiffMime.equals(entry.getMimetype())) {
                return entry;
            }
        }
        return null;
    }

    /**
     * Imports plain text OCR and ALTO OCR found among files of the set.
     *
     * @throws FileNotFoundException if a datastream listed as required
     *      by the profile has no matching file in the set
     */
    private void importOcr(FileSet fileSet, FedoraObject fo, ImportOptions options)
            throws IOException, DigitalObjectException {

        // XXX find filename.ocr.txt or generate OCR or nothing
        // plain text OCR
        File tempBatchFolder = options.getTargetFolder();
        String originalFilename = fileSet.getName();
        ImportProfile config = options.getConfig();
        List<Object> requiredDatastreamId = config.getRequiredDatastreamId();
        FileEntry ocrEntry = findSibling(fileSet, config.getPlainOcrFileSuffix());
        if (ocrEntry != null) {
            // store a copy converted from the configured charset to UTF-8
            File ocrFile = new File(tempBatchFolder, originalFilename + '.' + StringEditor.OCR_ID + ".txt");
            StringEditor.copy(ocrEntry.getFile(), config.getPlainOcrCharset(), ocrFile, "UTF-8");
            XmlStreamEditor ocrEditor = fo.getEditor(StringEditor.ocrProfile());
            ocrEditor.write(ocrFile.toURI(), 0, null);
        } else if (requiredDatastreamId.contains(StringEditor.OCR_ID)) {
            throw new FileNotFoundException("Missing OCR: " + new File(tempBatchFolder.getParent(),
                    originalFilename + config.getPlainOcrFileSuffix()).toString());
        }

        // ALTO OCR
        FileEntry altoEntry = findSibling(fileSet, config.getAltoFileSuffix());
        if (altoEntry != null) {
            URI altoUri = altoEntry.getFile().toURI();
            AltoDatastream.importAlto(fo, altoUri, null);
        } else if (requiredDatastreamId.contains(AltoDatastream.ALTO_ID)) {
            // report the expected ALTO file, not the plain OCR one
            throw new FileNotFoundException("Missing ALTO: " + new File(tempBatchFolder.getParent(),
                    originalFilename + config.getAltoFileSuffix()).toString());
        }
    }

    /**
     * Finds the first entry of the file set whose lower-cased file name ends
     * with the suffix.
     *
     * @param filenameSuffix suffix to match; it is compared as is
     * @return the entry or {@code null} if there is none
     */
    private FileEntry findSibling(FileSet fileSet, String filenameSuffix) {
        for (FileEntry entry : fileSet.getFiles()) {
            // Locale.ROOT keeps the match independent of the default locale
            String filename = entry.getFile().getName().toLowerCase(Locale.ROOT);
            if (filename.endsWith(filenameSuffix)) {
                return entry;
            }
        }
        return null;
    }

    /**
     * Imports the NDK archival JP2 copy. A file of the set with the configured
     * suffix is preferred; otherwise the copy is generated from the TIFF
     * if a processor is configured.
     *
     * @throws IllegalStateException if the file content is not JPEG 2000
     * @throws FileNotFoundException if the copy is required but unavailable
     */
    private void importArchivalCopy(FileSet fileSet, File tiff, FedoraObject fo, ImportOptions options) throws DigitalObjectException, IOException {
        ImportProfile config = options.getConfig();
        FileEntry entry = findSibling(fileSet, config.getNdkArchivalFileSuffix());
        String dsId = BinaryEditor.NDK_ARCHIVAL_ID;
        if (entry == null) {
            entry = processJp2Copy(fileSet, tiff, options.getTargetFolder(), dsId, config.getNdkArchivalProcessor());
        }
        if (entry != null) {
            File entryFile = entry.getFile();
            // do not use entry.getMimeType. JDK 1.6 does not recognize JPEG2000
            if (!InputUtils.isJp2000(entryFile)) {
                throw new IllegalStateException("Not a JP2000 content: " + entryFile);
            }
            BinaryEditor binaryEditor = BinaryEditor.dissemination(fo, dsId, BinaryEditor.IMAGE_JP2);
            binaryEditor.write(entryFile, 0, null);
        } else if (config.getRequiredDatastreamId().contains(dsId)) {
            throw new FileNotFoundException("Missing archival JP2: " + new File(
                    tiff.getParentFile(), fileSet.getName() + config.getNdkArchivalFileSuffix()));
        }
    }

    /**
     * Imports the NDK user JP2 copy. A file of the set with the configured
     * suffix is preferred; otherwise the copy is generated from the TIFF
     * if a processor is configured.
     *
     * @throws IllegalStateException if the file content is not JPEG 2000
     * @throws FileNotFoundException if the copy is required but unavailable
     */
    private void importUserCopy(FileSet fileSet, File tiff, FedoraObject fo, ImportOptions options) throws DigitalObjectException, IOException {
        ImportProfile config = options.getConfig();
        FileEntry entry = findSibling(fileSet, config.getNdkUserFileSuffix());
        String dsId = BinaryEditor.NDK_USER_ID;
        if (entry == null) {
            entry = processJp2Copy(fileSet, tiff, options.getTargetFolder(), dsId, config.getNdkUserProcessor());
        }
        if (entry != null) {
            File entryFile = entry.getFile();
            // do not use entry.getMimeType. JDK 1.6 does not recognize JPEG2000
            if (!InputUtils.isJp2000(entryFile)) {
                throw new IllegalStateException("Not a JP2000 content: " + entryFile);
            }
            BinaryEditor binaryEditor = BinaryEditor.dissemination(fo, dsId, BinaryEditor.IMAGE_JP2);
            binaryEditor.write(entryFile, 0, null);
        } else if (config.getRequiredDatastreamId().contains(dsId)) {
            throw new FileNotFoundException("Missing user JP2: " + new File(
                    tiff.getParentFile(), fileSet.getName() + config.getNdkUserFileSuffix()));
        }
    }

    /**
     * Generates a JP2 copy of the TIFF with the configured external processor.
     * The output is {@code <file set name>.<dsId>.jp2} in the batch folder.
     *
     * @param processorConfig processor settings; {@code null} or empty
     *      means that no copy should be generated
     * @return the generated file or {@code null} if no processor is configured
     * @throws IOException if the processor type is not supported
     *      or the external process fails
     */
    private FileEntry processJp2Copy(FileSet fileSet, File tiff, File tempBatchFolder, String dsId, Configuration processorConfig) throws IOException {
        if (processorConfig == null || processorConfig.isEmpty()) {
            return null;
        }
        File acFile = new File(tempBatchFolder, fileSet.getName() + '.' + dsId + ".jp2");
        String processorType = processorConfig.getString("type");
        if (!KakaduCompress.ID.equals(processorType)) {
            // fail with the real reason instead of returning an entry
            // that points to a file nobody has generated
            throw new IOException("Unsupported JP2 processor type: " + processorType
                    + ", datastream: " + dsId);
        }
        ExternalProcess process = new KakaduCompress(processorConfig, tiff, acFile);
        process.run();
        if (!process.isOk()) {
            throw new IOException(acFile.toString() + "\n" + process.getFullOutput());
        }
        return new FileEntry(acFile);
    }

    /**
     * Stores the original TIFF as the RAW datastream and derives the FULL,
     * PREVIEW and THUMBNAIL JPEG datastreams from it. Durations of particular
     * steps are logged at the FINE level in milliseconds.
     *
     * @throws IllegalStateException if a written image is not a JPEG
     */
    private void createImages(File tempBatchFolder, File original,
            String originalFilename, LocalObject foxml, ImportProfile config)
            throws IOException, DigitalObjectException, AppConfigurationException {

        BinaryEditor.dissemination(foxml, BinaryEditor.RAW_ID, BinaryEditor.IMAGE_TIFF)
                .write(original, 0, null);

        long start = System.nanoTime();
        BufferedImage tiff = ImageSupport.readImage(original.toURI().toURL(), ImageMimeType.TIFF);
        long endRead = System.nanoTime() - start;
        ImageMimeType imageType = ImageMimeType.JPEG;
        MediaType mediaType = MediaType.valueOf(imageType.getMimeType());

        // full size image
        start = System.nanoTime();
        String targetName = String.format("%s.full.%s", originalFilename, imageType.getDefaultFileExtension());
        File f = writeImage(tiff, tempBatchFolder, targetName, imageType);
        if (!InputUtils.isJpeg(f)) {
            throw new IllegalStateException("Not a JPEG content: " + f);
        }
        long endFull = System.nanoTime() - start;
        BinaryEditor.dissemination(foxml, BinaryEditor.FULL_ID, mediaType).write(f, 0, null);

        // preview
        start = System.nanoTime();
        Integer previewMaxHeight = config.getPreviewMaxHeight();
        Integer previewMaxWidth = config.getPreviewMaxWidth();
        config.checkPreviewScaleParams();
        targetName = String.format("%s.preview.%s", originalFilename, imageType.getDefaultFileExtension());
        f = writeImage(
                scale(tiff, config.getPreviewScaling(), previewMaxWidth, previewMaxHeight),
                tempBatchFolder, targetName, imageType);
        if (!InputUtils.isJpeg(f)) {
            throw new IllegalStateException("Not a JPEG content: " + f);
        }
        long endPreview = System.nanoTime() - start;
        BinaryEditor.dissemination(foxml, BinaryEditor.PREVIEW_ID, mediaType).write(f, 0, null);

        // thumbnail
        start = System.nanoTime();
        f = createThumbnail(tempBatchFolder, originalFilename, original, tiff, config);
        long endThumb = System.nanoTime() - start;
        BinaryEditor.dissemination(foxml, BinaryEditor.THUMB_ID, mediaType).write(f, 0, null);

        LOG.fine(String.format("file: %s, read: %s, full: %s, preview: %s, thumb: %s",
                originalFilename, endRead / 1000000, endFull / 1000000, endPreview / 1000000, endThumb / 1000000));
    }

    /**
     * Creates the thumbnail file {@code <originalFilename>.thumb.<ext>}.
     * Only the pure Java implementation is available for now; {@code original}
     * is reserved for the external processor outlined below.
     */
    private File createThumbnail(File tempBatchFolder, String originalFilename, File original, BufferedImage tiff, ImportProfile config)
            throws AppConfigurationException, IOException {
        ImageMimeType imageType = ImageMimeType.JPEG;
        String targetName = String.format("%s.thumb.%s", originalFilename, imageType.getDefaultFileExtension());
        // XXX requires import profiles
        // Configuration processCfg = config.getThumbnailProcessor();
        // if (processCfg.isEmpty()) {
        return createJavaThumbnail(tempBatchFolder, targetName, imageType, tiff, config);
        // } else {
        // GenericExternalProcess process = new GenericExternalProcess(processCfg);
        // process.addInputFile(original);
        // process.addOutputFile(new File(tempBatchFolder, targetName));
        // process.run();
        // if (!process.isOk()) {
        // throw new IOException(process.getOutputFile().toString() + "\n" + process.getFullOutput());
        // }
        // return process.getOutputFile();
        // }
    }

    /**
     * Scales the image to thumbnail bounds of the profile and writes it
     * to the batch folder.
     *
     * @throws IllegalStateException if the written image is not a JPEG
     */
    private File createJavaThumbnail(File tempBatchFolder, String targetName, ImageMimeType imageType, BufferedImage tiff, ImportProfile config)
            throws AppConfigurationException, IOException {
        Integer thumbMaxHeight = config.getThumbnailMaxHeight();
        Integer thumbMaxWidth = config.getThumbnailMaxWidth();
        config.checkThumbnailScaleParams();
        File f = writeImage(
                scale(tiff, config.getThumbnailScaling(), thumbMaxWidth, thumbMaxHeight),
                tempBatchFolder, targetName, imageType);
        if (!InputUtils.isJpeg(f)) {
            throw new IllegalStateException("Not a JPEG content: " + f);
        }
        return f;
    }

    /**
     * Writes the image to {@code folder/filename} in the format given by
     * the default file extension of the image type.
     *
     * @return the written file
     */
    private static File writeImage(BufferedImage image, File folder, String filename, ImageMimeType imageType) throws IOException {
        File imgFile = new File(folder, filename);
        FileImageOutputStream fos = new FileImageOutputStream(imgFile);
        try {
            ImageSupport.writeImageToStream(image, imageType.getDefaultFileExtension(), fos, 1.0f);
            return imgFile;
        } finally {
            fos.close();
        }
    }

    /**
     * Scales the image down to fit the boundary while keeping its aspect
     * ratio. Dimensions that already fit, or have no limit, are not changed.
     *
     * @param maxWidth maximal width or {@code null} for no limit
     * @param maxHeight maximal height or {@code null} for no limit
     */
    private static BufferedImage scale(BufferedImage tiff, ScalingMethod method,
            Integer maxWidth, Integer maxHeight) {

        long start = System.nanoTime();
        int height = tiff.getHeight();
        int width = tiff.getWidth();
        int targetWidth = width;
        int targetHeight = height;
        // Double.MAX_VALUE stands for "no scaling required"
        double scale = Double.MAX_VALUE;
        if (maxHeight != null && height > maxHeight) {
            scale = (double) maxHeight / height;
        }
        if (maxWidth != null && width > maxWidth) {
            double scalew = (double) maxWidth / width;
            scale = Math.min(scale, scalew);
        }
        if (scale != Double.MAX_VALUE) {
            // truncation could produce 0 for extreme aspect ratios;
            // keep at least one pixel in each dimension
            targetHeight = Math.max(1, (int) (height * scale));
            targetWidth = Math.max(1, (int) (width * scale));
        }
        BufferedImage scaled = ImageSupport.scale(tiff, targetWidth, targetHeight, method, true);
        LOG.fine(String.format("scaled [%s, %s] to [%s, %s], boundary [%s, %s] [w, h], time: %s ms",
                width, height, targetWidth, targetHeight, maxWidth, maxHeight, (System.nanoTime() - start) / 1000000));
        return scaled;
    }

    /**
     * Writes technical metadata of the RAW TIFF and, if the datastream has
     * content, also of the NDK archival copy.
     */
    private void createTechnicalMetadata(LocalObject localObj, ImportOptions ctx)
            throws DigitalObjectException {

        JhoveContext jhoveCtx = ctx.getJhoveContext();
        File file = BinaryEditor.dissemination(localObj, BinaryEditor.RAW_ID, BinaryEditor.IMAGE_TIFF).read();
        MixEditor mixEditor = MixEditor.raw(localObj);
        mixEditor.write(file, jhoveCtx, mixEditor.getLastModified(), null);
        // NDK version
        file = BinaryEditor.dissemination(localObj, BinaryEditor.NDK_ARCHIVAL_ID, BinaryEditor.IMAGE_JP2).read();
        if (file != null) {
            mixEditor = MixEditor.ndkArchival(localObj);
            mixEditor.write(file, jhoveCtx, mixEditor.getLastModified(), null);
        }
    }
}