/*
 * Copyright (C) 2011 Jan Pokorsky
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
package cz.cas.lib.proarc.common.imports;

import cz.cas.lib.proarc.common.config.AppConfigurationException;
import cz.cas.lib.proarc.common.dao.BatchItem.ObjectState;
import cz.cas.lib.proarc.common.export.mets.JhoveContext;
import cz.cas.lib.proarc.common.fedora.BinaryEditor;
import cz.cas.lib.proarc.common.fedora.DigitalObjectException;
import cz.cas.lib.proarc.common.fedora.FedoraObject;
import cz.cas.lib.proarc.common.fedora.LocalStorage;
import cz.cas.lib.proarc.common.fedora.LocalStorage.LocalObject;
import cz.cas.lib.proarc.common.fedora.MixEditor;
import cz.cas.lib.proarc.common.fedora.PageView.PageViewHandler;
import cz.cas.lib.proarc.common.fedora.PageView.PageViewItem;
import cz.cas.lib.proarc.common.fedora.StringEditor;
import cz.cas.lib.proarc.common.fedora.XmlStreamEditor;
import cz.cas.lib.proarc.common.fedora.relation.RelationEditor;
import cz.cas.lib.proarc.common.imports.FileSet.FileEntry;
import cz.cas.lib.proarc.common.imports.ImportBatchManager.BatchItemObject;
import cz.cas.lib.proarc.common.imports.ImportProcess.ImportOptions;
import cz.cas.lib.proarc.common.object.DigitalObjectHandler;
import cz.cas.lib.proarc.common.object.DigitalObjectManager;
import cz.cas.lib.proarc.common.object.MetadataHandler;
import cz.cas.lib.proarc.common.ocr.AltoDatastream;
import cz.cas.lib.proarc.common.process.ExternalProcess;
import cz.cas.lib.proarc.common.process.KakaduCompress;
import cz.incad.imgsupport.ImageMimeType;
import cz.incad.imgsupport.ImageSupport;
import cz.incad.imgsupport.ImageSupport.ScalingMethod;
import java.awt.image.BufferedImage;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.net.URI;
import java.util.List;
import java.util.Locale;
import java.util.logging.Level;
import java.util.logging.Logger;
import javax.imageio.stream.FileImageOutputStream;
import javax.ws.rs.core.MediaType;
import org.apache.commons.configuration.Configuration;

/**
 * Imports a file set containing a TIFF image as a new local digital object
 * and registers the result with the {@link ImportBatchManager}.
 *
 * <p>Requires Java Advanced Imaging support.
 * See http://www.oracle.com/technetwork/java/current-142188.html and
 * http://download.java.net/media/jai/builds/release/1_1_3/
 * jai-1_1_3-lib.zip is a platform independent version
 *
 * <p>http://download.java.net/media/jai-imageio/builds/release/1.1/ for jai_imageio-1.1.jar
 *
 * <p>For maven, try to depend just on com.sun.media.jai_imageio.1.1 as kramerius common.
 * How to properly depend in pom see http://sahits.ch/blog/?p=1038
 *
 * @author Jan Pokorsky
 */
public final class TiffImporter {

    private static final Logger LOG = Logger.getLogger(TiffImporter.class.getName());

    private final ImportBatchManager ibm;

    /**
     * @param ibm batch manager used to register and update imported items
     */
    public TiffImporter(ImportBatchManager ibm) {
        this.ibm = ibm;
    }

    /**
     * Tells whether the file set can be handled by this importer.
     *
     * @param fileSet files sharing a common name
     * @return {@code true} if some entry of the set has the TIFF MIME type
     */
    public boolean accept(FileSet fileSet) {
        return isTiff(fileSet);
    }

    /**
     * Imports the TIFF of the file set as a new local object of the batch.
     * Any failure of an import step is logged and stored in the returned
     * batch item instead of being propagated to the caller.
     *
     * @param fileSet files to import
     * @param ctx import options of the running batch
     * @return the batch item in state {@code LOADED} or {@code LOADING_FAILED},
     *         or {@code null} if the file set contains no TIFF entry
     */
    public BatchItemObject consume(FileSet fileSet, ImportOptions ctx) {
        FileEntry tiffEntry = findTiff(fileSet);
        // check tiff file
        if (tiffEntry == null) {
            return null;
        }

        ImportProfile config = ctx.getConfig();
        File f = tiffEntry.getFile();
        String originalFilename = fileSet.getName();

        // creates FOXML and metadata
        LocalObject localObj = createObject(originalFilename, ctx);
        BatchItemObject batchLocalObject = ibm.addLocalObject(ctx.getBatch(), localObj);
        try {
            // the MIME type of the entry is not trusted; check the content
            if (!InputUtils.isTiff(f)) {
                throw new IllegalStateException("Not a TIFF content: " + f);
            }
            DigitalObjectHandler dobjHandler = DigitalObjectManager.getDefault().createHandler(localObj);
            createRelsExt(dobjHandler, f, ctx);
            createMetadata(dobjHandler, ctx);
            createImages(ctx.getTargetFolder(), f, originalFilename, localObj, config);
            importArchivalCopy(fileSet, f, localObj, ctx);
            importUserCopy(fileSet, f, localObj, ctx);
            importOcr(fileSet, localObj, ctx);
            createTechnicalMetadata(localObj, ctx);
            // writes FOXML
            dobjHandler.commit();
            ibm.addChildRelation(ctx.getBatch(), null, localObj.getPid());
            batchLocalObject.setState(ObjectState.LOADED);
        } catch (Throwable ex) {
            // record the failure on the item so the batch can go on
            LOG.log(Level.SEVERE, f.toString(), ex);
            batchLocalObject.setState(ObjectState.LOADING_FAILED);
            batchLocalObject.setLog(ImportBatchManager.toString(ex));
        }
        ibm.update(batchLocalObject);

        return batchLocalObject;
    }

    /**
     * Creates a local object backed by {@code <originalFilename>.foxml}
     * in the target folder of the batch and assigns it to the importing user.
     */
    private LocalObject createObject(String originalFilename, ImportOptions ctx) {
        File tempBatchFolder = ctx.getTargetFolder();
        LocalStorage storage = new LocalStorage();
        File foxml = new File(tempBatchFolder, originalFilename + ".foxml");
        LocalObject localObj = storage.create(foxml);
        localObj.setOwner(ctx.getUsername());
        return localObj;
    }

    /**
     * Writes page metadata of the new object. The page index is the consumed
     * file counter + 1 when index generation is enabled, otherwise it is unset.
     *
     * @throws IllegalStateException if the metadata handler of the object
     *         is not a {@link PageViewHandler}
     */
    private void createMetadata(DigitalObjectHandler objHandler, ImportOptions ctx) throws DigitalObjectException {
        MetadataHandler<Object> mHandler = objHandler.metadata();
        if (mHandler instanceof PageViewHandler) {
            // requires RELS-EXT model in place
            // creates MODS + DC + LABEL
            PageViewHandler pvHandler = (PageViewHandler) mHandler;
            String pageIndex = ctx.isGenerateIndices()
                    ? String.valueOf(ctx.getConsumedFileCounter() + 1)
                    : null;
            PageViewItem page = new PageViewItem();
            page.setPageIndex(pageIndex);
            pvHandler.setPage(page, null);
        } else {
            throw new IllegalStateException("Unsupported metadata handler: " + mHandler);
        }
    }

    /**
     * Writes relations of the new object: the model and the device taken from
     * the import options and the name of the imported file.
     */
    private void createRelsExt(DigitalObjectHandler objHandler, File f, ImportOptions ctx) throws DigitalObjectException {
        String fedoraModel = ctx.getModel();
        RelationEditor relEditor = objHandler.relations();
        relEditor.setModel(fedoraModel);
        relEditor.setDevice(ctx.getDevice());
        relEditor.setImportFile(f.getName());
        relEditor.write(0, null);
        // XXX use fedora-model:downloadFilename in RELS-INT or label of datastream to specify filename
    }

    private boolean isTiff(FileSet fileSet) {
        return findTiff(fileSet) != null;
    }

    /**
     * Finds the first entry of the file set declaring the TIFF MIME type.
     *
     * @return the entry or {@code null}
     */
    private FileEntry findTiff(FileSet fileSet) {
        for (FileEntry entry : fileSet.getFiles()) {
            String mimetype = entry.getMimetype();
            if (ImageMimeType.TIFF.getMimeType().equals(mimetype)) {
                return entry;
            }
        }
        return null;
    }

    /**
     * Imports plain text OCR and ALTO siblings of the TIFF when they are part
     * of the file set.
     *
     * @throws FileNotFoundException if a sibling is missing and the import
     *         profile lists its datastream as required
     */
    private void importOcr(FileSet fileSet, FedoraObject fo, ImportOptions options)
            throws IOException, DigitalObjectException {

        // XXX find filename.ocr.txt or generate OCR or nothing
        // plain text OCR
        File tempBatchFolder = options.getTargetFolder();
        String originalFilename = fileSet.getName();
        ImportProfile config = options.getConfig();
        List<Object> requiredDatastreamId = config.getRequiredDatastreamId();
        FileEntry ocrEntry = findSibling(fileSet, config.getPlainOcrFileSuffix());
        if (ocrEntry != null) {
            // store a UTF-8 copy of the OCR text in the batch folder
            File ocrFile = new File(tempBatchFolder, originalFilename + '.' + StringEditor.OCR_ID + ".txt");
            StringEditor.copy(ocrEntry.getFile(), config.getPlainOcrCharset(), ocrFile, "UTF-8");
            XmlStreamEditor ocrEditor = fo.getEditor(StringEditor.ocrProfile());
            ocrEditor.write(ocrFile.toURI(), 0, null);
        } else if (requiredDatastreamId.contains(StringEditor.OCR_ID)) {
            throw new FileNotFoundException("Missing OCR: " + new File(tempBatchFolder.getParent(),
                    originalFilename + config.getPlainOcrFileSuffix()).toString());
        }

        // ALTO OCR
        FileEntry altoEntry = findSibling(fileSet, config.getAltoFileSuffix());
        if (altoEntry != null) {
            URI altoUri = altoEntry.getFile().toURI();
            AltoDatastream.importAlto(fo, altoUri, null);
        } else if (requiredDatastreamId.contains(AltoDatastream.ALTO_ID)) {
            // report the ALTO file name, not the plain OCR one
            throw new FileNotFoundException("Missing ALTO: " + new File(tempBatchFolder.getParent(),
                    originalFilename + config.getAltoFileSuffix()).toString());
        }
    }

    /**
     * Finds the first entry of the file set whose lower-cased file name ends
     * with the suffix.
     *
     * @param filenameSuffix suffix compared with the lower-cased file name
     * @return the entry or {@code null}
     */
    private FileEntry findSibling(FileSet fileSet, String filenameSuffix) {
        for (FileEntry entry : fileSet.getFiles()) {
            // Locale.ROOT keeps the match independent of the default locale
            String filename = entry.getFile().getName().toLowerCase(Locale.ROOT);
            if (filename.endsWith(filenameSuffix)) {
                return entry;
            }
        }
        return null;
    }

    /**
     * Imports the NDK archival JPEG 2000 copy. A sibling with the configured
     * suffix is preferred; otherwise the copy is produced from the TIFF by
     * the configured processor, if there is any.
     *
     * @throws IllegalStateException if the copy has not JPEG 2000 content
     * @throws FileNotFoundException if there is no copy and the import
     *         profile lists the datastream as required
     */
    private void importArchivalCopy(FileSet fileSet, File tiff, FedoraObject fo, ImportOptions options)
            throws DigitalObjectException, IOException {

        ImportProfile config = options.getConfig();
        FileEntry entry = findSibling(fileSet, config.getNdkArchivalFileSuffix());
        String dsId = BinaryEditor.NDK_ARCHIVAL_ID;
        if (entry == null) {
            entry = processJp2Copy(fileSet, tiff, options.getTargetFolder(), dsId, config.getNdkArchivalProcessor());
        }
        if (entry != null) {
            File entryFile = entry.getFile();
            // do not use entry.getMimeType. JDK 1.6 does not recognize JPEG2000
            if (!InputUtils.isJp2000(entryFile)) {
                throw new IllegalStateException("Not a JP2000 content: " + entryFile);
            }
            BinaryEditor binaryEditor = BinaryEditor.dissemination(fo, dsId, BinaryEditor.IMAGE_JP2);
            binaryEditor.write(entryFile, 0, null);
        } else if (config.getRequiredDatastreamId().contains(dsId)) {
            throw new FileNotFoundException("Missing archival JP2: " + new File(
                    tiff.getParentFile(), fileSet.getName() + config.getNdkArchivalFileSuffix()));
        }
    }

    /**
     * Imports the NDK user JPEG 2000 copy. A sibling with the configured
     * suffix is preferred; otherwise the copy is produced from the TIFF by
     * the configured processor, if there is any.
     *
     * @throws IllegalStateException if the copy has not JPEG 2000 content
     * @throws FileNotFoundException if there is no copy and the import
     *         profile lists the datastream as required
     */
    private void importUserCopy(FileSet fileSet, File tiff, FedoraObject fo, ImportOptions options)
            throws DigitalObjectException, IOException {

        ImportProfile config = options.getConfig();
        FileEntry entry = findSibling(fileSet, config.getNdkUserFileSuffix());
        String dsId = BinaryEditor.NDK_USER_ID;
        if (entry == null) {
            entry = processJp2Copy(fileSet, tiff, options.getTargetFolder(), dsId, config.getNdkUserProcessor());
        }
        if (entry != null) {
            File entryFile = entry.getFile();
            // do not use entry.getMimeType. JDK 1.6 does not recognize JPEG2000
            if (!InputUtils.isJp2000(entryFile)) {
                throw new IllegalStateException("Not a JP2000 content: " + entryFile);
            }
            BinaryEditor binaryEditor = BinaryEditor.dissemination(fo, dsId, BinaryEditor.IMAGE_JP2);
            binaryEditor.write(entryFile, 0, null);
        } else if (config.getRequiredDatastreamId().contains(dsId)) {
            throw new FileNotFoundException("Missing user JP2: " + new File(
                    tiff.getParentFile(), fileSet.getName() + config.getNdkUserFileSuffix()));
        }
    }

    /**
     * Runs the configured external processor to produce
     * {@code <name>.<dsId>.jp2} in the batch folder.
     *
     * @param processorConfig processor settings; may be {@code null} or empty
     * @return entry of the target file, or {@code null} if no processor is configured
     * @throws IOException if the external process fails
     */
    private FileEntry processJp2Copy(FileSet fileSet, File tiff, File tempBatchFolder,
            String dsId, Configuration processorConfig) throws IOException {

        if (processorConfig != null && !processorConfig.isEmpty()) {
            File acFile = new File(tempBatchFolder, fileSet.getName() + '.' + dsId + ".jp2");
            String processorType = processorConfig.getString("type");
            ExternalProcess process = null;
            if (KakaduCompress.ID.equals(processorType)) {
                process = new KakaduCompress(processorConfig, tiff, acFile);
            }
            if (process != null) {
                process.run();
                if (!process.isOk()) {
                    throw new IOException(acFile.toString() + "\n" + process.getFullOutput());
                }
            }
            // NOTE(review): for an unrecognized "type" no process runs, yet an entry
            // of the target file is still returned; callers then check its content.
            // Confirm whether an explicit error would be preferable.
            return new FileEntry(acFile);
        }
        return null;
    }

    /**
     * Stores the original TIFF as the RAW datastream and derives the FULL,
     * PREVIEW and THUMBNAIL JPEG datastreams from it.
     *
     * @throws IllegalStateException if a derived file has not JPEG content
     */
    private void createImages(File tempBatchFolder, File original,
            String originalFilename, LocalObject foxml, ImportProfile config)
            throws IOException, DigitalObjectException, AppConfigurationException {

        BinaryEditor.dissemination(foxml, BinaryEditor.RAW_ID, BinaryEditor.IMAGE_TIFF)
                .write(original, 0, null);

        long start = System.nanoTime();
        BufferedImage tiff = ImageSupport.readImage(original.toURI().toURL(), ImageMimeType.TIFF);
        long endRead = System.nanoTime() - start;
        ImageMimeType imageType = ImageMimeType.JPEG;
        MediaType mediaType = MediaType.valueOf(imageType.getMimeType());

        // full size
        start = System.nanoTime();
        String targetName = String.format("%s.full.%s", originalFilename, imageType.getDefaultFileExtension());
        File f = writeImage(tiff, tempBatchFolder, targetName, imageType);
        if (!InputUtils.isJpeg(f)) {
            throw new IllegalStateException("Not a JPEG content: " + f);
        }
        long endFull = System.nanoTime() - start;
        BinaryEditor.dissemination(foxml, BinaryEditor.FULL_ID, mediaType).write(f, 0, null);

        // preview
        start = System.nanoTime();
        Integer previewMaxHeight = config.getPreviewMaxHeight();
        Integer previewMaxWidth = config.getPreviewMaxWidth();
        config.checkPreviewScaleParams();
        targetName = String.format("%s.preview.%s", originalFilename, imageType.getDefaultFileExtension());
        f = writeImage(
                scale(tiff, config.getPreviewScaling(), previewMaxWidth, previewMaxHeight),
                tempBatchFolder, targetName, imageType);
        if (!InputUtils.isJpeg(f)) {
            throw new IllegalStateException("Not a JPEG content: " + f);
        }
        long endPreview = System.nanoTime() - start;
        BinaryEditor.dissemination(foxml, BinaryEditor.PREVIEW_ID, mediaType).write(f, 0, null);

        // thumbnail
        start = System.nanoTime();
        f = createThumbnail(tempBatchFolder, originalFilename, original, tiff, config);
        long endThumb = System.nanoTime() - start;
        BinaryEditor.dissemination(foxml, BinaryEditor.THUMB_ID, mediaType).write(f, 0, null);

        // do not format the message unless it is going to be logged
        if (LOG.isLoggable(Level.FINE)) {
            LOG.fine(String.format("file: %s, read: %s, full: %s, preview: %s, thumb: %s",
                    originalFilename, endRead / 1000000, endFull / 1000000,
                    endPreview / 1000000, endThumb / 1000000));
        }
    }

    /**
     * Creates the thumbnail JPEG {@code <originalFilename>.thumb.<ext>}
     * in the batch folder.
     *
     * @return the thumbnail file
     */
    private File createThumbnail(File tempBatchFolder, String originalFilename,
            File original, BufferedImage tiff, ImportProfile config)
            throws AppConfigurationException, IOException {

        ImageMimeType imageType = ImageMimeType.JPEG;
        String targetName = String.format("%s.thumb.%s", originalFilename, imageType.getDefaultFileExtension());
        // XXX requires import profiles
//        Configuration processCfg = config.getThumbnailProcessor();
//        if (processCfg.isEmpty()) {
            return createJavaThumbnail(tempBatchFolder, targetName, imageType, tiff, config);
//        } else {
//            GenericExternalProcess process = new GenericExternalProcess(processCfg);
//            process.addInputFile(original);
//            process.addOutputFile(new File(tempBatchFolder, targetName));
//            process.run();
//            if (!process.isOk()) {
//                throw new IOException(process.getOutputFile().toString() + "\n" + process.getFullOutput());
//            }
//            return process.getOutputFile();
//        }
    }

    /**
     * Scales the image to the thumbnail limits of the import profile and
     * writes it to the batch folder.
     *
     * @throws IllegalStateException if the written file has not JPEG content
     */
    private File createJavaThumbnail(File tempBatchFolder, String targetName,
            ImageMimeType imageType, BufferedImage tiff, ImportProfile config)
            throws AppConfigurationException, IOException {

        Integer thumbMaxHeight = config.getThumbnailMaxHeight();
        Integer thumbMaxWidth = config.getThumbnailMaxWidth();
        config.checkThumbnailScaleParams();
        File f = writeImage(
                scale(tiff, config.getThumbnailScaling(), thumbMaxWidth, thumbMaxHeight),
                tempBatchFolder, targetName, imageType);
        if (!InputUtils.isJpeg(f)) {
            throw new IllegalStateException("Not a JPEG content: " + f);
        }
        return f;
    }

    /**
     * Writes the image to {@code folder/filename} in the format given by
     * the default file extension of the image type.
     *
     * @return the written file
     */
    private static File writeImage(BufferedImage image, File folder, String filename,
            ImageMimeType imageType) throws IOException {

        File imgFile = new File(folder, filename);
        FileImageOutputStream fos = new FileImageOutputStream(imgFile);
        try {
            ImageSupport.writeImageToStream(image, imageType.getDefaultFileExtension(), fos, 1.0f);
            return imgFile;
        } finally {
            fos.close();
        }
    }

    /**
     * Scales the image down to fit the boundary while keeping its aspect
     * ratio. The original size is kept when the image already fits or
     * no limit is set.
     *
     * @param maxWidth maximal width in pixels or {@code null} for no limit
     * @param maxHeight maximal height in pixels or {@code null} for no limit
     * @return the result of {@code ImageSupport.scale} for the computed size
     */
    private static BufferedImage scale(BufferedImage tiff, ScalingMethod method,
            Integer maxWidth, Integer maxHeight) {

        long start = System.nanoTime();
        int height = tiff.getHeight();
        int width = tiff.getWidth();
        int targetWidth = width;
        int targetHeight = height;
        // Double.MAX_VALUE stands for "no scaling required"
        double scale = Double.MAX_VALUE;
        if (maxHeight != null && height > maxHeight) {
            scale = (double) maxHeight / height;
        }
        if (maxWidth != null && width > maxWidth) {
            double scalew = (double) maxWidth / width;
            scale = Math.min(scale, scalew);
        }
        if (scale != Double.MAX_VALUE) {
            // the cast truncates; keep at least 1 px as an extreme aspect ratio
            // would otherwise produce a zero dimension
            targetHeight = Math.max(1, (int) (height * scale));
            targetWidth = Math.max(1, (int) (width * scale));
        }
        BufferedImage scaled = ImageSupport.scale(tiff, targetWidth, targetHeight, method, true);
        // do not format the message unless it is going to be logged
        if (LOG.isLoggable(Level.FINE)) {
            LOG.fine(String.format("scaled [%s, %s] to [%s, %s], boundary [%s, %s] [w, h], time: %s ms",
                    width, height, targetWidth, targetHeight, maxWidth, maxHeight,
                    (System.nanoTime() - start) / 1000000));
        }
        return scaled;
    }

    /**
     * Writes technical metadata (MIX) of the RAW datastream and, when its
     * content is available, of the NDK archival datastream.
     */
    private void createTechnicalMetadata(LocalObject localObj, ImportOptions ctx)
            throws DigitalObjectException {

        JhoveContext jhoveCtx = ctx.getJhoveContext();
        File file = BinaryEditor.dissemination(localObj, BinaryEditor.RAW_ID, BinaryEditor.IMAGE_TIFF).read();
        MixEditor mixEditor = MixEditor.raw(localObj);
        mixEditor.write(file, jhoveCtx, mixEditor.getLastModified(), null);

        // NDK version
        file = BinaryEditor.dissemination(localObj, BinaryEditor.NDK_ARCHIVAL_ID, BinaryEditor.IMAGE_JP2).read();
        if (file != null) {
            mixEditor = MixEditor.ndkArchival(localObj);
            mixEditor.write(file, jhoveCtx, mixEditor.getLastModified(), null);
        }
    }

}