/**
* JHOVE2 - Next-generation architecture for format-aware characterization
*
* Copyright (c) 2009 by The Regents of the University of California,
* Ithaka Harbors, Inc., and The Board of Trustees of the Leland Stanford
* Junior University.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* o Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* o Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* o Neither the name of the University of California/California Digital
* Library, Ithaka Harbors/Portico, or Stanford University, nor the names of
* its contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
package org.jhove2.module.identify.file;
import java.io.File;
import java.io.IOException;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
import org.jhove2.core.I8R;
import org.jhove2.core.JHOVE2;
import org.jhove2.core.JHOVE2Exception;
import org.jhove2.core.format.FormatIdentification;
import org.jhove2.core.format.FormatIdentification.Confidence;
import org.jhove2.core.io.Input;
import org.jhove2.core.source.Source;
import org.jhove2.module.identify.AbstractFileSourceIdentifier;
import org.jhove2.module.identify.SourceIdentifier;
import org.jhove2.persist.ModuleAccessor;
import com.sleepycat.persist.model.Persistent;
/**
 * A {@link SourceIdentifier source identifier} that wraps the
 * libmagic dynamic library of the UNIX <code>file</code> identifier
 * tool.
 * <p>
 * The libmagic wrapper is held in a static field and is therefore shared
 * by all instances of this class; it is created lazily by
 * {@link #checkIfInitialized()} and released by {@link #shutdown()}.
 * No synchronization is performed by this class.</p>
 *
 * @author hbian
 */
@Persistent
public class LibmagicIdentifier extends AbstractFileSourceIdentifier
        implements SourceIdentifier {

    /** Module version identifier. */
    public static final String VERSION = "2.0.0";

    /** Module release date. */
    public static final String RELEASE = "2011-01-31";

    /** Module rights statement. */
    public static final String RIGHTS =
        "Copyright 2010-2011 by Bibliotheque nationale de France. " +
        "Available under the terms of the BSD license.";

    /** Magic database file extension. */
    private static final String MAGIC_DB_EXTENSION = ".mgc";

    /** MIME type prefix for which charset information is kept. */
    private static final String TEXT_PLAIN = "text/plain";

    /** Marker introducing the charset part of a libmagic MIME string. */
    private static final String CHARSET_MARKER = "; charset";

    /**
     * Map MIME types to JHOVE2 format identifiers. Built on first use
     * by {@link #getJhoveFormatId(JHOVE2, String)} and never rebuilt.
     */
    private static Map<String,I8R> mimeTypesToFormatIds;

    /** The directory where source Magic definition files are stored. */
    private File magicFileDir = null;

    /** The compiled Magic file generated by this class. */
    private File compiledMagic = null;

    /**
     * Instance of Libmagic JNA wrapper. Non-null only once the magic
     * definitions have been successfully loaded.
     */
    private static transient LibmagicJnaWrapper libmagicWrapper;

    /**
     * Instantiate a new <code>LibmagicIdentifier</code> module that
     * wraps libmagic, without a persistence manager.
     *
     * @throws JHOVE2Exception
     */
    public LibmagicIdentifier() throws JHOVE2Exception {
        this(null);
    }

    /**
     * Instantiate a new <code>LibmagicIdentifier</code> module that
     * wraps libmagic.
     *
     * @param moduleAccessor persistence manager
     * @throws JHOVE2Exception
     */
    public LibmagicIdentifier(ModuleAccessor moduleAccessor)
            throws JHOVE2Exception {
        super(VERSION, RELEASE, RIGHTS, Scope.Generic, moduleAccessor);
    }

    /**
     * Initializes this instance. This method should be called once
     * all the configuration properties (dependency injection) set.
     * <p>
     * The current implementation does nothing: the actual
     * initialization is performed lazily by
     * {@link #checkIfInitialized()}.</p>
     *
     * @throws JHOVE2Exception if any error occurred.
     */
    public void init() throws JHOVE2Exception {
    }

    /**
     * Initialize module if it has not been done yet.
     * <p>
     * If a magic source directory was set, its definitions are compiled
     * first and the resulting database is loaded; otherwise the default
     * magic database is loaded. The shared wrapper is only published
     * once loading succeeded, so that a failed attempt is retried (and
     * reported again) on the next call instead of leaving a wrapper
     * without any loaded database in place.</p>
     *
     * @throws JHOVE2Exception if an error occurs during initialization
     */
    public void checkIfInitialized() throws JHOVE2Exception {
        if (libmagicWrapper != null) {
            // Already initialized.
            return;
        }
        LibmagicJnaWrapper wrapper = new LibmagicJnaWrapper();
        boolean ready = false;
        try {
            String compiledMagicPath = null;
            if (this.magicFileDir != null) {
                // Magic source directory set. => Compile magic files.
                if (wrapper.compile(magicFileDir.getAbsolutePath()) != 0) {
                    throw new JHOVE2Exception("Magic file compile error: "
                                              + wrapper.getError());
                }
                // Look for compiled magic file. Its location varies
                // according to some compilation options of libmagic.
                // Was it stored in the current directory?
                String dbName = magicFileDir.getName() + MAGIC_DB_EXTENSION;
                File magicPath = new File(dbName);
                if (! magicPath.isFile()) {
                    // Nope! Must be in the parent dir. of the source
                    // definitions.
                    magicPath = new File(magicFileDir.getParentFile(), dbName);
                }
                compiledMagicPath = magicPath.getAbsolutePath();
                // Keep compiled file ref. for shutdown time cleanup.
                compiledMagic = magicPath;
                compiledMagic.deleteOnExit();
            }
            // Load magic definitions (null => default database).
            if (wrapper.load(compiledMagicPath) != 0) {
                String fileRef = (compiledMagicPath != null)?
                        "Magic database \"" + compiledMagicPath + '"':
                        "Default magic database";
                throw new JHOVE2Exception(fileRef + " load error: "
                                          + wrapper.getError());
            }
            // Success. => Publish the fully initialized wrapper.
            libmagicWrapper = wrapper;
            ready = true;
        }
        finally {
            if (! ready) {
                // Initialization failed. => Release the wrapper, as
                // shutdown() would have done.
                wrapper.close();
            }
        }
    }

    /**
     * Shuts down this instance, releasing used resources: the shared
     * libmagic wrapper is closed and the compiled magic file generated
     * by this instance, if any, is deleted.
     */
    public void shutdown() {
        if (libmagicWrapper != null) {
            libmagicWrapper.close();
            libmagicWrapper = null;
        }
        if (compiledMagic != null) {
            // Delete compiled magic file.
            compiledMagic.delete();
            compiledMagic = null;
        }
    }

    /**
     * {@inheritDoc}
     * <p>
     * The MIME type is obtained from libmagic using the file backing
     * the source. The returned set always holds exactly one
     * identification: its confidence is
     * {@link Confidence#PositiveSpecific} when the MIME type maps to a
     * JHOVE2 format, {@link Confidence#Negative} otherwise (including
     * when no input or no backing file is available).</p>
     */
    @Override
    public Set<FormatIdentification> identify(JHOVE2 jhove2, Source source,
            Input input) throws IOException, JHOVE2Exception {
        checkIfInitialized();

        // Extract MIME type and encoding using libmagic.
        // Note: identification from the in-memory input buffer is
        // disabled as it is not compatible with the current JHOVE2
        // core; the file backing the source is always used instead.
        String mimeType = null;
        if (input != null) {
            File file = source.getFile();
            if (file != null) {
                mimeType = libmagicWrapper.getMimeType(file.getAbsolutePath());
            }
            // Else: no backing file. => Report a negative identification.
        }

        String typeWithEncoding = mimeType;
        if (mimeType != null) {
            // Extract character encoding.
            int i = mimeType.indexOf(CHARSET_MARKER);
            if (i != -1) {
                // Found! => Reconstruct charset information the JHOVE2
                // way. A marker without '=' or without value is
                // handled as if no charset was specified.
                int eq = mimeType.indexOf('=', i + 1);
                String charset = (eq != -1)? mimeType.substring(eq + 1): "";
                mimeType = mimeType.substring(0, i);
                typeWithEncoding = mimeType;
                if (TEXT_PLAIN.equals(mimeType) && (charset.length() != 0)) {
                    // Locale-independent upper-casing: the default
                    // locale may alter ASCII charset names (e.g. the
                    // Turkish dotted capital I).
                    typeWithEncoding += "; charset=\""
                            + charset.toUpperCase(java.util.Locale.ROOT) + '"';
                }
                // Else: ignore "charset=BINARY" present for non-text files.
            }
        }

        I8R jhoveId = null;
        I8R fileId = null;
        Confidence confidence = Confidence.Negative;
        // Convert MIME type into JHOVE2 format identifier.
        if ((mimeType != null) && (mimeType.length() != 0)) {
            fileId = new I8R(mimeType, I8R.Namespace.MIME);
            jhoveId = getJhoveFormatId(jhove2, typeWithEncoding);
            if (jhoveId != null) {
                confidence = Confidence.PositiveSpecific;
            }
        }
        Set<FormatIdentification> presumptiveFormatIds =
                new TreeSet<FormatIdentification>();
        presumptiveFormatIds.add(
                new FormatIdentification(jhoveId, confidence,
                        getReportableIdentifier(), fileId, null));
        return presumptiveFormatIds;
    }

    /**
     * Returns the {@link I8R identifier} of the JHOVE2 format
     * corresponding to the specified MIME type.
     * <p>
     * The MIME type to format mapping is read from the JHOVE2
     * configuration on the first call and cached for all subsequent
     * calls.</p>
     *
     * @param jhove2 the JHOVE2 characterization context.
     * @param mimeType the MIME type to convert.
     *
     * @return the JHOVE2 format identifier or <code>null</code> if no
     * matching JHOVE2 format was defined in the configuration
     * for the specified MIME type.
     * @throws JHOVE2Exception if any error occurred while reading the
     * JHOVE2 configuration.
     */
    private static I8R getJhoveFormatId(JHOVE2 jhove2, String mimeType)
            throws JHOVE2Exception {
        if (mimeTypesToFormatIds == null) {
            Map<String,I8R> m = new HashMap<String,I8R>();
            Map<String,String> ids = jhove2.getConfigInfo()
                    .getFormatAliasIdsToJ2Ids(I8R.Namespace.MIME);
            for (Map.Entry<String,String> e : ids.entrySet()) {
                m.put(e.getKey(), new I8R(e.getValue()));
            }
            mimeTypesToFormatIds = Collections.unmodifiableMap(m);
        }
        return mimeTypesToFormatIds.get(mimeType);
    }

    /**
     * <i>Dependency injection</i> Sets the directory where to look
     * for Magic definition files. If this property is set, this class
     * compiles the definition files prior to attempting identification.
     *
     * @param magicDir the directory holding the magic definition
     * files or <code>null</code> to use the default
     * already-compiled definitions.
     */
    public void setMagicFileDir(File magicDir) {
        magicFileDir = magicDir;
    }
}