/*
* regain/Thumbnailer - A file search engine providing plenty of formats (Plugin)
* Copyright (C) 2011 Come_IN Computerclubs (University of Siegen)
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
* Contact: Come_IN-Team <come_in-team@listserv.uni-siegen.de>
*/
package de.uni_siegen.wineme.come_in.thumbnailer.util.mime;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.log4j.Logger;
import org.ontoware.rdf2go.model.node.impl.URIImpl;
import org.semanticdesktop.aperture.mime.identifier.magic.MagicMimeTypeIdentifier;
import org.semanticdesktop.aperture.mime.identifier.magic.MagicMimeTypeIdentifierFactory;
import de.uni_siegen.wineme.come_in.thumbnailer.util.IOUtil;
/**
* Wrapper class for MIME Identification of Files.
*
* Depends:
* <li>Aperture (for MIME-Detection)
*/
public class MimeTypeDetector {
private MagicMimeTypeIdentifier mimeTypeIdentifier;
private List<MimeTypeIdentifier> extraIdentifiers;
private static Logger mLog = Logger.getLogger(MimeTypeDetector.class);
/**
* Create a MimeType Detector and init it.
*/
public MimeTypeDetector()
{
MagicMimeTypeIdentifierFactory mimeTypeFactory = new MagicMimeTypeIdentifierFactory();
mimeTypeIdentifier = (MagicMimeTypeIdentifier) mimeTypeFactory.get();
extraIdentifiers = new ArrayList<MimeTypeIdentifier>();
addMimeTypeIdentifier(new ScratchFileIdentifier());
addMimeTypeIdentifier(new Office2007FileIdentifier());
addMimeTypeIdentifier(new PptFileIdentifier());
addMimeTypeIdentifier(new XlsFileIdentifier());
addMimeTypeIdentifier(new DocFileIdentifier());
}
/**
* Add a new MimeTypeIdentifier to this Detector.
* MimeTypeIdentifier may override the decision of the detector.
* The order the identifiers are added will also be the order they will be executed
* (i.e., the last identifiers may override all others.)
*
* @param identifier a new MimeTypeIdentifier
*/
public void addMimeTypeIdentifier(MimeTypeIdentifier identifier)
{
extraIdentifiers.add(identifier);
}
/**
* Detect MIME-Type for this file.
*
* @param file File to analyse
* @return String of MIME-Type, or null if no detection was possible (or unknown MIME Type)
*/
public String getMimeType(File file)
{
byte[] bytes = new byte[mimeTypeIdentifier.getMinArrayLength()];
FileInputStream fis = null;
try {
fis = new FileInputStream(file);
fis.read(bytes);
fis.close();
fis = null;
} catch (IOException e) {
return null; // File does not exist or other I/O Error
} finally {
IOUtil.quietlyClose(fis);
}
String file_url = file.toURI().toASCIIString();
String mimeType = mimeTypeIdentifier.identify(bytes, file.getPath(), new URIImpl(file_url)) ;
/* I don't see any effect of this
if (mimeType != null && mimeType.equalsIgnoreCase("application/zip")) {
mLog.info("Is a zip-file. Try second round-detection ...");
// some new files like MS Office documents are zip files
// so rewrite the URL for the correct mimetype detection
mimeType = mimeTypeIdentifier.identify(bytes, null, new URIImpl("zip:mime:" + file_url));
}
*/
if (mimeType != null && mimeType.length() == 0)
mimeType = null;
// Identifiers may re-write MIME.
for (MimeTypeIdentifier identifier : extraIdentifiers)
mimeType = identifier.identify(mimeType, bytes, file);
mLog.info("Detected MIME-Type of " + file.getName() + " is " + mimeType);
return mimeType;
}
/**
* Return the standard extension of a specific MIME-Type.
* What are these files "normally" called?
*
* @param mimeType MIME-Type, e.g. "text/plain"
* @return Extension, e.g. "txt"
*/
public String getStandardExtensionForMimeType(String mimeType)
{
List<String> extensions = getExtensionsCached(mimeType);
if (extensions == null)
return null;
try {
return extensions.get(0);
} catch (IndexOutOfBoundsException e) {
return null;
}
}
Map<String, List<String>> extensionsCache = new HashMap<String, List<String>>();
@SuppressWarnings("unchecked")
protected List<String> getExtensionsCached(String mimeType) {
List<String> extensions = extensionsCache.get(mimeType);
if (extensions != null)
return extensions;
extensions = (List<String>) mimeTypeIdentifier.getExtensionsFor(mimeType);
for (MimeTypeIdentifier identifier : extraIdentifiers)
{
if (extensions != null)
return extensions;
extensions = identifier.getExtensionsFor(mimeType);
}
extensionsCache.put(mimeType, extensions);
return extensions;
}
/**
* Test if an given extension can contain a File of MIME-Type
* @param extension Filename extension (e.g. "txt")
* @param mimeType MIME-Type (e.g. "text/plain")
* @return True if compatible.
*/
public boolean doesExtensionMatchMimeType(String extension, String mimeType)
{
List<String> extensions = getExtensionsCached(mimeType);
if (extensions == null)
return false;
return extensions.contains(extension);
}
}