/*
 * Copyright 2007-2009 Medsea Business Solutions S.L.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package eu.medsea.mimeutil.detector;

import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.RandomAccessFile;
import java.net.URL;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.Date;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Timer;
import java.util.TimerTask;
import java.util.logging.Level;

import com.delcyon.capo.CapoApplication;

import eu.medsea.mimeutil.MimeException;
import eu.medsea.mimeutil.MimeType;
import eu.medsea.mimeutil.MimeUtil;

/**
 * <p>
 * The Opendesktop shared mime database contains glob rules and magic number
 * lookup information to enable applications to detect the mime types of files.
 * </p>
 * <p>
 * This class uses the mime.cache file which is one of the files created by the
 * update-mime-database application. This file is a memory mapped file that
 * enables the database to be updated and copied without interrupting
 * applications.
 * </p>
 * <p>
 * This implementation follows the memory mapped spec so it is not required to
 * restart an application using this mime detector should the underlying
 * mime.cache database change.
 * </p>
 * <p>
 * For a complete description of the information contained in this file please
 * see:
 * http://standards.freedesktop.org/shared-mime-info-spec/shared-mime-info-spec-latest.html
 * </p>
 * <p>
 * This class also follows, where possible, the RECOMMENDED order of detection
 * as detailed in this spec. Thanks go to Mathias Clasen at Red Hat for pointing
 * me to the original xdgmime implementation
 * http://svn.gnome.org/viewvc/glib/trunk/gio/xdgmime/xdgmimecache.c?revision=7784&view=markup
 * </p>
 * <p>
 * All reads of the mapped cache use absolute offsets, so the buffer's own
 * position is never modified by this class.
 * </p>
 *
 * @author Steven McArdle
 */
public class OpendesktopMimeDetector extends MimeDetector {

    /** Default location of the system wide mime.cache file. */
    private static String mimeCacheFile = "/usr/share/mime/mime.cache";

    /** Fallback cache used when the requested cache file does not exist. */
    private static String internalMimeCacheFile = "src/main/resources/mime.cache";

    /** Read-only memory mapping of the mime.cache file. */
    private ByteBuffer content;

    /** Timer driving the periodic check of the cache file's timestamp. */
    private Timer timer;

    /**
     * Creates a detector backed by the given mime.cache file.
     *
     * @param mimeCacheFile
     *            path of the mime.cache file; if it does not exist the internal
     *            fallback cache is used instead
     * @throws MimeException
     *             if the cache file cannot be opened or mapped
     */
    public OpendesktopMimeDetector(final String mimeCacheFile) {
        init(mimeCacheFile);
    }

    /**
     * Creates a detector backed by the default system mime.cache file.
     *
     * @throws MimeException
     *             if the cache file cannot be opened or mapped
     */
    public OpendesktopMimeDetector() {
        init(mimeCacheFile);
    }

    /**
     * Maps the cache file into memory, registers the mime types found in its
     * alias list and starts a timer that checks the file's last-modified time
     * every 10 seconds.
     *
     * @param mimeCacheFile
     *            preferred cache file; replaced by the internal cache when it
     *            does not exist
     * @throws MimeException
     *             wrapping any failure while opening, mapping or reading
     */
    private void init(final String mimeCacheFile) {
        String cacheFile = mimeCacheFile;
        if (!new File(cacheFile).exists()) {
            cacheFile = internalMimeCacheFile;
        }

        // Map the mime.cache file as a memory mapped file
        FileChannel rCh = null;
        try {
            RandomAccessFile raf = new RandomAccessFile(cacheFile, "r");
            rCh = raf.getChannel();
            content = rCh.map(FileChannel.MapMode.READ_ONLY, 0, rCh.size());

            // Read all of the MIME types from the Alias list
            initMimeTypes();

            CapoApplication.logger.fine("Registering a FileWatcher for [" + cacheFile + "]");
            // NOTE(review): the callback only re-reads the alias list from the
            // existing mapping; it does not re-map the file. Confirm this is
            // sufficient when the cache is replaced rather than edited in place.
            TimerTask task = new FileWatcher(new File(cacheFile)) {
                protected void onChange(File file) {
                    initMimeTypes();
                }
            };

            timer = new Timer();
            // repeat the check every 10 seconds
            timer.schedule(task, new Date(), 10000);
        } catch (Exception e) {
            throw new MimeException(e);
        } finally {
            // The mapping stays valid after the channel is closed.
            if (rCh != null) {
                try {
                    rCh.close();
                } catch (Exception e) {
                    CapoApplication.logger.log(Level.SEVERE, e.getLocalizedMessage(), e);
                }
            }
        }
    }

    /**
     * Stops the timer that watches the cache file. Safe to call when no timer
     * was ever started.
     */
    public void delete() {
        if (timer != null) {
            timer.cancel();
        }
    }

    /**
     * @return a human readable description including the major and minor
     *         version numbers read from the cache header
     */
    public String getDescription() {
        return "Resolve mime types for files and streams using the Opendesktop shared mime.cache file. Version ["
                + getMajorVersion() + "." + getMinorVersion() + "].";
    }

    /**
     * This method resolves mime types closely in accordance with the
     * RECOMMENDED order of detection detailed in the Opendesktop shared mime
     * database specification
     * http://standards.freedesktop.org/shared-mime-info-spec/shared-mime-info-spec-latest.html
     * See the Recommended checking order.
     *
     * @param fileName
     *            name to match against the glob rules; may be null
     * @return the matching mime type names as Strings, or an empty collection
     */
    public Collection getMimeTypesFileName(String fileName) {
        Collection mimeTypes = new ArrayList();
        // Lookup the globbing methods first
        lookupMimeTypesForGlobFileName(fileName, mimeTypes);
        if (!mimeTypes.isEmpty()) {
            mimeTypes = normalizeWeightedMimeList((List) mimeTypes);
        }
        return mimeTypes;
    }

    /**
     * This method resolves mime types closely in accordance with the
     * RECOMMENDED order of detection detailed in the Opendesktop shared mime
     * database specification
     * http://standards.freedesktop.org/shared-mime-info-spec/shared-mime-info-spec-latest.html
     * See the Recommended checking order.
     *
     * @param url
     *            location whose path is glob matched and whose content is
     *            magic matched when the globs are not conclusive
     * @return the detected mime type names
     */
    public Collection getMimeTypesURL(URL url) {
        Collection mimeTypes = getMimeTypesFileName(url.getPath());
        return _getMimeTypes(mimeTypes, getInputStream(url));
    }

    /**
     * This method resolves mime types closely in accordance with the
     * RECOMMENDED order of detection detailed in the Opendesktop shared mime
     * database specification
     * http://standards.freedesktop.org/shared-mime-info-spec/shared-mime-info-spec-latest.html
     * See the Recommended checking order.
     *
     * @param file
     *            file whose name is glob matched; its content is only examined
     *            when the file exists
     * @return the detected mime type names
     */
    public Collection getMimeTypesFile(File file) throws UnsupportedOperationException {
        Collection mimeTypes = getMimeTypesFileName(file.getName());
        if (!file.exists()) {
            return mimeTypes;
        }
        return _getMimeTypes(mimeTypes, getInputStream(file));
    }

    /**
     * This method is unable to perform glob matching as no name is available.
     * This means that it does not follow the recommended order of detection
     * defined in the shared mime database spec
     * http://standards.freedesktop.org/shared-mime-info-spec/shared-mime-info-spec-latest.html
     *
     * @param in
     *            stream to examine; it is marked before reading and reset
     *            afterwards
     * @return the mime type names whose magic rules match
     */
    public Collection getMimeTypesInputStream(InputStream in) throws UnsupportedOperationException {
        return lookupMimeTypesForMagicData(in);
    }

    /**
     * This method is unable to perform glob matching as no name is available.
     * This means that it does not follow the recommended order of detection
     * defined in the shared mime database spec
     * http://standards.freedesktop.org/shared-mime-info-spec/shared-mime-info-spec-latest.html
     *
     * @param data
     *            bytes to match against the magic rules
     * @return the mime type names whose magic rules match
     */
    public Collection getMimeTypesByteArray(byte[] data) throws UnsupportedOperationException {
        return lookupMagicData(data);
    }

    /**
     * @return a textual dump of the version numbers and section offsets read
     *         from the cache header
     */
    public String dump() {
        return "{MAJOR_VERSION=" + getMajorVersion()
                + " MINOR_VERSION=" + getMinorVersion()
                + " ALIAS_LIST_OFFSET=" + getAliasListOffset()
                + " PARENT_LIST_OFFSET=" + getParentListOffset()
                + " LITERAL_LIST_OFFSET=" + getLiteralListOffset()
                + " REVERSE_SUFFIX_TREE_OFFSET=" + getReverseSuffixTreeOffset()
                + " GLOB_LIST_OFFSET=" + getGlobListOffset()
                + " MAGIC_LIST_OFFSET=" + getMagicListOffset()
                + " NAMESPACE_LIST_OFFSET=" + getNameSpaceListOffset()
                + " ICONS_LIST_OFFSET=" + getIconListOffset()
                + " GENERIC_ICONS_LIST_OFFSET=" + getGenericIconListOffset()
                + "}";
    }

    /**
     * Reads up to {@link #getMaxExtents()} bytes from the stream, resets the
     * stream to where it was, and matches the bytes read against the magic
     * rules.
     *
     * @param in
     *            stream to sample
     * @return the mime type names whose magic rules match
     * @throws MimeException
     *             if reading or resetting the stream fails
     */
    private Collection lookupMimeTypesForMagicData(InputStream in) {
        int offset = 0;
        int len = getMaxExtents();
        byte[] data = new byte[len];
        // Mark the input stream
        in.mark(len);

        try {
            // Since an InputStream might return only some data (not all
            // requested), we have to read in a loop until either EOF is
            // reached or the desired number of bytes have been read.
            int restBytesToRead = len;
            while (restBytesToRead > 0) {
                int bytesRead = in.read(data, offset, restBytesToRead);
                if (bytesRead < 0) {
                    break; // EOF
                }
                offset += bytesRead;
                restBytesToRead -= bytesRead;
            }
        } catch (IOException ioe) {
            throw new MimeException(ioe);
        } finally {
            try {
                // Reset the input stream to where it was marked.
                in.reset();
            } catch (Exception e) {
                throw new MimeException(e);
            }
        }

        // Only match against bytes that were really read. Matching the
        // zero-filled remainder of the buffer would let all-zero magic
        // patterns match beyond the end of a short stream.
        if (offset < len) {
            byte[] trimmed = new byte[offset];
            System.arraycopy(data, 0, trimmed, 0, offset);
            data = trimmed;
        }
        return lookupMagicData(data);
    }

    /**
     * Runs every entry of the magic list against the data. The list header
     * holds the entry count at +0 and the offset of the first entry at +8;
     * entries are 16 bytes apart.
     *
     * @param data
     *            bytes to examine
     * @return names of the mime types with a matching entry
     */
    private Collection lookupMagicData(byte[] data) {
        Collection mimeTypes = new ArrayList();

        int listOffset = getMagicListOffset();
        int numEntries = content.getInt(listOffset);
        int offset = content.getInt(listOffset + 8);

        for (int i = 0; i < numEntries; i++) {
            String mimeType = compareToMagicData(offset + (16 * i), data);
            if (mimeType != null) {
                mimeTypes.add(mimeType);
            } else {
                // An entry that fails removes an earlier match of the same type
                String nonMatch = getMimeType(content.getInt(offset + (16 * i) + 4));
                mimeTypes.remove(nonMatch);
            }
        }
        return mimeTypes;
    }

    /**
     * Tests one magic list entry. The entry holds the mime type offset at +4,
     * the matchlet count at +8 and the offset of its first matchlet at +12;
     * matchlets are 32 bytes apart.
     *
     * @return the entry's mime type name if any matchlet matches, else null
     */
    private String compareToMagicData(int offset, byte[] data) {
        // int priority = content.getInt(offset);
        int mimeOffset = content.getInt(offset + 4);
        int numMatches = content.getInt(offset + 8);
        int matchletOffset = content.getInt(offset + 12);

        for (int i = 0; i < numMatches; i++) {
            if (matchletMagicCompare(matchletOffset + (i * 32), data)) {
                return getMimeType(mimeOffset);
            }
        }
        return null;
    }

    /**
     * Tests one matchlet: the expected bytes (optionally masked) are compared
     * with the data at every start position in the matchlet's range.
     *
     * @return true if the expected bytes are found at one of the positions;
     *         false otherwise, including when the data is too short
     */
    private boolean matchletMagicCompare(int offset, byte[] data) {
        int rangeStart = content.getInt(offset);
        int rangeLength = content.getInt(offset + 4);
        int dataLength = content.getInt(offset + 12);
        int dataOffset = content.getInt(offset + 16);
        int maskOffset = content.getInt(offset + 20);

        // NOTE(review): the upper bound is inclusive, so rangeLength + 1 start
        // positions are tried - confirm against the cache format whether the
        // bound should be exclusive.
        for (int i = rangeStart; i <= rangeStart + rangeLength; i++) {
            boolean validMatch = true;
            if (i + dataLength > data.length) {
                return false;
            }
            if (maskOffset != 0) {
                for (int j = 0; j < dataLength; j++) {
                    if ((content.get(dataOffset + j) & content.get(maskOffset + j))
                            != (data[j + i] & content.get(maskOffset + j))) {
                        validMatch = false;
                        break;
                    }
                }
            } else {
                for (int j = 0; j < dataLength; j++) {
                    if (content.get(dataOffset + j) != data[j + i]) {
                        validMatch = false;
                        break;
                    }
                }
            }

            if (validMatch) {
                return true;
            }
        }
        return false;
    }

    /**
     * Binary searches the literal list for an exact file name. Entries are 12
     * bytes: literal offset, mime type offset, weight.
     *
     * @param fileName
     *            name to look up
     * @param mimeTypes
     *            receives a {@link WeightedMimeType} on a hit
     */
    private void lookupGlobLiteral(String fileName, Collection mimeTypes) {
        int listOffset = getLiteralListOffset();
        int numEntries = content.getInt(listOffset);

        int min = 0;
        int max = numEntries - 1;
        while (max >= min) {
            int mid = (min + max) >>> 1;
            String literal = getString(content.getInt((listOffset + 4) + (12 * mid)));
            int cmp = literal.compareTo(fileName);
            if (cmp < 0) {
                min = mid + 1;
            } else if (cmp > 0) {
                max = mid - 1;
            } else {
                String mimeType = getMimeType(content.getInt((listOffset + 4) + (12 * mid) + 4));
                int weight = content.getInt((listOffset + 4) + (12 * mid) + 8);
                mimeTypes.add(new WeightedMimeType(mimeType, literal, weight));
                return;
            }
        }
    }

    /**
     * Matches the file name against every entry of the glob list, each glob
     * being translated to a regular expression by
     * {@link #getString(int, boolean)}. Entries are 12 bytes: glob offset, mime
     * type offset, weight.
     *
     * @param fileName
     *            name to match
     * @param mimeTypes
     *            receives a {@link WeightedMimeType} per matching glob
     */
    private void lookupGlobFileNameMatch(String fileName, Collection mimeTypes) {
        int listOffset = getGlobListOffset();
        int numEntries = content.getInt(listOffset);

        for (int i = 0; i < numEntries; i++) {
            int offset = content.getInt((listOffset + 4) + (12 * i));
            int mimeTypeOffset = content.getInt((listOffset + 4) + (12 * i) + 4);
            int weight = content.getInt((listOffset + 4) + (12 * i) + 8);

            String pattern = getString(offset, true);
            String mimeType = getMimeType(mimeTypeOffset);

            if (fileName.matches(pattern)) {
                mimeTypes.add(new WeightedMimeType(mimeType, pattern, weight));
            }
        }
    }

    /**
     * Reduces glob matches to the best ones: only the entries with the biggest
     * weight are kept, of those only the ones with the longest pattern, and the
     * survivors are returned as a duplicate free set of mime type names.
     *
     * @param weightedMimeTypes
     *            collection of {@link WeightedMimeType}; it is not modified
     * @return set of mime type name Strings, empty if the input is empty
     */
    private Collection normalizeWeightedMimeList(Collection weightedMimeTypes) {
        // Find the biggest weight
        int maxWeight = Integer.MIN_VALUE;
        for (Iterator it = weightedMimeTypes.iterator(); it.hasNext();) {
            WeightedMimeType mw = (WeightedMimeType) it.next();
            if (mw.weight > maxWeight) {
                maxWeight = mw.weight;
            }
        }

        // Find the longest pattern among the entries with the biggest weight
        int maxPatternLen = 0;
        for (Iterator it = weightedMimeTypes.iterator(); it.hasNext();) {
            WeightedMimeType mw = (WeightedMimeType) it.next();
            if (mw.weight == maxWeight && mw.pattern.length() > maxPatternLen) {
                maxPatternLen = mw.pattern.length();
            }
        }

        // Several types can share the best weight and pattern length, and the
        // same type can occur more than once, so collect the names in a set.
        Collection _mimeTypes = new HashSet();
        for (Iterator it = weightedMimeTypes.iterator(); it.hasNext();) {
            WeightedMimeType mw = (WeightedMimeType) it.next();
            if (mw.weight == maxWeight && mw.pattern.length() == maxPatternLen) {
                _mimeTypes.add(mw.toString());
            }
        }
        return _mimeTypes;
    }

    /**
     * Applies the glob lookups in order, stopping at the first that produces a
     * result: literal names, case sensitive suffixes, case insensitive
     * suffixes and finally the full glob list.
     *
     * @param fileName
     *            name to match; nothing is done when null
     * @param mimeTypes
     *            receives the {@link WeightedMimeType} matches
     */
    private void lookupMimeTypesForGlobFileName(String fileName, Collection mimeTypes) {
        if (fileName == null) {
            return;
        }

        lookupGlobLiteral(fileName, mimeTypes);
        if (!mimeTypes.isEmpty()) {
            return;
        }

        int len = fileName.length();
        lookupGlobSuffix(fileName, false, len, mimeTypes);
        if (mimeTypes.isEmpty()) {
            lookupGlobSuffix(fileName, true, len, mimeTypes);
        }

        if (mimeTypes.isEmpty()) {
            lookupGlobFileNameMatch(fileName, mimeTypes);
        }
    }

    /**
     * Starts a suffix lookup at the root of the reverse suffix tree.
     *
     * @param fileName
     *            name whose suffix is matched
     * @param ignoreCase
     *            whether file name characters are lower-cased before comparing
     * @param len
     *            number of leading characters of the name still unmatched
     * @param mimeTypes
     *            receives the {@link WeightedMimeType} matches
     */
    private void lookupGlobSuffix(String fileName, boolean ignoreCase, int len, Collection mimeTypes) {
        int listOffset = getReverseSuffixTreeOffset();
        int numEntries = content.getInt(listOffset);
        int offset = content.getInt(listOffset + 4);

        lookupGlobNodeSuffix(fileName, numEntries, offset, ignoreCase, len, mimeTypes, new StringBuffer());
    }

    /**
     * Matches the character at index {@code len - 1} of the file name against
     * one level of the reverse suffix tree and recurses towards the start of
     * the name. Nodes are 12 bytes: character, child count, child offset.
     * Children whose character is 0 carry a mime type offset and a weight in
     * place of the child count and child offset.
     *
     * @param fileName
     *            name being matched
     * @param numEntries
     *            number of sibling nodes at this level
     * @param offset
     *            offset of the first sibling node
     * @param ignoreCase
     *            whether the file name character is lower-cased first
     * @param len
     *            number of leading characters of the name still unmatched
     * @param mimeTypes
     *            receives the {@link WeightedMimeType} matches
     * @param pattern
     *            accumulates the matched characters
     */
    private void lookupGlobNodeSuffix(String fileName, int numEntries, int offset, boolean ignoreCase, int len,
            Collection mimeTypes, StringBuffer pattern) {
        // An empty name has no character to match (charAt(-1) would throw)
        if (len <= 0) {
            return;
        }

        // Lower-case only the character in question: this is independent of
        // the default locale and avoids copying the name on every level.
        char character = fileName.charAt(len - 1);
        if (ignoreCase) {
            character = Character.toLowerCase(character);
        }

        if (character == 0) {
            return;
        }

        int min = 0;
        int max = numEntries - 1;
        while (max >= min && len >= 0) {
            int mid = (min + max) >>> 1;
            char matchChar = (char) content.getInt(offset + (12 * mid));
            if (matchChar < character) {
                min = mid + 1;
            } else if (matchChar > character) {
                max = mid - 1;
            } else {
                len--;
                int numChildren = content.getInt(offset + (12 * mid) + 4);
                int childOffset = content.getInt(offset + (12 * mid) + 8);
                if (len > 0) {
                    pattern.append(matchChar);
                    // Prefer a longer suffix match further down the tree
                    lookupGlobNodeSuffix(fileName, numChildren, childOffset, ignoreCase, len, mimeTypes, pattern);
                }
                if (mimeTypes.isEmpty()) {
                    for (int i = 0; i < numChildren; i++) {
                        matchChar = (char) content.getInt(childOffset + (12 * i));
                        if (matchChar != 0) {
                            break;
                        }
                        int mimeOffset = content.getInt(childOffset + (12 * i) + 4);
                        int weight = content.getInt(childOffset + (12 * i) + 8);
                        mimeTypes.add(new WeightedMimeType(getMimeType(mimeOffset), pattern.toString(), weight));
                    }
                }
                return;
            }
        }
    }

    /**
     * A mime type together with the pattern that matched and the weight of
     * that match.
     */
    class WeightedMimeType extends MimeType {

        private static final long serialVersionUID = 1L;

        String pattern;
        int weight;

        WeightedMimeType(String mimeType, String pattern, int weight) {
            super(mimeType);
            this.pattern = pattern;
            this.weight = weight;
        }
    }

    /**
     * @return the int stored at +4 of the magic list header, used as the
     *         number of bytes to sample from a stream
     */
    private int getMaxExtents() {
        return content.getInt(getMagicListOffset() + 4);
    }

    /**
     * Binary searches the alias list. Entries are 8 bytes: alias offset, mime
     * type offset.
     *
     * @param alias
     *            alias name to resolve
     * @return the mime type name the alias maps to, or null if not listed
     */
    private String aliasLookup(String alias) {
        int aliasListOffset = getAliasListOffset();
        int min = 0;
        int max = content.getInt(aliasListOffset) - 1;

        while (max >= min) {
            int mid = (min + max) >>> 1;
            int aliasOffset = content.getInt((aliasListOffset + 4) + (mid * 8));
            int mimeOffset = content.getInt((aliasListOffset + 4) + (mid * 8) + 4);

            int cmp = getMimeType(aliasOffset).compareTo(alias);
            if (cmp < 0) {
                min = mid + 1;
            } else if (cmp > 0) {
                max = mid - 1;
            } else {
                return getMimeType(mimeOffset);
            }
        }
        return null;
    }

    /**
     * @return the mime type the given name is an alias of, or the name itself
     *         when it is not listed as an alias
     */
    private String unaliasMimeType(String mimeType) {
        String lookup = aliasLookup(mimeType);
        return lookup == null ? mimeType : lookup;
    }

    /**
     * Tests whether {@code mimeType} is, after alias resolution, the same as
     * or derived from {@code subClass}. Besides the parent list of the cache,
     * this treats "media/*" super types, text/plain (for any text type) and
     * application/octet-stream (for every type) as parents.
     *
     * @param mimeType
     *            candidate child type
     * @param subClass
     *            candidate parent type
     * @return true if the relationship holds
     */
    private boolean isMimeTypeSubclass(String mimeType, String subClass) {
        String umimeType = unaliasMimeType(mimeType);
        String usubClass = unaliasMimeType(subClass);
        MimeType _mimeType = new MimeType(umimeType);
        MimeType _subClass = new MimeType(usubClass);

        if (umimeType.compareTo(usubClass) == 0) {
            return true;
        }

        if (isSuperType(usubClass) && (_mimeType.getMediaType().equals(_subClass.getMediaType()))) {
            return true;
        }

        // Handle special cases text/plain and application/octet-stream
        if (usubClass.equals("text/plain") && _mimeType.getMediaType().equals("text")) {
            return true;
        }

        if (usubClass.equals("application/octet-stream")) {
            return true;
        }

        // Binary search the parent list (8 byte entries: mime type offset,
        // offset of that type's parents) and recurse into each parent.
        int parentListOffset = getParentListOffset();
        int numParents = content.getInt(parentListOffset);
        int min = 0;
        int max = numParents - 1;
        while (max >= min) {
            int med = (min + max) >>> 1;
            int offset = content.getInt((parentListOffset + 4) + (8 * med));
            String parentMime = getMimeType(offset);
            int cmp = parentMime.compareTo(umimeType);
            if (cmp < 0) {
                min = med + 1;
            } else if (cmp > 0) {
                max = med - 1;
            } else {
                offset = content.getInt((parentListOffset + 4) + (8 * med) + 4);
                int _numParents = content.getInt(offset);
                for (int i = 0; i < _numParents; i++) {
                    int parentOffset = content.getInt((offset + 4) + (4 * i));
                    if (isMimeTypeSubclass(getMimeType(parentOffset), usubClass)) {
                        return true;
                    }
                }
                break;
            }
        }
        return false;
    }

    /**
     * @return true if the name ends with "/*"; names shorter than two
     *         characters are simply not super types
     */
    private boolean isSuperType(String mimeType) {
        return mimeType.endsWith("/*");
    }

    // The accessors below read the fixed position fields of the cache header.

    private int getGenericIconListOffset() {
        return content.getInt(36);
    }

    private int getIconListOffset() {
        return content.getInt(32);
    }

    private int getNameSpaceListOffset() {
        return content.getInt(28);
    }

    private int getMagicListOffset() {
        return content.getInt(24);
    }

    private int getGlobListOffset() {
        return content.getInt(20);
    }

    private int getReverseSuffixTreeOffset() {
        return content.getInt(16);
    }

    private int getLiteralListOffset() {
        return content.getInt(12);
    }

    private int getParentListOffset() {
        return content.getInt(8);
    }

    private int getAliasListOffset() {
        return content.getInt(4);
    }

    private short getMinorVersion() {
        return content.getShort(2);
    }

    private short getMajorVersion() {
        return content.getShort(0);
    }

    /**
     * @return the zero terminated string stored at the given offset
     */
    private String getMimeType(int offset) {
        return getString(offset);
    }

    /**
     * @return the zero terminated string stored at the given offset
     */
    private String getString(int offset) {
        return getString(offset, false);
    }

    /**
     * Reads the zero terminated string at the given offset using absolute
     * reads, so the buffer's position is left untouched. (Moving the shared
     * position made it creep forward on every call until it exceeded the
     * buffer limit, and interfered with reads from other threads.)
     *
     * @param offset
     *            offset of the first byte of the string
     * @param regularExpression
     *            when true the string is treated as a glob and returned as an
     *            anchored regular expression: '.' is escaped and '*', '+' and
     *            '?' are prefixed with '.'
     * @return the decoded string
     */
    private String getString(int offset, boolean regularExpression) {
        StringBuffer buf = new StringBuffer();
        int index = offset;
        char c = 0;
        while ((c = (char) content.get(index++)) != 0) {
            if (regularExpression) {
                // NOTE(review): '+' and '?' get the same '.' prefix as '*';
                // confirm that is the intended glob translation.
                switch (c) {
                case '.':
                    buf.append("\\");
                    break;
                case '*':
                case '+':
                case '?':
                    buf.append(".");
                }
            }
            buf.append(c);
        }

        if (regularExpression) {
            buf.insert(0, '^');
            buf.append('$');
        }
        return buf.toString();
    }

    /**
     * @return a stream over the file, or null (after logging) if it cannot be
     *         opened
     */
    private InputStream getInputStream(File file) {
        try {
            return new FileInputStream(file);
        } catch (Exception e) {
            CapoApplication.logger.log(Level.SEVERE,
                    "Error getting InputStream for file [" + file.getAbsolutePath() + "]", e);
        }
        return null;
    }

    /**
     * @return a stream over the URL content
     * @throws MimeException
     *             if the stream cannot be obtained
     */
    private InputStream getInputStream(URL url) {
        try {
            return MimeUtil.getInputStreamForURL(url);
        } catch (Exception e) {
            throw new MimeException("Error getting InputStream for URL [" + url.getPath() + "]", e);
        }
    }

    /**
     * Combines glob results with magic detection. The stream is only examined
     * when the globs produced no match or more than one; it is always closed
     * before returning.
     *
     * @param mimeTypes
     *            mime type names found by glob matching
     * @param in
     *            stream over the content
     * @return the magic matches when no glob matched and magic found
     *         something, otherwise {@code mimeTypes}
     * @throws MimeException
     *             wrapping any failure during detection
     */
    private Collection _getMimeTypes(Collection mimeTypes, InputStream in) {
        try {
            if (mimeTypes.isEmpty() || mimeTypes.size() > 1) {
                // Buffer the stream so it can be marked and reset
                in = new BufferedInputStream(in);
                Collection _mimeTypes = getMimeTypesInputStream(in);

                if (!_mimeTypes.isEmpty()) {
                    if (!mimeTypes.isEmpty()) {
                        // more than one glob matched

                        // NOTE(review): both add() calls below re-add an
                        // element that was just read from mimeTypes, so for a
                        // Set they change nothing. An earlier variant narrowed
                        // the result to the confirmed type - confirm intent.
                        for (Iterator it = mimeTypes.iterator(); it.hasNext();) {
                            String mimeType = (String) it.next();
                            // Check for same mime type
                            if (_mimeTypes.contains(mimeType)) {
                                mimeTypes.add(mimeType);
                            }
                            // Check for mime type subtype
                            for (Iterator _it = _mimeTypes.iterator(); _it.hasNext();) {
                                String _mimeType = (String) _it.next();
                                if (isMimeTypeSubclass(mimeType, _mimeType)) {
                                    mimeTypes.add(mimeType);
                                }
                            }
                        }
                    } else {
                        // No globs matched but we have magic matches
                        return _mimeTypes;
                    }
                }
            }
        } catch (Exception e) {
            throw new MimeException(e);
        } finally {
            closeStream(in);
        }
        return mimeTypes;
    }

    /**
     * Registers the first string of every alias list entry with
     * {@link MimeUtil}. The Alias list should contain just about all the mime
     * types used by this MimeDetector so we will be content with these
     * entries.
     */
    private void initMimeTypes() {
        int listOffset = getAliasListOffset();
        int numAliases = content.getInt(listOffset);
        for (int i = 0; i < numAliases; i++) {
            MimeUtil.addKnownMimeType(getString(content.getInt((listOffset + 4) + (i * 8))));
        }
    }
}

/**
 * Timer task that calls {@link #onChange(File)} whenever the last-modified
 * time of the watched file differs from the value seen on the previous check.
 */
abstract class FileWatcher extends TimerTask {

    private long timeStamp;
    private File file;

    /**
     * @param file
     *            file to watch; its current last-modified time is the baseline
     */
    public FileWatcher(File file) {
        this.file = file;
        this.timeStamp = file.lastModified();
    }

    public final void run() {
        long timeStamp = file.lastModified();

        // Only do this if the file timestamp has changed
        if (this.timeStamp != timeStamp) {
            this.timeStamp = timeStamp;
            onChange(file);
        }
    }

    /**
     * Called when the watched file's last-modified time has changed.
     *
     * @param file
     *            the watched file
     */
    protected abstract void onChange(File file);
}