/*
* Copyright 2007-2009 Medsea Business Solutions S.L.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package eu.medsea.mimeutil;
import java.io.File;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.net.URL;
import java.nio.ByteOrder;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.logging.Level;
import java.util.regex.Pattern;
import java.util.zip.ZipException;
import com.delcyon.capo.CapoApplication;
import eu.medsea.mimeutil.detector.ExtensionMimeDetector;
import eu.medsea.mimeutil.detector.MagicMimeMimeDetector;
import eu.medsea.mimeutil.detector.MimeDetector;
import eu.medsea.mimeutil.detector.OpendesktopMimeDetector;
import eu.medsea.util.EncodingGuesser;
import eu.medsea.util.StringUtil;
import eu.medsea.util.ZipJarUtil;
/**
* <p>
* The <code>MimeUtil2</code> is a utility class that allows applications to
* detect, work with and manipulate MIME types.
* </p>
* <p>
* A MIME or "Multipurpose Internet Mail Extension" type is an Internet standard
* that is important outside of just e-mail use. MIME is used extensively in
* other communications protocols such as HTTP for web communications. IANA
* "Internet Assigned Numbers Authority" is responsible for the standardisation
* and publication of MIME types. Basically any resource on any computer that
* can be located via a URL can be assigned a MIME type. So for instance, JPEG
* images have a MIME type of image/jpeg. Some resources can have multiple MIME
* types associated with them such as files with an XML extension have the MIME
* types text/xml and application/xml and even specialised versions of xml such
* as image/svg+xml for SVG image files.
* </p>
* <p>
* To do this <code>MimeUtil2</code> uses registered <code>MimeDetector</code>(s)
* that are delegated to in sequence to actually perform the detection.
* There are several <code>MimeDetector</code> implementations that come with
* the utility and you can register and unregister them to perform detection
* based on file extensions, file globbing and magic number detection.<br/>
* There is also a fourth MimeDetector that is registered by default that
* detects text files and encodings. Unlike the other MimeDetector(s) or any
* MimeDetector(s) you may choose to implement, the TextMimeDetector cannot be
* registered or unregistered by your code. It is advisable that you read the
* java doc for the TextMimeDetector as it can be modified in several ways to
* make it perform better and or detect more specific types.<br/>
* Please refer to the java doc for each of these <code>MimeDetector</code>(s)
* for a description of how they actually perform their particular detection
* process.
* </p>
* <p>
* It is important to note that MIME matching is not an exact science, meaning
* that a positive match does not guarantee that the returned MIME type is
* actually correct. It is a best guess method of matching and the matched MIME
* types should be used with this in mind.
* </p>
* <p>
* New <code>MimeDetector</code>(s) can easily be created and registered with
* <code>MimeUtil2</code> to extend its functionality beyond these initial
* detection strategies by extending the <code>AbstractMimeDetector</code>
* class. To see how to implement your own <code>MimeDetector</code> take a look
* at the java doc and source code for the {@link ExtensionMimeDetector},
* {@link MagicMimeMimeDetector} and {@link OpendesktopMimeDetector} classes. To
* register and unregister MimeDetector(s) use the [un]registerMimeDetector(...)
* methods of this class.
* </p>
* <p>
* The order that the <code>MimeDetector</code>(s) are executed is defined by
* the order each <code>MimeDetector</code> is registered.
* </p>
* <p>
* The resulting <code>Collection</code> of mime types returned in response to a
* getMimeTypes(...) call is a normalised list of the accumulation of MIME types
* returned by each of the registered <code>MimeDetector</code>(s) that
* implement the specified getMimeTypesXXX(...) methods.
* </p>
* <p>
* All methods in this class that return a Collection object containing
* MimeType(s) actually return a {@link MimeTypeHashSet} that implements both
* the {@link Set} and {@link Collection} interfaces.
* </p>
*
* @author Steven McArdle.
* @since 2.1
*/
public class MimeUtil2
{
/**
 * Mime type used to identify a directory.
 */
public static final MimeType DIRECTORY_MIME_TYPE = new MimeType("application/directory");
/**
 * Mime type used to identify an unknown MIME type.
 */
public static final MimeType UNKNOWN_MIME_TYPE = new MimeType("application/octet-stream");
// Splits a MIME type string on runs of '/' and ';', i.e. into media type,
// sub type and any trailing parameters.
private static final Pattern mimeSplitter = Pattern.compile("[/;]++");
// All mime types known to the utility, keyed by media type with a Set of sub
// types as the value. The map is synchronised for multi-threaded use and ALL
// instances of MimeUtil2 share it. It is final because it is never replaced
// and code may need to lock on it for compound operations.
private static final Map mimeTypes = Collections.synchronizedMap(new HashMap());
// The native byte order of the underlying platform, captured once at class
// initialisation.
private static final ByteOrder nativeByteOrder = ByteOrder.nativeOrder();
// Per-instance registry that every detection call of this instance delegates to.
private final MimeDetectorRegistry mimeDetectorRegistry = new MimeDetectorRegistry();
/**
 * Adds a MIME type to the shared list of known MIME types.
 * <p>
 * This overload converts the MimeType to its string form and hands it to
 * {@link #addKnownMimeType(String)}, which performs the actual registration.
 * Types registered this way are subsequently reported by the
 * isMimeTypeKnown(...) methods.
 * </p>
 *
 * @param mimeType
 *            the MIME type to add to the known MIME types. Must not be
 *            null.
 * @see #isMimeTypeKnown(String mimeType)
 * @see #isMimeTypeKnown(MimeType mimetType)
 */
public static void addKnownMimeType(final MimeType mimeType)
{
    final String asText = mimeType.toString();
    addKnownMimeType(asText);
}
/**
 * Adds a MIME type, given as a string, to the shared list of known MIME
 * types.
 * <p>
 * The string is split into its media type and sub type. The sub type is
 * stored in a sorted set kept under the media type, so adding the same MIME
 * type twice has no further effect. Types registered this way are
 * subsequently reported by the isMimeTypeKnown(...) methods.
 * </p>
 * <p>
 * Strings that cannot be parsed as a MIME type (a MimeException is raised
 * while extracting the media or sub type) are silently ignored.
 * </p>
 * <p>
 * The lookup, the creation of a missing set and the insertion happen while
 * holding the lock of the shared map, so concurrent registrations cannot
 * lose entries.
 * </p>
 *
 * @param mimeType
 *            a MIME type you want to add to the known MIME types.
 *            Duplicates are ignored.
 * @see #isMimeTypeKnown(String mimetype)
 * @see #isMimeTypeKnown(MimeType mimetType)
 */
public static void addKnownMimeType(final String mimeType)
{
    try
    {
        // Parse both parts before touching shared state so that an invalid
        // type never leaves an empty set behind.
        final String key = getMediaType(mimeType);
        final String subType = getSubType(mimeType);
        // The synchronised map only protects single calls. The
        // get/create/put sequence and the mutation of the (unsynchronised)
        // TreeSet must be made atomic explicitly.
        synchronized (mimeTypes)
        {
            Set s = (Set) mimeTypes.get(key);
            if (s == null)
            {
                s = new TreeSet();
                mimeTypes.put(key, s);
            }
            s.add(subType);
        }
    } catch (MimeException ignore)
    {
        // A couple of entries in the magic mime file don't follow the rules
        // so ignore them
    }
}
/**
 * Returns a snapshot of the currently known MIME types as strings of the
 * form <code>mediaType/subType</code>.
 * <p>
 * The returned collection is a new list; changing it does not affect the
 * shared registry. The registry is locked while it is being copied because
 * iterating a synchronised map requires the caller to hold the map's lock.
 * </p>
 *
 * @return a new Collection of Strings, one per known MIME type
 */
public static Collection getKnownMimeTypes()
{
    final Collection known = new ArrayList();
    synchronized (mimeTypes)
    {
        for (Iterator entries = mimeTypes.entrySet().iterator(); entries.hasNext();)
        {
            // Each entry maps a media type to the Set of its sub types
            final Map.Entry entry = (Map.Entry) entries.next();
            final String mediaType = (String) entry.getKey();
            for (Iterator subTypes = ((Set) entry.getValue()).iterator(); subTypes.hasNext();)
            {
                known.add(mediaType + "/" + (String) subTypes.next());
            }
        }
    }
    return known;
}
/**
 * Registers a MimeDetector with the detector registry of this instance.
 * The call is forwarded unchanged to the registry.
 *
 * @param mimeDetector
 *            the name of the detector to register. According to the
 *            original documentation this must be the fully qualified
 *            class name of a concrete AbstractMimeDetector subclass; the
 *            registry enforces this, not this method.
 * @return whatever the registry returns for this registration
 * @see MimeDetector
 */
public MimeDetector registerMimeDetector(final String mimeDetector)
{
    final MimeDetector registered = mimeDetectorRegistry.registerMimeDetector(mimeDetector);
    return registered;
}
/**
 * Returns the extension of the given file, based on the file's name only
 * (the last element of its path).
 *
 * @param file
 *            a file object, must not be null
 * @return the extension as computed by {@link #getExtension(String)} for
 *         the file's name; the empty string if the name has no extension
 */
public static String getExtension(final File file)
{
    final String name = file.getName();
    return getExtension(name);
}
/**
 * Get the extension part of a file name defined by the fileName parameter.
 * There may be no extension or it could be a single part extension such as
 * .bat or a multi-part extension such as .tar.gz
 * <p>
 * Only the last element of the path is examined, so dots in directory names
 * (for example <code>/home/user.name/readme</code>) are not mistaken for
 * the start of an extension. Both '/' and '\' are treated as path
 * separators. The extension is everything after the first dot of that last
 * element, so a name such as <code>.bashrc</code> yields
 * <code>bashrc</code>.
 * </p>
 *
 * @param fileName
 *            a relative or absolute path to a file, may be null
 * @return the file extension without the leading dot, or the empty string
 *         if fileName is null, empty or has no extension
 */
public static String getExtension(final String fileName)
{
    if (fileName == null || fileName.length() == 0)
    {
        return "";
    }
    // Skip any directory part: lastIndexOf yields -1 when there is no
    // separator, which makes the search start at index 0.
    final int lastSeparator = Math.max(fileName.lastIndexOf('/'), fileName.lastIndexOf('\\'));
    final int index = fileName.indexOf('.', lastSeparator + 1);
    return index < 0 ? "" : fileName.substring(index + 1);
}
/**
 * Get the first in a comma separated list of mime types. Useful when using
 * extension mapping that can return multiple mime types separated by commas
 * and you only want the first one.
 *
 * @param mimeTypes
 *            comma separated list of mime types
 * @return a MimeType built from the first entry of the list, or null if
 *         the mimeTypes string is null, blank or consists of commas only
 */
public static MimeType getFirstMimeType(final String mimeTypes)
{
    if (mimeTypes == null || mimeTypes.trim().length() == 0)
    {
        return null;
    }
    final String[] candidates = mimeTypes.split(",");
    // split() drops trailing empty strings, so input such as "," produces
    // an empty array; treat that like an empty list instead of failing
    // with an ArrayIndexOutOfBoundsException.
    if (candidates.length == 0)
    {
        return null;
    }
    return new MimeType(candidates[0].trim());
}
/**
 * Utility method returning the major (media) part of a mime type, i.e. the
 * part in front of the '/' character. The string is parsed by constructing
 * a MimeType from it.
 *
 * @param mimeType
 *            the mime type string to take the media part from
 * @return media type of the mime type
 * @throws MimeException
 *             if you pass in an invalid mime type structure
 */
public static String getMediaType(final String mimeType) throws MimeException
{
    final MimeType parsed = new MimeType(mimeType);
    return parsed.getMediaType();
}
/**
 * Utility method to get the quality part of a mime type.
 * <p>
 * If the mime type carries a <code>q=</code> parameter, its value is
 * returned, limited to the range 0.0 to 1.0: values above 1.0 are reduced
 * to 1.0 and negative values are raised to 0.0. If there is no
 * <code>q=</code> parameter, the quality is 0.01 when the media (major)
 * part contains a wild card, 0.02 when the sub (minor) part contains a wild
 * card and 1.0 otherwise.
 * </p>
 * <p>
 * Thanks to the Apache organisation for these settings.
 * </p>
 *
 * @param mimeType
 *            a valid mime type string with or without a valid q parameter
 * @return the quality value of the mime type, either calculated from the
 *         rules above or the (range limited) value defined.
 * @throws MimeException
 *             if mimeType is null, does not consist of at least a media
 *             and a sub type, or has a q parameter that is not a number.
 */
public static double getMimeQuality(final String mimeType) throws MimeException
{
    if (mimeType == null)
    {
        throw new MimeException("Invalid MimeType [" + mimeType + "].");
    }
    final String[] parts = mimeSplitter.split(mimeType);
    if (parts.length < 2)
    {
        throw new MimeException("Invalid MimeType [" + mimeType + "].");
    }
    // parts[0] and parts[1] are media and sub type; anything after that is
    // a parameter. The first parameter starting with "q=" decides.
    for (int i = 2; i < parts.length; i++)
    {
        final String parameter = parts[i].trim();
        if (!parameter.startsWith("q="))
        {
            continue;
        }
        double quality;
        try
        {
            // Get the quality factor
            quality = Double.parseDouble(parameter.split("=")[1].trim());
        } catch (NumberFormatException e)
        {
            throw new MimeException("Invalid MIME quality indicator [" + parameter + "]. Must be a valid double between 0 and 1");
        } catch (Exception e)
        {
            // e.g. "q=" with nothing after the equals sign
            throw new MimeException("Error parsing MIME quality indicator.", e);
        }
        // parseDouble accepts "NaN", which is not a usable quality value
        if (Double.isNaN(quality))
        {
            throw new MimeException("Invalid MIME quality indicator [" + parameter + "]. Must be a valid double between 0 and 1");
        }
        if (quality > 1.0)
        {
            return 1.0;
        }
        return quality < 0.0 ? 0.0 : quality;
    }
    // No quality indicator so always assume its 1 unless a wild card is
    // used
    if (StringUtil.contains(parts[0], "*"))
    {
        return 0.01;
    }
    if (StringUtil.contains(parts[1], "*"))
    {
        return 0.02;
    }
    // Assume q value of 1
    return 1.0;
}
/**
 * Looks up a MimeDetector in the detector registry of this instance. The
 * call is forwarded unchanged to the registry.
 *
 * @param name
 *            the name the MimeDetector was registered under. According to
 *            the original documentation this is always the fully
 *            qualified name of the class implementing the MimeDetector.
 * @return whatever the registry returns for this name
 */
public MimeDetector getMimeDetector(final String name)
{
    final MimeDetector found = mimeDetectorRegistry.getMimeDetector(name);
    return found;
}
/**
 * Detects the possible MimeType(s) of a byte array using the registered
 * MimeDetector(s). Equivalent to calling
 * {@link #getMimeTypes(byte[], MimeType)} with UNKNOWN_MIME_TYPE as the
 * fall back.
 *
 * @param data
 *            the bytes to examine
 * @return all matching MimeType(s); contains only UNKNOWN_MIME_TYPE if
 *         nothing was detected
 * @throws MimeException
 *             propagated from the two argument overload
 */
public final Collection getMimeTypes(final byte[] data) throws MimeException
{
    final Collection detected = getMimeTypes(data, UNKNOWN_MIME_TYPE);
    return detected;
}
/**
 * Get a Collection of possible MimeType(s) that this byte array could
 * represent according to the registered MimeDetector(s). If no MimeType(s)
 * are detected then the returned Collection will contain only the passed in
 * unknownMimeType.
 * <p>
 * A null array is not treated as an error for the caller: it is logged at
 * SEVERE level and the result contains only unknownMimeType.
 * </p>
 *
 * @param data
 *            the bytes to examine, may be null
 * @param unknownMimeType
 *            used if the registered MimeDetector(s) fail to match any
 *            MimeType(s)
 * @return all matching MimeType(s)
 * @throws MimeException
 *             if the hex dump for the trace log cannot be produced, or as
 *             propagated from the detector registry
 */
public final Collection getMimeTypes(final byte[] data, final MimeType unknownMimeType) throws MimeException
{
    final Collection detected = new MimeTypeHashSet();
    if (data == null)
    {
        CapoApplication.logger.log(Level.SEVERE,"byte array cannot be null.");
    }
    else
    {
        // Building a hex dump of the whole array is expensive, so only do
        // it when the message will actually be logged.
        if (CapoApplication.logger.isLoggable(Level.FINEST))
        {
            try
            {
                CapoApplication.logger.finest("Getting MIME types for byte array [" + StringUtil.getHexString(data) + "].");
            } catch (UnsupportedEncodingException e)
            {
                throw new MimeException(e);
            }
        }
        detected.addAll(mimeDetectorRegistry.getMimeTypes(data));
        // We don't want the unknownMimeType added to the collection by
        // MimeDetector(s)
        detected.remove(unknownMimeType);
    }
    // If the collection is empty we want to add the unknownMimetype
    if (detected.isEmpty())
    {
        detected.add(unknownMimeType);
    }
    CapoApplication.logger.finest("Retrieved MIME types [" + detected.toString() + "]");
    return detected;
}
/**
 * Detects all matching mime types for a file object. Equivalent to calling
 * {@link #getMimeTypes(File, MimeType)} with UNKNOWN_MIME_TYPE as the fall
 * back.
 *
 * @param file
 *            the File object to detect.
 * @return collection of matching MimeType(s); contains UNKNOWN_MIME_TYPE
 *         if nothing matched
 * @throws MimeException
 *             propagated from the two argument overload
 */
public final Collection getMimeTypes(final File file) throws MimeException
{
    final Collection detected = getMimeTypes(file, UNKNOWN_MIME_TYPE);
    return detected;
}
/**
 * Detects all matching mime types for a file object.
 * <p>
 * A directory yields DIRECTORY_MIME_TYPE. Any other file is passed to the
 * detector registry and the registry's result is collected, with
 * unknownMimeType removed from it. If the resulting collection is empty,
 * it will contain just unknownMimeType. A null file is logged at SEVERE
 * level and also results in a collection holding only unknownMimeType.
 * </p>
 *
 * @param file
 *            the File object to detect, may be null
 * @param unknownMimeType
 *            the MimeType to return when nothing matched
 * @return the Collection of matching mime types, never empty
 * @throws MimeException
 *             as propagated from the detector registry
 */
public final Collection getMimeTypes(final File file, final MimeType unknownMimeType) throws MimeException
{
    final Collection detected = new MimeTypeHashSet();
    if (file != null)
    {
        CapoApplication.logger.finest("Getting MIME types for file [" + file.getAbsolutePath() + "].");
        if (!file.isDirectory())
        {
            // Let the registered detectors examine the file
            detected.addAll(mimeDetectorRegistry.getMimeTypes(file));
            // A detector may have reported the fall back type itself;
            // it must only appear when nothing else matched
            detected.remove(unknownMimeType);
        }
        else
        {
            detected.add(MimeUtil2.DIRECTORY_MIME_TYPE);
        }
    }
    else
    {
        CapoApplication.logger.log(Level.SEVERE,"File reference cannot be null.");
    }
    // Nothing matched: fall back to the caller supplied type
    if (detected.isEmpty())
    {
        detected.add(unknownMimeType);
    }
    CapoApplication.logger.finest("Retrieved MIME types [" + detected.toString() + "]");
    return detected;
}
/**
 * Detects all matching mime types for an InputStream. Equivalent to
 * calling {@link #getMimeTypes(InputStream, MimeType)} with
 * UNKNOWN_MIME_TYPE as the fall back.
 *
 * @param in
 *            InputStream to detect.
 * @return collection of matching MimeType(s); contains UNKNOWN_MIME_TYPE
 *         if nothing matched
 * @throws MimeException
 *             propagated from the two argument overload
 */
public final Collection getMimeTypes(final InputStream in) throws MimeException
{
    final Collection detected = getMimeTypes(in, UNKNOWN_MIME_TYPE);
    return detected;
}
/**
 * Detects all matching mime types for an InputStream.
 * <p>
 * The stream is passed to the detector registry and the registry's result
 * is collected, with unknownMimeType removed from it. If the resulting
 * collection is empty, it will contain just unknownMimeType. A null stream
 * is logged at SEVERE level and also results in a collection holding only
 * unknownMimeType.
 * </p>
 *
 * @param in
 *            the InputStream object to detect, may be null. A non-null
 *            stream must support mark() and reset().
 * @param unknownMimeType
 *            the MimeType to return when nothing matched
 * @return the Collection of matching mime types, never empty
 * @throws MimeException
 *             if the stream does not support mark/reset, or as propagated
 *             from the detector registry
 */
public final Collection getMimeTypes(final InputStream in, final MimeType unknownMimeType) throws MimeException
{
    final Collection detected = new MimeTypeHashSet();
    if (in != null)
    {
        final boolean rewindable = in.markSupported();
        if (!rewindable)
        {
            throw new MimeException("InputStream must support the mark() and reset() methods.");
        }
        CapoApplication.logger.finest("Getting MIME types for InputSteam [" + in + "].");
        detected.addAll(mimeDetectorRegistry.getMimeTypes(in));
        // A detector may have reported the fall back type itself; it must
        // only appear when nothing else matched
        detected.remove(unknownMimeType);
    }
    else
    {
        CapoApplication.logger.log(Level.SEVERE,"InputStream reference cannot be null.");
    }
    // Nothing matched: fall back to the caller supplied type
    if (detected.isEmpty())
    {
        detected.add(unknownMimeType);
    }
    CapoApplication.logger.finest("Retrieved MIME types [" + detected.toString() + "]");
    return detected;
}
/**
 * Detects all matching mime types for a file name. Equivalent to calling
 * {@link #getMimeTypes(String, MimeType)} with UNKNOWN_MIME_TYPE as the
 * fall back.
 *
 * @param fileName
 *            the name of a file to detect.
 * @return collection of matching MimeType(s); contains UNKNOWN_MIME_TYPE
 *         if nothing matched
 * @throws MimeException
 *             propagated from the two argument overload
 */
public final Collection getMimeTypes(final String fileName) throws MimeException
{
    final Collection detected = getMimeTypes(fileName, UNKNOWN_MIME_TYPE);
    return detected;
}
/**
 * Detects all matching mime types for a file name.
 * <p>
 * If the name denotes an existing directory the result is
 * DIRECTORY_MIME_TYPE. Otherwise the name is passed to the detector
 * registry and the registry's result is collected, with unknownMimeType
 * removed from it. If the resulting collection is empty, it will contain
 * just unknownMimeType. A null name is logged at SEVERE level and also
 * results in a collection holding only unknownMimeType.
 * </p>
 *
 * @param fileName
 *            the name of a file to detect, may be null
 * @param unknownMimeType
 *            the MimeType to return when nothing matched
 * @return the Collection of matching mime types, never empty
 * @throws MimeException
 *             as propagated from the detector registry
 */
public final Collection getMimeTypes(final String fileName, final MimeType unknownMimeType) throws MimeException
{
    final Collection detected = new MimeTypeHashSet();
    if (fileName != null)
    {
        CapoApplication.logger.finest("Getting MIME types for file name [" + fileName + "].");
        // Directories are reported without consulting the detectors
        final File candidate = new File(fileName);
        if (!candidate.isDirectory())
        {
            detected.addAll(mimeDetectorRegistry.getMimeTypes(fileName));
            // A detector may have reported the fall back type itself;
            // it must only appear when nothing else matched
            detected.remove(unknownMimeType);
        }
        else
        {
            detected.add(MimeUtil2.DIRECTORY_MIME_TYPE);
        }
    }
    else
    {
        CapoApplication.logger.log(Level.SEVERE,"fileName cannot be null.");
    }
    // Nothing matched: fall back to the caller supplied type
    if (detected.isEmpty())
    {
        detected.add(unknownMimeType);
    }
    CapoApplication.logger.finest("Retrieved MIME types [" + detected.toString() + "]");
    return detected;
}
/**
 * Detects all matching mime types for a URL. Equivalent to calling
 * {@link #getMimeTypes(URL, MimeType)} with UNKNOWN_MIME_TYPE as the fall
 * back.
 *
 * @param url
 *            a URL to detect.
 * @return Collection of matching MimeType(s); contains UNKNOWN_MIME_TYPE
 *         if nothing matched
 * @throws MimeException
 *             propagated from the two argument overload
 */
public final Collection getMimeTypes(final URL url) throws MimeException
{
    final Collection detected = getMimeTypes(url, UNKNOWN_MIME_TYPE);
    return detected;
}
/**
 * Get all of the matching mime types for this URL object.
 * <p>
 * A <code>file:</code> URL whose path denotes an existing local directory
 * yields DIRECTORY_MIME_TYPE. Every other URL is passed to the detector
 * registry and the registry's result is collected, with unknownMimeType
 * removed from it. If the resulting collection is empty, it will contain
 * just unknownMimeType. A null URL is logged at SEVERE level and also
 * results in a collection holding only unknownMimeType.
 * </p>
 *
 * @param url
 *            a URL to detect, may be null
 * @param unknownMimeType
 *            the MimeType to return when nothing matched
 * @return the Collection of matching mime types, never empty
 * @throws MimeException
 *             as propagated from the detector registry
 */
public final Collection getMimeTypes(final URL url, final MimeType unknownMimeType) throws MimeException
{
    final Collection detected = new MimeTypeHashSet();
    if (url == null)
    {
        CapoApplication.logger.log(Level.SEVERE,"URL reference cannot be null.");
    }
    else
    {
        CapoApplication.logger.finest("Getting MIME types for URL [" + url + "].");
        // Only a file: URL refers to the local file system. Probing the
        // path of e.g. an http: URL locally would report a directory
        // whenever an unrelated local directory has the same path.
        // NOTE(review): percent-encoded characters in the path are not
        // decoded before the check - confirm whether that is needed.
        final boolean localDirectory = "file".equalsIgnoreCase(url.getProtocol())
                && new File(url.getPath()).isDirectory();
        if (localDirectory)
        {
            detected.add(MimeUtil2.DIRECTORY_MIME_TYPE);
        }
        else
        {
            detected.addAll(mimeDetectorRegistry.getMimeTypes(url));
            // We don't want the unknownMimeType added to the collection by
            // MimeDetector(s)
            detected.remove(unknownMimeType);
        }
    }
    // If the collection is empty we want to add the unknownMimetype
    if (detected.isEmpty())
    {
        detected.add(unknownMimeType);
    }
    CapoApplication.logger.finest("Retrieved MIME types [" + detected.toString() + "]");
    return detected;
}
/**
 * Returns the native byte order of the platform, as reported by
 * {@link ByteOrder#nativeOrder()} when this class was initialised.
 * According to the original documentation this is used internally for the
 * magic mime rules mapping.
 *
 * @return the native ByteOrder
 */
public static ByteOrder getNativeOrder()
{
    return nativeByteOrder;
}
/**
* Gives you the best match for your requirements.
* <p>
* You can pass the accept header from a browser request to this method
* along with a comma separated list of possible mime types returned from
* say getExtensionMimeTypes(...) and the best match according to the accept
* header will be returned.
* </p>
* <p>
* The following is typical of what may be specified in an HTTP Accept
* header:
* </p>
* <p>
* Accept: text/xml, application/xml, application/xhtml+xml,
* text/html;q=0.9, text/plain;q=0.8, video/x-mng, image/png, image/jpeg,
* image/gif;q=0.2, text/css, */*;q=0.1
* </p>
* <p>
* The quality parameter (q) indicates how well the user agent handles the
* MIME type. A value of 1 indicates the MIME type is understood perfectly,
* and a value of 0 indicates the MIME type isn't understood at all.
* </p>
* <p>
* The reason the image/gif MIME type contains a quality parameter of 0.2,
* is to indicate that PNG & JPEG are preferred over GIF if the server is
* using content negotiation to deliver either a PNG or a GIF to user
* agents. Similarly, the text/html quality parameter has been lowered a
* little, to ensure that the XML MIME types are given in preference if
* content negotiation is being used to serve an XHTML document.
* </p>
*
* @param accept
* is a comma separated list of mime types you can accept
* including QoS parameters. Can pass the Accept: header
* directly.
* @param canProvide
* is a comma separated list of mime types that can be provided
* such as that returned from a call to
* getExtensionMimeTypes(...)
* @return the best matching mime type possible.
*/
public static MimeType getPreferedMimeType(String accept, final String canProvide)
{
if (canProvide == null || canProvide.trim().length() == 0)
{
throw new MimeException("Must specify at least one MIME type that can be provided.");
}
if (accept == null || accept.trim().length() == 0)
{
accept = "*/*";
}
// If an accept header is passed in then lets remove the Accept part
if (accept.indexOf(":") > 0)
{
accept = accept.substring(accept.indexOf(":") + 1);
}
// Remove any unwanted spaces from the wanted mime types for instance
// text/html; q=0.4
accept = accept.replaceAll(" ", "");
return getBestMatch(accept, getList(canProvide));
}
/**
 * Picks the most specific MimeType out of a Collection of MimeType(s),
 * judged by {@link MimeType#getSpecificity()}.
 *
 * @param mimeTypes
 *            the Collection of MimeType(s) to choose from, typically the
 *            result of a getMimeTypes(...) call. Must not be null.
 * @return the MimeType with the highest specificity. On a tie the first
 *         one encountered while iterating wins. Returns null if the
 *         Collection is empty or no element has a specificity above zero.
 */
public static MimeType getMostSpecificMimeType(final Collection mimeTypes)
{
    MimeType best = null;
    int bestSpecificity = 0;
    final Iterator candidates = mimeTypes.iterator();
    while (candidates.hasNext())
    {
        final MimeType candidate = (MimeType) candidates.next();
        final int candidateSpecificity = candidate.getSpecificity();
        // Strictly greater, so earlier elements win ties
        if (candidateSpecificity > bestSpecificity)
        {
            best = candidate;
            bestSpecificity = candidateSpecificity;
        }
    }
    return best;
}
/**
 * Utility method returning the minor (sub type) part of a mime type, i.e.
 * the part behind the '/' character. The string is parsed by constructing
 * a MimeType from it.
 *
 * @param mimeType
 *            the mime type string to take the minor part from
 * @return sub type of the mime type
 * @throws MimeException
 *             if you pass in an invalid mime type structure
 */
public static String getSubType(final String mimeType) throws MimeException
{
    final MimeType parsed = new MimeType(mimeType);
    return parsed.getSubType();
}
/**
 * Check to see if this mime type is one of the known types, i.e. it has
 * been added using addKnownMimeType(...).
 * <p>
 * The lookup is done while holding the lock of the shared map, because the
 * per media type sets are plain sets that addKnownMimeType(...) may be
 * modifying from another thread.
 * </p>
 *
 * @param mimeType
 *            the MimeType to look up, must not be null
 * @return true if the mimeType is in the list else false is returned. Also
 *         false if a MimeException occurs while reading the media or sub
 *         type.
 * @see #addKnownMimeType(String mimetype)
 */
public static boolean isMimeTypeKnown(final MimeType mimeType)
{
    try
    {
        synchronized (mimeTypes)
        {
            final Set subTypes = (Set) mimeTypes.get(mimeType.getMediaType());
            if (subTypes == null)
            {
                return false;
            }
            return subTypes.contains(mimeType.getSubType());
        }
    } catch (MimeException e)
    {
        return false;
    }
}
/**
 * Check to see if the mime type given as a string is one of the known
 * types. The string is turned into a MimeType and the check is delegated
 * to {@link #isMimeTypeKnown(MimeType)}.
 *
 * @param mimeType
 *            the mime type string to look up
 * @return true if the mimeType is in the list else false is returned
 * @see #addKnownMimeType(String mimetype)
 */
public static boolean isMimeTypeKnown(final String mimeType)
{
    final MimeType parsed = new MimeType(mimeType);
    return isMimeTypeKnown(parsed);
}
/**
 * Convenience check telling whether a MimeType instance is in fact a
 * TextMimeType. This is the same as using <code>instanceof</code> and is
 * handy before casting an element of a MimeType collection to
 * TextMimeType.
 *
 * @param mimeType
 *            the instance to test, may be null
 * @return true if the passed in instance is a TextMimeType, false
 *         otherwise (including for null)
 * @see MimeType
 * @see TextMimeType
 */
public static boolean isTextMimeType(final MimeType mimeType)
{
    final boolean textual = mimeType instanceof TextMimeType;
    return textual;
}
/**
 * Removes a MimeDetector from the detector registry of this instance. The
 * call is forwarded unchanged to the registry.
 *
 * @param mimeDetector
 *            the detector instance to remove
 * @return whatever the registry returns; per the original documentation
 *         the MimeDetector that was removed, else null.
 */
public MimeDetector unregisterMimeDetector(final MimeDetector mimeDetector)
{
    final MimeDetector removed = mimeDetectorRegistry.unregisterMimeDetector(mimeDetector);
    return removed;
}
/**
 * Removes a MimeDetector, identified by name, from the detector registry
 * of this instance. The call is forwarded unchanged to the registry.
 *
 * @param mimeDetector
 *            the name of the detector to remove
 * @return whatever the registry returns; per the original documentation
 *         the MimeDetector that was removed, else null.
 */
public MimeDetector unregisterMimeDetector(final String mimeDetector)
{
    final MimeDetector removed = mimeDetectorRegistry.unregisterMimeDetector(mimeDetector);
    return removed;
}
/**
 * Returns the quality (<code>q=</code>) value of a mime type. This is an
 * alias for {@link #getMimeQuality(String)}, which documents the rules
 * applied when no explicit <code>q</code> value is present.
 *
 * @param mimeType
 *            the mime type string, with or without a q parameter
 * @return the quality value computed by getMimeQuality(...)
 * @throws MimeException
 *             propagated from getMimeQuality(...)
 */
public static double getQuality(final String mimeType) throws MimeException
{
    final double quality = getMimeQuality(mimeType);
    return quality;
}
// Chooses, from the mime types that can be provided, the one with the
// highest quality value according to the accept string.
// With exactly one providable type that type is returned unconditionally.
// Returns null when no acceptable entry has a quality above zero.
private static MimeType getBestMatch(final String accept, final List canProvideList)
{
    if (canProvideList.size() == 1)
    {
        // A single candidate is served regardless of what was asked for,
        // even if it is not wanted at all or has the worst quality. This
        // covers the majority of cases.
        return new MimeType((String) canProvideList.get(0));
    }
    final Map wanted = normaliseWantedMap(accept, canProvideList);
    MimeType winner = null;
    double highest = 0.0;
    // Walk every per media type list in insertion order
    for (Iterator groups = wanted.values().iterator(); groups.hasNext();)
    {
        final List candidates = (List) groups.next();
        for (Iterator each = candidates.iterator(); each.hasNext();)
        {
            final String candidate = (String) each.next();
            final double quality = getMimeQuality(candidate);
            final String media = getMediaType(candidate);
            final String sub = getSubType(candidate);
            // Strictly greater: the first entry with the top quality wins
            if (quality > highest)
            {
                highest = quality;
                winner = new MimeType(media + "/" + sub);
            }
        }
    }
    // Every wanted entry has been considered
    return winner;
}
// Splits a comma separated string and returns its trimmed elements as a
// new list, in their original order.
private static List getList(final String options)
{
    final List trimmed = new ArrayList();
    final String[] tokens = options.split(",");
    int pos = 0;
    while (pos < tokens.length)
    {
        trimmed.add(tokens[pos].trim());
        pos++;
    }
    return trimmed;
}
// Builds, from the comma separated accept string, a map of media type to a
// list of "type/subtype;q=quality" strings, restricted to the mime types
// that can actually be provided. The map keeps insertion order.
private static Map normaliseWantedMap(final String accept, final List canProvide)
{
    final Map wanted = new LinkedHashMap();
    final String[] accepted = accept.split(",");
    for (int idx = 0; idx < accepted.length; idx++)
    {
        final String entry = accepted[idx].trim();
        final String major = getMediaType(entry);
        final String minor = getSubType(entry);
        // Every string produced for this accept entry carries its quality
        final String qSuffix = ";q=" + getMimeQuality(entry);
        if (StringUtil.contains(major, "*"))
        {
            // Media type wild card: every providable type is acceptable
            // with the quality of this entry
            for (Iterator it = canProvide.iterator(); it.hasNext();)
            {
                final String offered = (String) it.next();
                appendWanted(wanted, getMediaType(offered), offered + qSuffix);
            }
        }
        else if (StringUtil.contains(minor, "*"))
        {
            // Sub type wild card: every providable type of the same media
            // type is acceptable
            for (Iterator it = canProvide.iterator(); it.hasNext();)
            {
                final String offered = (String) it.next();
                if (getMediaType(offered).equals(major))
                {
                    appendWanted(wanted, major, major + "/" + getSubType(offered) + qSuffix);
                }
            }
        }
        else if (canProvide.contains(major + "/" + minor))
        {
            // Exact type: only acceptable if it can be provided as is
            appendWanted(wanted, major, major + "/" + minor + qSuffix);
        }
    }
    return wanted;
}
// Appends an entry to the list stored under mediaType, creating and
// registering the list on first use.
private static void appendWanted(final Map wanted, final String mediaType, final String entry)
{
    List bucket = (List) wanted.get(mediaType);
    if (bucket == null)
    {
        bucket = new ArrayList();
        wanted.put(mediaType, bucket);
    }
    bucket.add(entry);
}
/**
 * Utility method to open an InputStream for a URL.
 * <p>
 * The stream is first requested from the URL itself. Should that attempt
 * fail with a {@link ZipException}, the request is handed over to
 * <code>ZipJarUtil.getInputStreamForURL(url)</code> instead.
 * </p>
 *
 * @param url
 *            the URL to open
 * @return the stream obtained from the URL directly or, after a
 *         ZipException, from the ZipJarUtil fallback
 * @throws Exception
 *             any failure other than a ZipException raised while opening
 *             the URL directly, or any failure raised by the fallback
 */
public static InputStream getInputStreamForURL(URL url) throws Exception
{
    InputStream stream;
    try
    {
        stream = url.openStream();
    }
    catch (ZipException zipFailure)
    {
        stream = ZipJarUtil.getInputStreamForURL(url);
    }
    return stream;
}
}
/**
 * <p>
 * Package-private registry of {@link MimeDetector} instances. Detectors are
 * stored in a sorted map under the name they were registered with and are
 * consulted in ascending name order by the <code>getMimeTypes</code>
 * methods. A dedicated TextMimeDetector instance is held separately and is
 * always consulted first, provided at least one supported encoding is
 * configured in the {@link EncodingGuesser}.
 * </p>
 * <p>
 * Failures of individual detectors never abort a lookup: an
 * UnsupportedOperationException is ignored and any other exception is
 * logged at SEVERE level.
 * </p>
 * <p>
 * NOTE(review): earlier documentation stated that every returned Collection
 * is a MimeTypeHashSet. The code here starts from an ArrayList and otherwise
 * returns whatever collection the TextMimeDetector produced - confirm against
 * TextMimeDetector. Also note that an UnsupportedOperationException raised
 * by <code>addAll</code> on such a collection would be ignored just like one
 * raised by a detector.
 * </p>
 *
 * @author Steven McArdle
 */
class MimeDetectorRegistry
{
    /**
     * This property holds an instance of the TextMimeDetector. It is kept
     * outside the map of registered detectors, so the register and
     * unregister methods of this class never touch this instance.
     */
    private TextMimeDetector textMimeDetector = new TextMimeDetector(1);

    /**
     * Registered detectors keyed by the name passed to
     * {@link #registerMimeDetector(String)}. Being a TreeMap with natural
     * ordering it iterates in ascending key order and rejects null keys.
     */
    private Map mimeDetectors = new TreeMap();

    /**
     * Use the fully qualified name of a MimeDetector and try to instantiate it
     * if it's not already registered. If it's already registered then log a
     * warning and return the already registered MimeDetector
     *
     * @param mimeDetector
     *            fully qualified class name of the MimeDetector to register
     * @return MimeDetector registered under this name. Returns null if the
     *         name is null or an exception occurs
     */
    MimeDetector registerMimeDetector(final String mimeDetector)
    {
        if (mimeDetector == null)
        {
            // The backing TreeMap throws a NullPointerException for null
            // keys; report the problem the same way as any other failed
            // registration instead.
            CapoApplication.logger.warning("MimeDetector [null] cannot be registered.");
            return null;
        }
        if (mimeDetectors.containsKey(mimeDetector))
        {
            CapoApplication.logger.warning("MimeDetector [" + mimeDetector + "] will not be registered as a MimeDetector with this name is already registered.");
            return (MimeDetector) mimeDetectors.get(mimeDetector);
        }
        // Create the mime detector if we can
        try
        {
            MimeDetector md = (MimeDetector) Class.forName(mimeDetector).newInstance();
            md.init();
            CapoApplication.logger.finest("Registering MimeDetector with name [" + md.getName() + "] and description [" + md.getDescription() + "]");
            mimeDetectors.put(mimeDetector, md);
            return md;
        }
        catch (Exception e)
        {
            CapoApplication.logger.log(Level.SEVERE, "Exception while registering MimeDetector [" + mimeDetector + "].", e);
        }
        // Failed to create an instance
        return null;
    }

    /**
     * Look up a registered MimeDetector.
     *
     * @param name
     *            the name the MimeDetector was registered under
     * @return the registered MimeDetector or null if the name is null or
     *         nothing is registered under it
     */
    MimeDetector getMimeDetector(final String name)
    {
        if (name == null)
        {
            // Null keys are rejected by the backing TreeMap
            return null;
        }
        return (MimeDetector) mimeDetectors.get(name);
    }

    /**
     * Collect the mime types reported for a byte array by the
     * TextMimeDetector and by every registered MimeDetector.
     *
     * @param data
     *            the bytes to inspect
     * @return collection of everything the detectors reported, possibly empty
     * @throws MimeException
     *             declared for callers; detector failures are handled inside
     *             this method
     */
    Collection getMimeTypes(final byte[] data) throws MimeException
    {
        Collection mimeTypes = new ArrayList();
        try
        {
            if (!EncodingGuesser.getSupportedEncodings().isEmpty())
            {
                mimeTypes = textMimeDetector.getMimeTypes(data);
            }
        }
        catch (UnsupportedOperationException ignore)
        {
            // The TextMimeDetector will throw this if it decides
            // the content is not text
        }
        for (Iterator it = mimeDetectors.values().iterator(); it.hasNext();)
        {
            try
            {
                MimeDetector md = (MimeDetector) it.next();
                mimeTypes.addAll(md.getMimeTypes(data));
            }
            catch (UnsupportedOperationException ignore)
            {
                // We ignore this as it indicates that this MimeDetector does
                // not support getting mime types from byte arrays
            }
            catch (Exception e)
            {
                // One failing detector must not prevent the others from
                // being consulted
                CapoApplication.logger.log(Level.SEVERE, e.getLocalizedMessage(), e);
            }
        }
        return mimeTypes;
    }

    /**
     * Collect the mime types reported for a file name by the
     * TextMimeDetector and by every registered MimeDetector.
     *
     * @param fileName
     *            the file name to inspect
     * @return collection of everything the detectors reported, possibly empty
     * @throws MimeException
     *             declared for callers; detector failures are handled inside
     *             this method
     */
    Collection getMimeTypes(final String fileName) throws MimeException
    {
        Collection mimeTypes = new ArrayList();
        try
        {
            if (!EncodingGuesser.getSupportedEncodings().isEmpty())
            {
                mimeTypes = textMimeDetector.getMimeTypes(fileName);
            }
        }
        catch (UnsupportedOperationException ignore)
        {
            // The TextMimeDetector will throw this if it decides
            // the content is not text
        }
        for (Iterator it = mimeDetectors.values().iterator(); it.hasNext();)
        {
            try
            {
                MimeDetector md = (MimeDetector) it.next();
                mimeTypes.addAll(md.getMimeTypes(fileName));
            }
            catch (UnsupportedOperationException ignore)
            {
                // We ignore this as it indicates that this MimeDetector does
                // not support getting mime types from file names
            }
            catch (Exception e)
            {
                // One failing detector must not prevent the others from
                // being consulted
                CapoApplication.logger.log(Level.SEVERE, e.getLocalizedMessage(), e);
            }
        }
        return mimeTypes;
    }

    /**
     * Collect the mime types reported for a file by the TextMimeDetector and
     * by every registered MimeDetector.
     *
     * @param file
     *            the file to inspect
     * @return collection of everything the detectors reported, possibly empty
     * @throws MimeException
     *             declared for callers; detector failures are handled inside
     *             this method
     */
    Collection getMimeTypes(final File file) throws MimeException
    {
        Collection mimeTypes = new ArrayList();
        try
        {
            if (!EncodingGuesser.getSupportedEncodings().isEmpty())
            {
                mimeTypes = textMimeDetector.getMimeTypes(file);
            }
        }
        catch (UnsupportedOperationException ignore)
        {
            // The TextMimeDetector will throw this if it decides
            // the content is not text
        }
        for (Iterator it = mimeDetectors.values().iterator(); it.hasNext();)
        {
            try
            {
                MimeDetector md = (MimeDetector) it.next();
                mimeTypes.addAll(md.getMimeTypes(file));
            }
            catch (UnsupportedOperationException ignore)
            {
                // We ignore this as it indicates that this MimeDetector does
                // not support getting mime types from files
            }
            catch (Exception e)
            {
                // One failing detector must not prevent the others from
                // being consulted
                CapoApplication.logger.log(Level.SEVERE, e.getLocalizedMessage(), e);
            }
        }
        return mimeTypes;
    }

    /**
     * Collect the mime types reported for an InputStream by the
     * TextMimeDetector and by every registered MimeDetector. The same stream
     * instance is handed to each detector in turn.
     *
     * @param in
     *            the stream to inspect
     * @return collection of everything the detectors reported, possibly empty
     * @throws MimeException
     *             declared for callers; detector failures are handled inside
     *             this method
     */
    Collection getMimeTypes(final InputStream in) throws MimeException
    {
        Collection mimeTypes = new ArrayList();
        try
        {
            if (!EncodingGuesser.getSupportedEncodings().isEmpty())
            {
                mimeTypes = textMimeDetector.getMimeTypes(in);
            }
        }
        catch (UnsupportedOperationException ignore)
        {
            // The TextMimeDetector will throw this if it decides
            // the content is not text
        }
        for (Iterator it = mimeDetectors.values().iterator(); it.hasNext();)
        {
            try
            {
                MimeDetector md = (MimeDetector) it.next();
                mimeTypes.addAll(md.getMimeTypes(in));
            }
            catch (UnsupportedOperationException ignore)
            {
                // We ignore this as it indicates that this MimeDetector does
                // not support getting mime types from streams
            }
            catch (Exception e)
            {
                // One failing detector must not prevent the others from
                // being consulted
                CapoApplication.logger.log(Level.SEVERE, e.getLocalizedMessage(), e);
            }
        }
        return mimeTypes;
    }

    /**
     * Collect the mime types reported for a URL by the TextMimeDetector and
     * by every registered MimeDetector.
     *
     * @param url
     *            the URL to inspect
     * @return collection of everything the detectors reported, possibly empty
     * @throws MimeException
     *             declared for callers; detector failures are handled inside
     *             this method
     */
    Collection getMimeTypes(final URL url) throws MimeException
    {
        Collection mimeTypes = new ArrayList();
        try
        {
            if (!EncodingGuesser.getSupportedEncodings().isEmpty())
            {
                mimeTypes = textMimeDetector.getMimeTypes(url);
            }
        }
        catch (UnsupportedOperationException ignore)
        {
            // The TextMimeDetector will throw this if it decides
            // the content is not text
        }
        for (Iterator it = mimeDetectors.values().iterator(); it.hasNext();)
        {
            try
            {
                MimeDetector md = (MimeDetector) it.next();
                mimeTypes.addAll(md.getMimeTypes(url));
            }
            catch (UnsupportedOperationException ignore)
            {
                // We ignore this as it indicates that this MimeDetector does
                // not support getting mime types from URLs
            }
            catch (Exception e)
            {
                // One failing detector must not prevent the others from
                // being consulted
                CapoApplication.logger.log(Level.SEVERE, e.getLocalizedMessage(), e);
            }
        }
        return mimeTypes;
    }

    /**
     * Unregister the MimeDetector registered under the given name. The
     * detector's <code>delete()</code> method is called before it is removed;
     * if that call throws, the exception is logged and the detector stays
     * registered.
     *
     * @param mimeDetector
     *            the name the MimeDetector was registered under
     * @return MimeDetector unregistered, or null if the name is null, nothing
     *         was registered under it, or an exception occurred
     */
    MimeDetector unregisterMimeDetector(final String mimeDetector)
    {
        if (mimeDetector == null)
        {
            return null;
        }
        CapoApplication.logger.finest("Unregistering MimeDetector [" + mimeDetector + "] from registry.");
        try
        {
            MimeDetector md = (MimeDetector) mimeDetectors.get(mimeDetector);
            if (md != null)
            {
                md.delete();
                return (MimeDetector) mimeDetectors.remove(mimeDetector);
            }
        }
        catch (Exception e)
        {
            CapoApplication.logger.log(Level.SEVERE, "Exception while un-registering MimeDetector [" + mimeDetector + "].", e);
        }
        // Nothing was registered under this name, or delete() failed
        return null;
    }

    /**
     * unregister the MimeDetector from the list. The lookup is done with the
     * value of <code>mimeDetector.getName()</code>.
     * NOTE(review): registration stores detectors under the class name passed
     * to registerMimeDetector - confirm getName() returns that same string.
     *
     * @param mimeDetector
     *            the MimeDetector to unregister
     * @return MimeDetector unregistered or null if it was not registered
     */
    MimeDetector unregisterMimeDetector(final MimeDetector mimeDetector)
    {
        if (mimeDetector == null)
        {
            return null;
        }
        return unregisterMimeDetector(mimeDetector.getName());
    }
}