/** * Copyright (c) Codice Foundation * <p/> * This is free software: you can redistribute it and/or modify it under the terms of the GNU Lesser * General Public License as published by the Free Software Foundation, either version 3 of the * License, or any later version. * <p/> * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without * even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. A copy of the GNU Lesser General Public License * is distributed along with this program and can be found at * <http://www.gnu.org/licenses/lgpl.html>. */ package ddf.mime.mapper; import java.io.IOException; import java.io.InputStream; import java.util.ArrayList; import java.util.Collections; import java.util.Comparator; import java.util.List; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.ParserConfigurationException; import org.apache.commons.lang.StringUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.w3c.dom.Document; import org.w3c.dom.Node; import org.xml.sax.SAXException; import ddf.mime.MimeTypeMapper; import ddf.mime.MimeTypeResolutionException; import ddf.mime.MimeTypeResolver; /** * Implementation of the {@link MimeTypeMapper} interface that searches through all of the * registered {@link MimeTypeResolver}s to retieve file extension for a given mime type, and vice * versa. Once a file extension (or mime type) is resolved, this mapper stops searching through any * remaining {@link MimeTypeResolver}s and returns. * * @since 2.1.0 * */ public class MimeTypeMapperImpl implements MimeTypeMapper { private static final Logger LOGGER = LoggerFactory.getLogger(MimeTypeMapperImpl.class); private static final String XML_FILE_EXTENSION = "xml"; private static Comparator<MimeTypeResolver> comparator = new Comparator<MimeTypeResolver>() { public int compare(MimeTypeResolver o1, MimeTypeResolver o2) { return o1.getPriority() - o2.getPriority(); } }; /** * The {@link List} of {@link MimeTypeResolver}s configured for this mapper and will be searched * on mime type/file extension mapping requests. */ protected List<MimeTypeResolver> mimeTypeResolvers; protected MimeTypeResolver mimeTypeResolver; /** * Constructs the MimeTypeMapper with a list of {@link MimeTypeResolver}s. * * @param mimeTypeResolvers * the {@link List} of {@link MimeTypeResolver}s */ public MimeTypeMapperImpl(List<MimeTypeResolver> mimeTypeResolvers) { LOGGER.debug("INSIDE: MimeTypeMapperImpl constructor"); this.mimeTypeResolvers = mimeTypeResolvers; } @Override public String getFileExtensionForMimeType(String mimeType) throws MimeTypeResolutionException { LOGGER.trace("ENTERING: getFileExtensionForMimeType()"); String extension = null; LOGGER.debug("Looping through {} MimeTypeResolvers", mimeTypeResolvers.size()); // Sort the mime type resolvers in descending order of priority. This should // insure custom mime type resolvers are called before the (default) Apache Tika // mime type resolver. List<MimeTypeResolver> sortedResolvers = sortResolvers(mimeTypeResolvers); // Loop through all of the configured MimeTypeResolvers. The order of their // invocation is determined by their OSGi service ranking. The default // TikaMimeTypeResolver should be called last, allowing any configured custom // mime type resolvers to be invoked first - this allows custom mime type // resolvers that may override mime types supported by Tika to be invoked first. // Once a file extension is find for the given mime type, exit the loop. for (MimeTypeResolver resolver : sortedResolvers) { LOGGER.debug("Calling MimeTypeResolver {}", resolver.getName()); try { extension = resolver.getFileExtensionForMimeType(mimeType); } catch (Exception e) { LOGGER.warn("Error resolving file extension for mime type: {}", mimeType); throw new MimeTypeResolutionException(e); } if (StringUtils.isNotEmpty(extension)) { LOGGER.debug("extension [{}] retrieved from MimeTypeResolver: {}", extension, resolver.getName()); break; } } LOGGER.debug("mimeType = {}, file extension = [{}]", mimeType, extension); LOGGER.trace("EXITING: getFileExtensionForMimeType()"); return extension; } @Override public String getMimeTypeForFileExtension(String fileExtension) throws MimeTypeResolutionException { LOGGER.trace("ENTERING: getMimeTypeForFileExtension()"); String mimeType = null; LOGGER.debug("Looping through {} MimeTypeResolvers", mimeTypeResolvers.size()); // TODO: This is to force the TikaMimeTypeResolver to be called // after the CustomMimeTypeResolvers to prevent Tika default mapping // from being used when a CustomMimeTypeResolver may be more appropriate. List<MimeTypeResolver> sortedResolvers = sortResolvers(mimeTypeResolvers); // Loop through all of the configured MimeTypeResolvers. The order of their // invocation is determined by their OSGi service ranking. The default // TikaMimeTypeResolver should be called last, allowing any configured custom // mime type resolvers to be invoked first - this allows custom mime type // resolvers that may override mime types supported by Tika to be invoked first. // Once a file extension is find for the given mime type, exit the loop. for (MimeTypeResolver resolver : sortedResolvers) { LOGGER.debug("Calling MimeTypeResolver {}", resolver.getName()); try { mimeType = resolver.getMimeTypeForFileExtension(fileExtension); } catch (Exception e) { LOGGER.warn("Error resolving mime type for file extension: " + fileExtension); throw new MimeTypeResolutionException(e); } if (StringUtils.isNotEmpty(mimeType)) { LOGGER.debug("mimeType [{}] retrieved from MimeTypeResolver: ", mimeType, resolver.getName()); break; } } LOGGER.debug("mimeType = {}, file extension = [{}]", mimeType, fileExtension); LOGGER.trace("EXITING: getMimeTypeForFileExtension()"); return mimeType; } @Override public String guessMimeType(InputStream is, String fileExtension) throws MimeTypeResolutionException { LOGGER.trace("ENTERING: guessMimeType()"); String mimeType = null; LOGGER.debug("Looping through{} MimeTypeResolvers", mimeTypeResolvers.size()); // This is to force the TikaMimeTypeResolver to be called // after the CustomMimeTypeResolvers to prevent Tika default mapping // from being used when a CustomMimeTypeResolver may be more appropriate. List<MimeTypeResolver> sortedResolvers = sortResolvers(mimeTypeResolvers); // If file has XML extension, then read root element namespace once so // each MimeTypeResolver does not have to open the stream and read the namespace String namespace = null; if (fileExtension.equals(XML_FILE_EXTENSION)) { namespace = getRootElementNamespace(is); LOGGER.debug("namespace = {}", namespace); } // Loop through all of the configured MimeTypeResolvers. The order of their // invocation is determined by their OSGi service ranking. The default // TikaMimeTypeResolver should be called last, allowing any configured custom // mime type resolvers to be invoked first - this allows custom mime type // resolvers that may override mime types supported by Tika to be invoked first. // Once a file extension is find for the given mime type, exit the loop. for (MimeTypeResolver resolver : sortedResolvers) { LOGGER.debug("Calling MimeTypeResolver {}", resolver.getName()); try { // If processing an XML file, then match the namespace extracted from the // XML file to the MimeTypeResolver that supports that schema (namespace). // If no MimeTypeResolvers support the namespace, then mime type will be null. // Even if a MimeTypeResolver, such as the TikaMimeTypeResolver, were to handle // XML files that have no "known" schema it is highly unlikely there would be // an InputTransformer to create a metacard for that "generic" XML file. if (fileExtension.equals(XML_FILE_EXTENSION)) { if (namespace != null && resolver.hasSchema()) { if (namespace.equals(resolver.getSchema())) { mimeType = resolver.getMimeTypeForFileExtension(fileExtension); } } } else { mimeType = resolver.getMimeTypeForFileExtension(fileExtension); } } catch (Exception e) { LOGGER.warn("Error resolving mime type for file extension: " + fileExtension); throw new MimeTypeResolutionException(e); } if (StringUtils.isNotEmpty(mimeType)) { LOGGER.debug("mimeType [{}] retrieved from MimeTypeResolver: ", mimeType, resolver.getName()); break; } } LOGGER.debug("mimeType = {}, file extension = [{}]", mimeType, fileExtension); LOGGER.trace("EXITING: guessMimeType()"); return mimeType; } private String getRootElementNamespace(InputStream is) { LOGGER.trace("ENTERING: getRootElementNamespace()"); if (is == null) { return null; } String namespace = null; DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); dbf.setNamespaceAware(true); try { DocumentBuilder db = dbf.newDocumentBuilder(); Document document = db.parse(is); Node node = document.getDocumentElement(); namespace = node.getNamespaceURI(); } catch (ParserConfigurationException | SAXException | IOException e) { LOGGER.debug("Unable to get root element namespace"); } LOGGER.trace("ENXITING: getRootElementNamespace() - namespace = {}", namespace); return namespace; } /** * Sort the list of {@link MimeTypeResolver}s by their descending priority, i.e., the lower the * priority the later the {@link MimeTypeResolver} is invoked. * * @param resolvers * the {@link List} of {@link MimeTypeResolver}s * @return the sorted list of {@link MimeTypeResolver}s by descending priority */ private List<MimeTypeResolver> sortResolvers(List<MimeTypeResolver> resolvers) { LOGGER.debug("ENTERING: sortResolvers()"); List<MimeTypeResolver> sortedResolvers = null; if (resolvers != null) { // Log sorted list of PreIngestServices for debugging if (LOGGER.isDebugEnabled()) { LOGGER.debug("Unsorted services"); LOGGER.debug("------------------"); for (MimeTypeResolver resolver : resolvers) { LOGGER.debug("{} (priority: {})", resolver.getName(), resolver.getPriority()); } } // Make copy of input services list because OSGi/Blueprint marks this input list as // read-only sortedResolvers = new ArrayList<MimeTypeResolver>(resolvers); // Inner class Comparator for comparing/sorting Comparator<MimeTypeResolver> comparator = new Comparator<MimeTypeResolver>() { @Override public int compare(MimeTypeResolver arg0, MimeTypeResolver arg1) { LOGGER.debug("INSIDE: Comparator"); return (arg0.getPriority() - arg1.getPriority()); } }; if (sortedResolvers.size() > 1) { LOGGER.debug("Sorting resolvers"); Collections.sort(sortedResolvers, comparator); Collections.reverse(sortedResolvers); } // Log sorted list of PreIngestServices for debugging if (LOGGER.isDebugEnabled()) { LOGGER.debug("Sorted/prioritized services"); LOGGER.debug("---------------------------"); for (MimeTypeResolver resolver : sortedResolvers) { LOGGER.debug("{} (priority: {})", resolver.getName(), resolver.getPriority()); } } } LOGGER.debug("EXITING: sortResolvers()"); return sortedResolvers; } }