/** * Copyright (c) 2000-present Liferay, Inc. All rights reserved. * * This library is free software; you can redistribute it and/or modify it under * the terms of the GNU Lesser General Public License as published by the Free * Software Foundation; either version 2.1 of the License, or (at your option) * any later version. * * This library is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more * details. */ package com.liferay.portal.util; import com.liferay.portal.kernel.exception.SystemException; import com.liferay.portal.kernel.log.Log; import com.liferay.portal.kernel.log.LogFactoryUtil; import com.liferay.portal.kernel.util.ContentTypes; import com.liferay.portal.kernel.util.GetterUtil; import com.liferay.portal.kernel.util.MimeTypes; import com.liferay.portal.kernel.util.SetUtil; import com.liferay.portal.kernel.util.StreamUtil; import com.liferay.portal.kernel.util.Validator; import java.io.File; import java.io.FileNotFoundException; import java.io.InputStream; import java.net.URL; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.Map; import java.util.Set; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import org.apache.tika.detect.DefaultDetector; import org.apache.tika.detect.Detector; import org.apache.tika.io.CloseShieldInputStream; import org.apache.tika.io.TikaInputStream; import org.apache.tika.metadata.Metadata; import org.apache.tika.mime.MediaType; import org.apache.tika.mime.MimeTypesReaderMetKeys; import org.w3c.dom.Document; import org.w3c.dom.Element; import org.w3c.dom.Node; import org.w3c.dom.NodeList; import org.xml.sax.InputSource; /** * @author Jorge Ferrer * @author Brian Wing Shun Chan * @author Alexander Chow */ public class MimeTypesImpl implements MimeTypes, MimeTypesReaderMetKeys { public MimeTypesImpl() { _detector = new DefaultDetector( org.apache.tika.mime.MimeTypes.getDefaultMimeTypes()); _webImageMimeTypes = SetUtil.fromArray( PropsValues.MIME_TYPES_WEB_IMAGES); } public void afterPropertiesSet() { URL url = org.apache.tika.mime.MimeTypes.class.getResource( "tika-mimetypes.xml"); try { read(url.openStream()); } catch (Exception e) { _log.error("Unable to populate extensions map", e); } } @Override public String getContentType(File file) { return getContentType(file, file.getName()); } @Override public String getContentType(File file, String fileName) { if ((file == null) || !file.exists()) { return getContentType(fileName); } InputStream is = null; try { is = TikaInputStream.get(file); return getContentType(is, fileName); } catch (FileNotFoundException fnfe) { return getContentType(fileName); } finally { StreamUtil.cleanUp(is); } } @Override public String getContentType(InputStream inputStream, String fileName) { if (inputStream == null) { return getContentType(fileName); } String contentType = null; TikaInputStream tikaInputStream = null; try { tikaInputStream = TikaInputStream.get( new CloseShieldInputStream(inputStream)); Metadata metadata = new Metadata(); metadata.set(Metadata.RESOURCE_NAME_KEY, fileName); MediaType mediaType = _detector.detect(tikaInputStream, metadata); contentType = mediaType.toString(); if (contentType.contains("tika")) { if (_log.isDebugEnabled()) { _log.debug("Retrieved invalid content type " + contentType); } contentType = getContentType(fileName); } if (contentType.contains("tika")) { if (_log.isDebugEnabled()) { _log.debug("Retrieved invalid content type " + contentType); } contentType = ContentTypes.APPLICATION_OCTET_STREAM; } } catch (Exception e) { _log.error(e, e); contentType = ContentTypes.APPLICATION_OCTET_STREAM; } finally { StreamUtil.cleanUp(tikaInputStream); } return contentType; } @Override public String getContentType(String fileName) { if (Validator.isNull(fileName)) { return ContentTypes.APPLICATION_OCTET_STREAM; } try { Metadata metadata = new Metadata(); metadata.set(Metadata.RESOURCE_NAME_KEY, fileName); MediaType mediaType = _detector.detect(null, metadata); String contentType = mediaType.toString(); if (!contentType.contains("tika")) { return contentType; } else if (_log.isDebugEnabled()) { _log.debug("Retrieved invalid content type " + contentType); } } catch (Exception e) { _log.error(e, e); } return ContentTypes.APPLICATION_OCTET_STREAM; } @Override public String getExtensionContentType(String extension) { if (Validator.isNull(extension)) { return ContentTypes.APPLICATION_OCTET_STREAM; } return getContentType("A.".concat(extension)); } @Override public Set<String> getExtensions(String contentType) { Set<String> extensions = _extensionsMap.get(contentType); if (extensions == null) { extensions = Collections.emptySet(); } return extensions; } @Override public boolean isWebImage(String mimeType) { return _webImageMimeTypes.contains(mimeType); } protected void read(InputStream stream) throws Exception { DocumentBuilderFactory documentBuilderFactory = DocumentBuilderFactory.newInstance(); DocumentBuilder documentBuilder = documentBuilderFactory.newDocumentBuilder(); Document document = documentBuilder.parse(new InputSource(stream)); Element element = document.getDocumentElement(); if ((element == null) || !MIME_INFO_TAG.equals(element.getTagName())) { throw new SystemException("Invalid configuration file"); } NodeList nodeList = element.getChildNodes(); for (int i = 0; i < nodeList.getLength(); i++) { Node node = nodeList.item(i); if (node.getNodeType() != Node.ELEMENT_NODE) { continue; } Element childElement = (Element)node; if (MIME_TYPE_TAG.equals(childElement.getTagName())) { readMimeType(childElement); } } } protected void readMimeType(Element element) { Set<String> mimeTypes = new HashSet<>(); Set<String> extensions = new HashSet<>(); String name = element.getAttribute(MIME_TYPE_TYPE_ATTR); mimeTypes.add(name); NodeList nodeList = element.getChildNodes(); for (int i = 0; i < nodeList.getLength(); i++) { Node node = nodeList.item(i); if (node.getNodeType() != Node.ELEMENT_NODE) { continue; } Element childElement = (Element)node; if (ALIAS_TAG.equals(childElement.getTagName())) { String alias = childElement.getAttribute(ALIAS_TYPE_ATTR); mimeTypes.add(alias); } else if (GLOB_TAG.equals(childElement.getTagName())) { boolean regex = GetterUtil.getBoolean( childElement.getAttribute(ISREGEX_ATTR)); if (regex) { continue; } String pattern = childElement.getAttribute(PATTERN_ATTR); if (!pattern.startsWith("*")) { continue; } String extension = pattern.substring(1); if (!extension.contains("*") && !extension.contains("?") && !extension.contains("[")) { extensions.add(extension); } } } for (String mimeType : mimeTypes) { _extensionsMap.put(mimeType, extensions); } } private static final Log _log = LogFactoryUtil.getLog(MimeTypesImpl.class); private final Detector _detector; private final Map<String, Set<String>> _extensionsMap = new HashMap<>(); private final Set<String> _webImageMimeTypes; }