package io.lumify.tikaMimeType;
import io.lumify.core.exception.LumifyException;
import io.lumify.core.util.LumifyLogger;
import io.lumify.core.util.LumifyLoggerFactory;
import org.apache.commons.io.FilenameUtils;
import org.apache.tika.detect.DefaultDetector;
import org.apache.tika.detect.Detector;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URLConnection;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Tika {@link Detector} that resolves a media type from the file name stored in the
 * metadata before falling back to Tika's {@link DefaultDetector}.
 *
 * <p>Resolution order when the {@value #METADATA_FILENAME} metadata entry is present:
 * <ol>
 *   <li>{@link URLConnection#guessContentTypeFromName(String)};</li>
 *   <li>the extension table loaded from the {@value #EXT_TO_MIME_TYPE_MAPPING_FILE}
 *       class-path resource;</li>
 *   <li>{@link DefaultDetector#detect(InputStream, Metadata)}.</li>
 * </ol>
 * When the metadata entry is absent only the default detector is consulted.
 */
public class LumifyMimeTypeDetector implements Detector {
    private static final LumifyLogger LOGGER = LumifyLoggerFactory.getLogger(LumifyMimeTypeDetector.class);
    public static final String EXT_TO_MIME_TYPE_MAPPING_FILE = "extToMimeTypeMapping.txt";
    public static final String METADATA_FILENAME = "fileName";

    // NOTE: must be declared before extToMimeTypeMapping, because static initializers run in
    // textual order and the loader below uses this pattern.
    private static final Pattern MAPPING_LINE_PATTERN = Pattern.compile("(.+)\\s+(.+)");

    private final DefaultDetector defaultDetector;

    /** Lower-cased extension (without leading dot) to MIME type string; populated once, then only read. */
    private static final Map<String, String> extToMimeTypeMapping = loadExtToMimeTypeMappingFile();

    /** Creates a detector backed by a fresh Tika {@link DefaultDetector}. */
    public LumifyMimeTypeDetector() {
        defaultDetector = new DefaultDetector();
    }

    /**
     * Detects the media type of a document.
     *
     * @param input    document stream; only passed through to the default detector
     * @param metadata document metadata; the {@value #METADATA_FILENAME} entry, if any, is
     *                 used for name-based detection
     * @return the media type derived from the file name when one can be determined,
     *         otherwise whatever the default detector returns
     * @throws IOException if the default detector fails to read {@code input}
     */
    @Override
    public MediaType detect(InputStream input, Metadata metadata) throws IOException {
        String fileName = metadata.get(METADATA_FILENAME);
        if (fileName != null) {
            // An unparseable guess yields null here and falls through to the next strategy
            // instead of aborting detection.
            MediaType guessedFromName = toMediaType(URLConnection.guessContentTypeFromName(fileName));
            if (guessedFromName != null) {
                return guessedFromName;
            }

            // Locale.ROOT keeps the lookup key stable regardless of the default locale
            // (e.g. "GIF" must become "gif" even under a Turkish locale).
            String extension = FilenameUtils.getExtension(fileName).toLowerCase(Locale.ROOT);
            MediaType mappedFromExtension = setContentTypeUsingFileExt(extension);
            if (mappedFromExtension != null) {
                return mappedFromExtension;
            }
        }
        return defaultDetector.detect(input, metadata);
    }

    /**
     * Loads the extension table from the {@value #EXT_TO_MIME_TYPE_MAPPING_FILE} resource
     * located relative to this class.
     *
     * <p>Each line is expected to hold an extension and a MIME type separated by whitespace.
     * Lines that do not match are logged and skipped. A leading dot on the extension is
     * removed and the extension is lower-cased.
     *
     * @return a map from extension to the first MIME type listed for it
     * @throws LumifyException if the resource is missing or cannot be read
     */
    private static Map<String, String> loadExtToMimeTypeMappingFile() {
        Map<String, String> results = new HashMap<>();
        InputStream in = LumifyMimeTypeDetector.class.getResourceAsStream(EXT_TO_MIME_TYPE_MAPPING_FILE);
        if (in == null) {
            // getResourceAsStream signals "not found" with null rather than an exception.
            throw new LumifyException("Could not load " + EXT_TO_MIME_TYPE_MAPPING_FILE);
        }
        // Closing the reader also closes the wrapped resource stream, on every exit path.
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(in, StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                Matcher m = MAPPING_LINE_PATTERN.matcher(line);
                if (!m.matches()) {
                    LOGGER.warn("Invalid line in mime type mapping file: %s", line);
                    continue;
                }
                String ext = m.group(1).trim().toLowerCase(Locale.ROOT);
                String mimeType = m.group(2).trim();
                if (ext.startsWith(".")) {
                    ext = ext.substring(1);
                }
                // The pattern can match lines whose fields are blank once trimmed; such an
                // entry could never produce a usable media type.
                if (ext.isEmpty() || mimeType.isEmpty()) {
                    LOGGER.warn("Invalid line in mime type mapping file: %s", line);
                    continue;
                }
                // take the first entry because the second entry is the alternative mime type
                if (!results.containsKey(ext)) {
                    results.put(ext, mimeType);
                }
            }
        } catch (IOException ex) {
            throw new LumifyException("Could not load " + EXT_TO_MIME_TYPE_MAPPING_FILE, ex);
        }
        return results;
    }

    /**
     * Looks up the media type registered for a file extension.
     *
     * @param fileExt lower-cased extension without the leading dot
     * @return the mapped media type, or {@code null} if the extension is unknown or its
     *         mapped value cannot be parsed
     */
    private MediaType setContentTypeUsingFileExt(String fileExt) {
        return toMediaType(extToMimeTypeMapping.get(fileExt));
    }

    /**
     * Converts a MIME type string such as {@code "text/plain"} into a {@link MediaType}.
     *
     * @param str MIME type string, may be {@code null}
     * @return the parsed media type, or {@code null} if {@code str} is {@code null} or is
     *         not a valid {@code type/subtype} string
     */
    private MediaType toMediaType(String str) {
        if (str == null) {
            return null;
        }
        MediaType parsed = MediaType.parse(str);
        if (parsed == null) {
            LOGGER.warn("Could not parse mime type: %s", str);
        }
        return parsed;
    }
}