package org.opensextant.xtext.converters;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.text.ParseException;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.apache.commons.io.FilenameUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.tika.detect.DefaultDetector;
import org.apache.tika.detect.Detector;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.sax.BodyContentHandler;
import org.opensextant.data.LatLon;
import org.opensextant.util.GeodeticUtility;
import org.opensextant.xtext.ConvertedDocument;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* Parse mainly JPEG images or any others that have significant metadata headers
* headers are tabulated and put into doc conversion as the text buffer (possibly not desirable).
* And of course if there are loc/time info in the image, such things are pulled out.
*
* @author ubaldino
*
*/
public class ImageMetadataConverter extends ConverterAdapter {
private final Detector detector = new DefaultDetector();
private final Parser parser = new AutoDetectParser(detector);
private final ParseContext ctx = new ParseContext();
private final Logger logger = LoggerFactory.getLogger(getClass());
private boolean emitMinimalText = true;
public final static String[] usefulFields = { "geo", "gps", "creation", "date", "model" };
private final static Set<String> usefulFieldsSet = new HashSet<String>();
static {
usefulFieldsSet.addAll(Arrays.asList(usefulFields));
}
public ImageMetadataConverter() {
ctx.set(Parser.class, parser);
}
/**
* This form generates a TEXT version of the JPEG that has the minimal amount of text - GPS*, geo*, model, and creation (date).
* @param mimimalText true if you wish to save minimal text with conversions; Otherwise default is to format all EXIF or other metadata properties as text
*/
public ImageMetadataConverter(boolean mimimalText) {
this();
emitMinimalText = mimimalText;
}
/**
* filter out irrelevant metadata for text.
* @param metakey property name
* @return if property is useful by our standards; see usefulfields
*/
private static boolean isUseful(String metakey) {
if (metakey == null) {
return false;
}
String testKey = metakey.toLowerCase();
for (String key : usefulFields) {
if (key.contains(testKey)) {
return true;
}
}
return false;
}
/**
* Could pull in geodesy to do an Angle(lat,lon).toString() ...
* @param yx LatLon object
* @return formatted string of LL
*/
private String formatCoord(LatLon yx) {
if (GeodeticUtility.validateCoordinate(yx.getLatitude(), yx.getLongitude())) {
String latHemi = "N";
String lonHemi = "E";
if (yx.getLatitude() < 0) {
latHemi = "S";
}
if (yx.getLongitude() < 0) {
lonHemi = "W";
}
return String.format("%2.6f%s %2.6f%s", Math.abs(yx.getLatitude()), latHemi,
Math.abs(yx.getLongitude()), lonHemi);
} else {
return String.format("invalid Lat %d x Lon %d", yx.getLatitude(), yx.getLongitude());
}
}
@Override
protected ConvertedDocument conversionImplementation(InputStream in, File doc)
throws IOException {
ConvertedDocument imgDoc = new ConvertedDocument(doc);
imgDoc.setEncoding(ConvertedDocument.OUTPUT_ENCODING);
imgDoc.is_plaintext = false;
Metadata metadata = new Metadata();
StringBuilder buf = new StringBuilder();
BodyContentHandler handler = new BodyContentHandler();
String type = "Image";
String objName = null;
if (doc != null) {
objName = doc.getName();
String ext = FilenameUtils.getExtension(doc.getName().toLowerCase());
if ("jpg".equals(ext) || "jpeg".equals(ext)) {
type = "Photo";
}
}
try {
parser.parse(in, handler, metadata, ctx);
if (objName == null) {
objName = metadata.get(Metadata.RESOURCE_NAME_KEY);
}
// What is the signal to generate any text buffer at all?
// Is it worth puttting out a full EXIF dump for a JPEG?
//
int mdCount = metadata.names().length;
if (mdCount == 0) {
// No meaningful text or other metadata.
return null;
}
buf.append("Image Specifications\n===================\n");
List<String> metaKeys = Arrays.asList(metadata.names());
Collections.sort(metaKeys);
for (String key : metaKeys) {
if (this.emitMinimalText && !isUseful(key)) {
continue;
}
String val = metadata.get(key);
if (StringUtils.isBlank(val)) {
val = "(N/A)";
}
buf.append(String.format("%s:\t%s\n", key, val));
}
// Title
imgDoc.addTitle(String.format("%s: %s", type, objName));
// Author
imgDoc.addAuthor(metadata.get(TikaCoreProperties.CREATOR));
// Date
imgDoc.addCreateDate(metadata.getDate(TikaCoreProperties.CREATED));
// Geographic
String lat = metadata.get(TikaCoreProperties.LATITUDE);
String lon = metadata.get(TikaCoreProperties.LONGITUDE);
// Location if available.
if (lat != null && lon != null) {
logger.info("Found a location LAT={} LON={}", lat, lon);
// imgDoc.addProperty("location", String.format("%2.8f,%3.8f", ));
imgDoc.addUserProperty("location", String.format("%s, %s", lat, lon));
try {
LatLon yx = GeodeticUtility.parseLatLon(lat, lon);
buf.append("Location:\t" + formatCoord(yx) + "\n");
} catch (ParseException parseErr) {
//
}
}
// EXIF and other text content
imgDoc.setText(buf.toString());
return imgDoc;
} catch (Exception xerr) {
throw new IOException("Unable to parse content", xerr);
} finally {
in.close();
}
}
}