package org.opensextant.service;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.net.URLStreamHandler;
import java.util.Date;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.ArrayList;
import java.util.concurrent.ConcurrentMap;
import org.apache.commons.fileupload.FileItem;
import org.apache.commons.fileupload.FileUploadException;
import org.apache.commons.fileupload.disk.DiskFileItemFactory;
import org.geojson.Feature;
import org.geojson.FeatureCollection;
import org.geojson.Point;
import org.opensextant.placedata.Geocoord;
import org.opensextant.placedata.Place;
import org.opensextant.tagger.Document;
import org.opensextant.tagger.Match;
import org.opensextant.service.processing.DocumentProcessorPool;
import org.restlet.Request;
import org.restlet.data.MediaType;
import org.restlet.ext.fileupload.RestletFileUpload;
import org.restlet.ext.jackson.JacksonRepresentation;
import org.restlet.representation.Representation;
import org.restlet.representation.StringRepresentation;
import org.restlet.resource.Get;
import org.restlet.resource.Post;
import org.restlet.resource.Put;
import org.restlet.resource.ServerResource;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public class OpenSextantExtractorResource extends ServerResource {
/** Log object. */
private static final Logger LOGGER = LoggerFactory.getLogger(OpenSextantApplication.class);
/** The name of the field in the form which holds the uploaded file. */
static String formFileName = "infile";
/** The result formats supported. */
static Set<String> formats = new HashSet<String>();
static {
formats.add("json");
formats.add("extjson");
formats.add("geojson");
formats.add("xml");
formats.add("csv");
}
/** The pool from which the document processor is pulled. */
DocumentProcessorPool dpPool;
@Override
protected void doInit() {
super.doInit();
// get a reference to the pool in the Application
dpPool = ((OpenSextantApplication) getApplication()).getPool();
}
@Get
public Representation doGet() {
return new StringRepresentation("GET is not supported, use POST or PUT");
}
@Post
@Put
public Representation doPost(Representation entity) throws IOException {
// get the request
Request req = getRequest();
// get the submitted attributes
ConcurrentMap<String, Object> attrs = req.getAttributes();
String type = (String) attrs.get("extracttype");
String format = (String) attrs.get("resultformat");
String sourceURLString = (String) attrs.get("url");
if (sourceURLString != null) {
String sourceURL = java.net.URLDecoder.decode(sourceURLString, "UTF-8");
URL url = new URL(sourceURL);
return extract(type, format, url);
}
// return list of extraction types
if (type == null) {
Set<String> ret = this.dpPool.getProcessNames();
return new JacksonRepresentation<Set<String>>(ret);
}
// return list of result types
if (format == null) {
return new JacksonRepresentation<Set<String>>(formats);
}
if (entity != null) {
MediaType media = entity.getMediaType();
if (media == null) {// bare stream?
InputStream ios = entity.getStream();
URL url = streamUrl("http://infile", ios, null);
return extract(type, format, url);
}
// if its a form
if (MediaType.MULTIPART_FORM_DATA.equals(media, true)) {
URL u = handleForm(entity, formFileName);
if (u != null) {
return extract(type, format, u);
}
return new StringRepresentation("Form with no field named \"" + formFileName + "\"");
}
// if it is text of some kind
if (MediaType.TEXT_PLAIN.equals(media, true) || MediaType.TEXT_XML.equals(media, true)
|| MediaType.TEXT_HTML.equals(media, true) || MediaType.APPLICATION_XML.equals(media, true)) {
return extract(type, format, entity.getText());
}
// if it WWW form
if (MediaType.APPLICATION_WWW_FORM.equals(media, true)) {
return extract(type, format, entity.getText());
}
return new StringRepresentation("POST or PUT requested but can't handle " + media.getName() + " type body");
} else {
return new StringRepresentation("POST or PUT requested but no body provided");
}
}
/** Extract the content from the submitted form as a URL. */
private URL handleForm(Representation entity, String filename) {
RestletFileUpload fileupload = new RestletFileUpload(new DiskFileItemFactory());
List<FileItem> fileItems = null;
try {
fileItems = fileupload.parseRepresentation(entity);
} catch (FileUploadException e) {
LOGGER.error("Couldnt handle provided form", e);
}
// look for the field containing the file
for (FileItem fileItem : fileItems) {
if (fileItem.getFieldName().equalsIgnoreCase(filename)) {
String fn = fileItem.getName();
try {
File tmpFile = File.createTempFile("ossvr", fn);
tmpFile.deleteOnExit();
fileItem.write(tmpFile);
return tmpFile.toURI().toURL();
} catch (MalformedURLException e) {
LOGGER.error("Couldnt handle provided form", e);
return null;
} catch (IOException e) {
LOGGER.error("Couldnt handle provided form", e);
return null;
} catch (Exception e) {
LOGGER.error("Couldnt handle provided form", e);
return null;
}
}
} // end fileitems loop
// didnt find a field of the correct name
return null;
}
private Representation extract(String extractType, String resultFormat, String content) {
if (dpPool.getProcessNames().contains(extractType)) {
Document result = dpPool.process(extractType, content);
return convertResult(result, resultFormat);
} else {
return new StringRepresentation("Unknown extraction type:" + extractType);
}
}
private Representation extract(String extractType, String resultFormat, URL content) {
if (dpPool.getProcessNames().contains(extractType)) {
Document doc = dpPool.process(extractType, content);
// clean up temp file if used
if ("file".equalsIgnoreCase(content.getProtocol())) {
String tempFilePath = content.getPath();
File tmpFile = new File(tempFilePath);
if (!tmpFile.delete()) {
LOGGER.error("Unable to delete temp file" + tmpFile.getPath());
}
}
if (doc != null) {
return convertResult(doc, resultFormat);
} else {
return new StringRepresentation("Couldnt extract content from:" + content.toExternalForm());
}
} else {
return new StringRepresentation("Unknown extraction type:" + extractType);
}
}
private Representation convertResult(Document db, String resultFormat) {
if ("extjson".equalsIgnoreCase(resultFormat)) {
return new JacksonRepresentation<Document>(db);
}
// For the non-extended result formats we remove excess
// information, such as the alternative annotation
// "candidates", from each Match.
// This is done to keep the result succinct.
List<Match> featureList = new ArrayList<Match>();
for (Match a : db.getAnnoList()) {
Match m = new Match();
m.setStart(a.getStart());
m.setEnd(a.getEnd());
m.setType(a.getType());
m.setMatchText(a.getMatchText());
Map<String, Object> feats = a.getFeatures();
feats.remove("candidates");
m.setFeatures(feats);
featureList.add(m);
}
db.setAnnoList(featureList);
if ("json".equalsIgnoreCase(resultFormat)) {
return new JacksonRepresentation<Document>(db);
}
if ("geojson".equalsIgnoreCase(resultFormat)) {
FeatureCollection coll = new FeatureCollection();
for (Match a : db.getAnnoList()) {
Feature ft = new Feature();
String t = a.getType();
Map<String, Object> fm = a.getFeatures();
Object h = fm.get("hierarchy");
ft.setProperty("matchtext", a.getMatchText());
ft.setProperty("entitytype", t);
ft.setProperty("hierarchy", h);
ft.setProperty("start", a.getStart());
ft.setProperty("end", a.getEnd());
ft.setProperty("snippet", db.getSnippet(a, 25));
ft.setGeometry(null);
if ("Date".equalsIgnoreCase(t)) {
Date dt = (Date) fm.get("date");
ft.setProperty("date", dt.toString());
}
if ("PLACE".equalsIgnoreCase(t)) {
Place pl = (Place) fm.get("place");
ft.setProperty("placeName", pl.getPlaceName());
ft.setProperty("countrycode", pl.getCountryCode());
ft.setProperty("featureclass", pl.getFeatureClass());
ft.setProperty("featurecode", pl.getFeatureCode());
Point pt = new Point(pl.getLongitude(), pl.getLatitude());
ft.setGeometry(pt);
}
if ("GEOCOORD".equalsIgnoreCase(t)) {
Geocoord geo = (Geocoord) fm.get("geo");
Point pt = new Point(geo.getLongitude(), geo.getLatitude());
ft.setGeometry(pt);
}
coll.add(ft);
}
return new JacksonRepresentation<FeatureCollection>(coll);
}
if ("xml".equalsIgnoreCase(resultFormat)) {
return new JacksonRepresentation<Document>(MediaType.TEXT_XML, db);
}
if ("csv".equalsIgnoreCase(resultFormat)) {
StringBuilder buff = new StringBuilder();
buff.append(
"MatchText\tType\tHierarchy\tStart\tEnd\tSnippet\tDate\tPlaceName\tCountryCode\tFeatureClass\tFeatureCode\tLatitude\tLongitude\n");
for (Match a : db.getAnnoList()) {
String t = a.getType();
Map<String, Object> fm = a.getFeatures();
Object h = fm.get("hierarchy");
buff.append(a.getMatchText()).append("\t").append(t).append("\t").append(h).append("\t")
.append(a.getStart()).append("\t").append(a.getEnd()).append("\t");
buff.append(db.getSnippet(a, 25));
if ("Date".equalsIgnoreCase(t)) {
Date dt = (Date) fm.get("date");
buff.append("\t");
buff.append(dt).append("\t");
} else {
buff.append("\t");
}
if ("PLACE".equalsIgnoreCase(t)) {
Place pl = (Place) fm.get("place");
buff.append("\t");
buff.append(pl.getPlaceName()).append("\t");
buff.append(pl.getCountryCode()).append("\t");
buff.append(pl.getFeatureClass()).append("\t");
buff.append(pl.getFeatureCode()).append("\t");
buff.append(pl.getLatitude()).append("\t");
buff.append(pl.getLongitude()).append("\t");
}
if ("GEOCOORD".equalsIgnoreCase(t)) {
Geocoord geo = (Geocoord) fm.get("geo");
buff.append("\t");
buff.append("\t");
buff.append("\t");
buff.append("\t");
buff.append("\t");
buff.append(geo.getLatitude()).append("\t");
buff.append(geo.getLongitude()).append("\t");
}
buff.append("\n");
}
return new StringRepresentation(buff.toString());
}
return new JacksonRepresentation<Document>(db);
}
/** Create a URL based on an InputStream. */
private URL streamUrl(String urlString, final InputStream is, final String contType) throws MalformedURLException {
return new URL(null, urlString, new URLStreamHandler() {
@Override
public URLConnection openConnection(URL u) {
return new URLConnection(u) {
@Override
public String getContentType() {
return contType;
}
@Override
public void connect() {
// do nothing
}
@Override
public InputStream getInputStream() {
return is;
}
};
}
});
}
public static Set<String> getFormats() {
return formats;
}
}