package edu.stanford.nlp.naturalli.demo;
import edu.stanford.nlp.util.logging.Redwood;
import edu.stanford.nlp.ie.util.RelationTriple;
import edu.stanford.nlp.io.IOUtils;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.naturalli.NaturalLogicAnnotations;
import edu.stanford.nlp.naturalli.SentenceFragment;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
import edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations;
import edu.stanford.nlp.util.CoreMap;
import edu.stanford.nlp.util.StringUtils;
import javax.servlet.*;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import java.io.IOException;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.util.*;
/**
* A simple web frontend to the Open IE System.
*
* @author Gabor Angeli
*/
public class OpenIEServlet extends HttpServlet {
/** A logger for this class */
private static Redwood.RedwoodChannels log = Redwood.channels(OpenIEServlet.class);
StanfordCoreNLP pipeline = null;
StanfordCoreNLP backoff = null;
/**
* Set the properties to the paths they appear at on the servlet.
* See build.xml for where these paths get copied.
* @throws ServletException Thrown by the implementation
*/
public void init() throws ServletException {
Properties commonProps = new Properties() {{
setProperty("depparse.extradependencies", "ref_only_uncollapsed");
setProperty("parse.extradependencies", "ref_only_uncollapsed");
setProperty("openie.splitter.threshold", "0.10");
setProperty("openie.optimze_for", "GENERAL");
setProperty("openie.ignoreaffinity", "false");
setProperty("openie.max_entailments_per_clause", "1000");
setProperty("openie.triple.strict", "true");
}};
try {
String dataDir = getServletContext().getRealPath("/WEB-INF/data");
System.setProperty("de.jollyday.config",
getServletContext().getRealPath("/WEB-INF/classes/holidays/jollyday.properties"));
commonProps.setProperty("pos.model", dataDir + "/english-left3words-distsim.tagger");
commonProps.setProperty("ner.model", dataDir + "/english.all.3class.distsim.crf.ser.gz," + dataDir + "/english.conll.4class.distsim.crf.ser.gz," + dataDir + "/english.muc.7class.distsim.crf.ser.gz");
commonProps.setProperty("depparse.model", dataDir + "/english_SD.gz");
commonProps.setProperty("parse.model", dataDir + "/englishPCFG.ser.gz");
commonProps.setProperty("sutime.rules", dataDir + "/defs.sutime.txt," + dataDir + "/english.sutime.txt," + dataDir + "/english.hollidays.sutime.txt");
commonProps.setProperty("openie.splitter.model", dataDir + "/clauseSplitterModel.ser.gz");
commonProps.setProperty("openie.affinity_models", dataDir);
} catch (NullPointerException e) {
log.info("Could not load servlet context. Are you on the command line?");
}
if (this.pipeline == null) {
Properties fullProps = new Properties(commonProps);
fullProps.setProperty("annotators", "tokenize,ssplit,pos,lemma,depparse,ner,natlog,openie");
this.pipeline = new StanfordCoreNLP(fullProps);
}
if (this.backoff == null) {
Properties backoffProps = new Properties(commonProps);
backoffProps.setProperty("annotators", "parse,natlog,openie");
backoffProps.setProperty("enforceRequirements", "false");
this.backoff = new StanfordCoreNLP(backoffProps);
}
}
/**
* Annotate a document (which is usually just a sentence).
*/
public void annotate(StanfordCoreNLP pipeline, Annotation ann) {
if (ann.get(CoreAnnotations.SentencesAnnotation.class) == null) {
pipeline.annotate(ann);
} else {
if (ann.get(CoreAnnotations.SentencesAnnotation.class).size() == 1) {
CoreMap sentence = ann.get(CoreAnnotations.SentencesAnnotation.class).get(0);
for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
token.remove(NaturalLogicAnnotations.OperatorAnnotation.class);
token.remove(NaturalLogicAnnotations.PolarityAnnotation.class);
}
sentence.remove(NaturalLogicAnnotations.RelationTriplesAnnotation.class);
sentence.remove(NaturalLogicAnnotations.EntailedSentencesAnnotation.class);
sentence.remove(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class);
sentence.remove(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation.class);
sentence.remove(SemanticGraphCoreAnnotations.EnhancedPlusPlusDependenciesAnnotation.class);
pipeline.annotate(ann);
}
}
}
/**
* Originally extracted from Jettison; copied from http://stackoverflow.com/questions/3020094/how-should-i-escape-strings-in-json
* @param string The string to quote.
* @return A quoted version of the string, safe to send over the wire.
*/
public static String quote(String string) {
if (string == null || string.length() == 0) {
return "\"\"";
}
char c = 0;
int i;
int len = string.length();
StringBuilder sb = new StringBuilder(len + 4);
String t;
sb.append('"');
for (i = 0; i < len; i += 1) {
c = string.charAt(i);
switch (c) {
case '\\':
case '"':
sb.append('\\');
sb.append(c);
break;
case '/':
// if (b == '<') {
sb.append('\\');
// }
sb.append(c);
break;
case '\b':
sb.append("\\b");
break;
case '\t':
sb.append("\\t");
break;
case '\n':
sb.append("\\n");
break;
case '\f':
sb.append("\\f");
break;
case '\r':
sb.append("\\r");
break;
default:
if (c < ' ') {
t = "000" + Integer.toHexString(c);
sb.append("\\u" + t.substring(t.length() - 4));
} else {
sb.append(c);
}
}
}
sb.append('"');
return sb.toString();
}
private void runWithPipeline(StanfordCoreNLP pipeline, Annotation ann, Set<String> triples, Set<String> entailments) {
// Annotate
annotate(pipeline, ann);
// Extract info
for (CoreMap sentence : ann.get(CoreAnnotations.SentencesAnnotation.class)) {
for (SentenceFragment fragment : sentence.get(NaturalLogicAnnotations.EntailedSentencesAnnotation.class)) {
entailments.add(quote(fragment.toString()));
}
for (RelationTriple fragment : sentence.get(NaturalLogicAnnotations.RelationTriplesAnnotation.class)) {
triples.add("[ " + quote(fragment.subjectGloss()) + ", " + quote(fragment.relationGloss()) + ", " + quote(fragment.objectGloss()) + " ]");
}
}
}
/**
* Actually perform the GET request, given all the relevant information (already sanity checked).
* This is the meat of the servlet code.
* @param out The writer to write the output to.
* @param q The query string.
*/
private void doGet(PrintWriter out, String q) {
// Clean the string a bit
q = q.trim();
if (q.length() == 0) {
return;
}
char lastChar = q.charAt(q.length() - 1);
if (lastChar != '.' && lastChar != '!' && lastChar != '?') {
q = q + ".";
}
// Annotate
Annotation ann = new Annotation(q);
try {
// Collect results
Set<String> entailments = new HashSet<>();
Set<String> triples = new LinkedHashSet<>();
runWithPipeline(pipeline, ann, triples, entailments); // pipeline must come before backoff
if (triples.size() == 0) {
runWithPipeline(backoff, ann, triples, entailments); // backoff must come after pipeline
}
// Write results
out.println("{ " +
"\"ok\":true, " +
"\"entailments\": [" + StringUtils.join(entailments, ",") + "], " +
"\"triples\": [" + StringUtils.join(triples, ",") + "], " +
"\"msg\": \"\"" +
" }");
} catch (Throwable t) {
out.println("{ok:false, entailments:[], triples:[], msg:" + quote(t.getMessage()) + "}");
}
}
/**
* {@inheritDoc}
*/
public void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
if (request.getCharacterEncoding() == null) {
request.setCharacterEncoding("utf-8");
}
response.setContentType("text/json; charset=UTF-8");
PrintWriter out = response.getWriter();
String raw = request.getParameter("q");
if (raw == null || "".equals(raw)) {
out.println("{ok:false, entailments:[], triples=[], msg=\"\"}");
} else {
doGet(out, raw);
}
out.close();
}
/**
* {@inheritDoc}
*/
public void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
doGet(request, response);
}
/**
* A helper so that we can see how the servlet sees the world, modulo model paths, at least.
*/
public static void main(String[] args) throws ServletException, IOException {
OpenIEServlet servlet = new OpenIEServlet();
servlet.init();
IOUtils.console(line -> {
StringWriter str = new StringWriter();
PrintWriter out = new PrintWriter(str);
servlet.doGet(new PrintWriter(out), line);
out.close();
System.out.println(str.toString());
});
}
}