package edu.stanford.nlp.pipeline.webapp; import java.io.File; import java.io.IOException; import java.io.PrintWriter; import java.io.StringWriter; import java.util.LinkedHashMap; import java.util.Map; import java.util.function.Consumer; import javax.servlet.ServletException; import javax.servlet.http.HttpServlet; import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; import edu.stanford.nlp.io.RuntimeIOException; import edu.stanford.nlp.pipeline.AnnotationOutputter; import nu.xom.Builder; import nu.xom.Document; import nu.xom.Nodes; import nu.xom.xslt.XSLTransform; import org.apache.commons.lang3.StringEscapeUtils; import edu.stanford.nlp.pipeline.Annotation; import edu.stanford.nlp.pipeline.StanfordCoreNLP; import edu.stanford.nlp.pipeline.XMLOutputter; public class CoreNLPServlet extends HttpServlet { private static final long serialVersionUID = 1L; private StanfordCoreNLP pipeline; private XSLTransform corenlpTransformer; private String defaultFormat = "pretty"; private static final int MAXIMUM_QUERY_LENGTH = 4096; public void init() throws ServletException { pipeline = new StanfordCoreNLP(); String xslPath = getServletContext(). getRealPath("/WEB-INF/data/CoreNLP-to-HTML.xsl"); try { Builder builder = new Builder(); Document stylesheet = builder.build(new File(xslPath)); corenlpTransformer = new XSLTransform(stylesheet); } catch (Exception e) { throw new ServletException(e); } } public void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { if (request.getCharacterEncoding() == null) { request.setCharacterEncoding("utf-8"); } response.setContentType("text/html; charset=UTF-8"); this.getServletContext().getRequestDispatcher("/header.jsp"). include(request, response); addResults(request, response); this.getServletContext().getRequestDispatcher("/footer.jsp"). include(request, response); } public void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { doGet(request, response); } public void addResults(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { String input = request.getParameter("input"); if (input == null) { return; } input = input.trim(); if (input.equals("")) { return; } PrintWriter out = response.getWriter(); if (input.length() > MAXIMUM_QUERY_LENGTH) { out.print("<div>This query is too long. If you want to run very long queries, please download and use our <a href=\"http://nlp.stanford.edu/software/corenlp.shtml\">publicly released distribution</a>.</div>"); return; } Annotation annotation = new Annotation(input); pipeline.annotate(annotation); String outputFormat = request.getParameter("outputFormat"); if (outputFormat == null || outputFormat.trim().equals("")) { outputFormat = this.defaultFormat; } switch (outputFormat) { case "xml": outputXml(out, annotation); break; case "json": outputJson(out, annotation); break; case "conll": outputCoNLL(out, annotation); break; case "pretty": outputPretty(out, annotation); break; default: outputVisualise(out, annotation); break; } } public void outputVisualise(PrintWriter out, Annotation annotation) throws ServletException, IOException { // Note: A lot of the HTML generation in this method could/should be // done at a templating level, but as-of-yet I am not entirely sure how // this should be done in jsp. Also, a lot of the HTML is unnecessary // for the other outputs such as pretty print and XML. // Div for potential error messages when fetching the configuration. out.println("<div id=\"config_error\">"); out.println("</div>"); // Insert divs that will be used for each visualisation type. final int visualiserDivPxWidth = 700; Map<String, String> nameByAbbrv = new LinkedHashMap<>(); nameByAbbrv.put("pos", "Part-of-Speech"); nameByAbbrv.put("ner", "Named Entity Recognition"); nameByAbbrv.put("coref", "Coreference"); nameByAbbrv.put("basic_dep", "Basic Dependencies"); //nameByAbbrv.put("collapsed_dep", "Collapsed dependencies"); nameByAbbrv.put("collapsed_ccproc_dep", "Enhanced Dependencies"); for (Map.Entry<String, String> entry : nameByAbbrv.entrySet()) { out.println("<h2>" + entry.getValue() + ":</h2>"); out.println("<div id=\"" + entry.getKey() + "\" style=\"width:" + visualiserDivPxWidth + "px\">"); out.println(" <div id=\"" + entry.getKey() + "_loading\">"); out.println(" <p>Loading...</p>"); out.println(" </div>"); out.println("</div>"); out.println(""); } // Time to get the XML data into HTML. StringWriter xmlOutput = new StringWriter(); pipeline.xmlPrint(annotation, xmlOutput); xmlOutput.flush(); // Escape the XML to be embeddable into a Javascript string. String escapedXml = xmlOutput.toString().replaceAll("\\r\\n|\\r|\\n", "" ).replace("\"", "\\\""); // Inject the XML results into the HTML to be retrieved by the Javascript. out.println("<script type=\"text/javascript\">"); out.println("// <![CDATA["); out.println(" stanfordXML = \"" + escapedXml + "\";"); out.println("// ]]>"); out.println("</script>"); // Relative brat installation location to CoreNLP. final String bratLocation = "../brat"; // Inject the location variable, we need it in Javascript mode. out.println("<script type=\"text/javascript\">"); out.println("// <![CDATA["); out.println(" bratLocation = \"" + bratLocation + "\";"); out.println(" webFontURLs = [\n" + " '"+ bratLocation + "/static/fonts/Astloch-Bold.ttf',\n" + " '"+ bratLocation + "/static/fonts/PT_Sans-Caption-Web-Regular.ttf',\n" + " '"+ bratLocation + "/static/fonts/Liberation_Sans-Regular.ttf'];"); out.println("// ]]>"); out.println("</script>"); // Inject the brat stylesheet (removing this line breaks visualisation). out.println("<link rel=\"stylesheet\" type=\"text/css\" href=\"" + bratLocation + "/style-vis.css\"/>"); // Include the Javascript libraries necessary to run brat. out.println("<script type=\"text/javascript\" src=\"" + bratLocation + "/client/lib/head.load.min.js\"></script>"); // Main Javascript that hooks into all that we have introduced so far. out.println("<script type=\"text/javascript\" src=\"brat.js\"></script>"); // Link to brat, I hope this is okay to have here... out.println("<h>Visualisation provided using the " + "<a href=\"http://brat.nlplab.org/\">brat " + "visualisation/annotation software</a>.</h>"); out.println("<br/>"); } public void outputPretty(PrintWriter out, Annotation annotation) throws ServletException { try { Document input = XMLOutputter.annotationToDoc(annotation, pipeline); Nodes output = corenlpTransformer.transform(input); for (int i = 0; i < output.size(); i++) { out.print(output.get(i).toXML()); } } catch (RuntimeException e) { throw e; } catch (Exception e) { throw new ServletException(e); } } public void outputByWriter(Consumer<StringWriter> printer, PrintWriter out) throws IOException { StringWriter output = new StringWriter(); printer.accept(output); output.flush(); String escapedXml = StringEscapeUtils.escapeHtml4(output.toString()); String[] lines = escapedXml.split("\n"); out.print("<div><pre>"); for (String line : lines) { int numSpaces = 0; while (numSpaces < line.length() && line.charAt(numSpaces) == ' ') { out.print(" "); ++numSpaces; } out.print(line.substring(numSpaces)); out.print("\n"); } out.print("</pre></div>"); } public void outputXml(PrintWriter out, Annotation annotation) throws IOException { outputByWriter(writer -> { try { pipeline.xmlPrint(annotation, writer); } catch (IOException e) { throw new RuntimeIOException(e); } }, out); } public void outputJson(PrintWriter out, Annotation annotation) throws IOException { outputByWriter(writer -> { try { pipeline.jsonPrint(annotation, writer); } catch (IOException e) { throw new RuntimeIOException(e); } }, out); } public void outputCoNLL(PrintWriter out, Annotation annotation) throws IOException { outputByWriter(writer -> { try { pipeline.conllPrint(annotation, writer); } catch (IOException e) { throw new RuntimeIOException(e); } }, out); } }