package com.ntrepid.tartan;
import java.io.*;
import java.util.*;
import java.util.zip.*;
import javax.servlet.*;
import javax.servlet.http.*;
import edu.stanford.nlp.ie.*;
import edu.stanford.nlp.ie.crf.*;
/**
* This is a servlet interface to the CRFClassifier.
*
* @author Dat Hoang 2011
*
**/
public class NERServlet extends HttpServlet
{
private String format;
private boolean spacing;
private String default_classifier;
private String[] classifiers;
private HashMap<String, AbstractSequenceClassifier> ners;
public void init() throws ServletException {
format = getServletConfig().getInitParameter("outputFormat");
if (format == null || format.trim().equals(""))
throw new ServletException("Invalid outputFormat setting.");
String spacingStr = getServletConfig().getInitParameter("preserveSpacing");
if (spacingStr == null || spacingStr.trim().equals(""))
throw new ServletException("Invalid preserveSpacing setting.");
//spacing = Boolean.valueOf(spacingStr).booleanValue();
spacing = spacingStr.trim().toLowerCase().equals("true");
default_classifier = getServletConfig().getInitParameter("default-classifier");
if (default_classifier == null || default_classifier.trim().equals(""))
throw new ServletException("Default classifier not given.");
String classifiersStr = getServletConfig().getInitParameter("classifiers");
if (classifiersStr == null || classifiersStr.trim().equals(""))
throw new ServletException("List of classifiers not given.");
classifiers = classifiersStr.split("\\s+");
ners = new HashMap<String, AbstractSequenceClassifier>();
for (String classifier : classifiers) {
AbstractSequenceClassifier asc = null;
String filename = getServletConfig().getInitParameter(classifier);
InputStream is = getServletConfig().getServletContext().getResourceAsStream(filename);
if (is == null)
throw new ServletException("File not found. Filename = " + filename);
try {
if (filename.endsWith(".gz")) {
is = new BufferedInputStream(new GZIPInputStream(is));
} else {
is = new BufferedInputStream(is);
}
asc = CRFClassifier.getClassifier(is);
} catch (IOException e) {
throw new ServletException("IO problem reading classifier.", e);
} catch (ClassCastException e) {
throw new ServletException("Classifier class casting problem.", e);
} catch (ClassNotFoundException e) {
throw new ServletException("Classifier class not found problem.", e);
} finally {
try {
is.close();
} catch (IOException e) {
//do nothing
}
}
ners.put(classifier, asc);
}
}
public void doGet(HttpServletRequest req, HttpServletResponse res)
throws ServletException, IOException {
doPost(req, res);
}
public void doPost(HttpServletRequest req, HttpServletResponse res)
throws ServletException, IOException {
String input = req.getParameter("input");
String outputFormat = req.getParameter("outputFormat");
if (outputFormat == null || outputFormat.trim().equals("")) {
outputFormat = this.format;
}
boolean preserveSpacing;
String preserveSpacingStr = req.getParameter("preserveSpacing");
if (preserveSpacingStr == null || preserveSpacingStr.trim().equals("")) {
preserveSpacing = this.spacing;
} else {
//preserveSpacing = Boolean.getBoolean(preserveSpacingStr);
preserveSpacing = preserveSpacingStr.trim().toLowerCase().equals("true");
}
String classifier = req.getParameter("classifier");
if (classifier == null || classifier.trim().equals("")) {
classifier = this.default_classifier;
}
AbstractSequenceClassifier tagger = ners.get(classifier);
res.setContentType("text/plain");
res.addHeader("classifier", classifier);
res.addHeader("outputFormat", outputFormat);
res.addHeader("preserveSpacing", String.valueOf(preserveSpacing));
PrintWriter out = res.getWriter();
if(outputFormat.equals("offsets")) {
out.print(ners.get(classifier).classifyToCharacterOffsets(input));
} else {
//entire blob of text
out.print(ners.get(classifier).classifyToString(input, outputFormat, preserveSpacing));
}
//sentence-by-sentence
/*for (String sentence: input.split("\n")) {
out.println(tagger.classifyToString(sentence, outputFormat, preserveSpacing));
}*/
}
}