package edu.stanford.nlp.ie; import edu.stanford.nlp.util.logging.Redwood; import edu.stanford.nlp.ie.crf.CRFClassifier; import edu.stanford.nlp.util.StringUtils; import edu.stanford.nlp.io.EncodingPrintWriter; import java.io.BufferedReader; import java.io.BufferedWriter; import java.io.IOException; import java.io.InputStreamReader; import java.io.PrintWriter; import java.io.OutputStreamWriter; import java.net.ServerSocket; import java.net.Socket; import java.net.UnknownHostException; import java.util.Properties; /***************************************************************************** * A named-entity recognizer server for Stanford's NER. * Runs on a socket and waits for text to annotate and returns the * annotated text. (Internally, it uses the <code>classifyString()</code> * method on a classifier, which can be either the default CRFClassifier * which is serialized inside the jar file from which it is called, or another * classifier which is passed as an argument to the main method. * * @version $Id$ * @author * Bjorn Aldag<BR> * Copyright © 2000 - 2004 Cycorp, Inc. All rights reserved. * Permission granted for Stanford to distribute with their NER code * by Bjorn Aldag * @author Christopher Manning 2006 (considerably rewritten) * *****************************************************************************/ public class NERServer { /** A logger for this class */ private static Redwood.RedwoodChannels log = Redwood.channels(NERServer.class); //// Variables /** * Debugging toggle. */ private static final boolean ENV_DEBUG = ((System.getenv("NERSERVER_DEBUG") != null) ? Boolean.parseBoolean(System.getenv("NERSERVER_DEBUG")) : false); private boolean DEBUG = ENV_DEBUG; private final String charset; /** * The listener socket of this server. */ private final ServerSocket listener; /** * The classifier that does the actual tagging. */ private final AbstractSequenceClassifier ner; //// Constructors /** * Creates a new named entity recognizer server on the specified port. * * @param port the port this NERServer listens on. * @param asc The classifier which will do the tagging * @param charset The character set for encoding Strings over the socket stream, e.g., "utf-8" * @throws IOException If there is a problem creating a ServerSocket */ public NERServer(int port, AbstractSequenceClassifier asc, String charset) throws IOException { ner = asc; listener = new ServerSocket(port); this.charset = charset; } //// Public Methods /** * Runs this named entity recognizer server. */ @SuppressWarnings({"InfiniteLoopStatement", "ConstantConditions", "null"}) public void run() { Socket client = null; while (true) { try { client = listener.accept(); if (DEBUG) { log.info("Accepted request from "); log.info(client.getInetAddress().getHostName()); } new Session(client); } catch (Exception e1) { log.info("NERServer: couldn't accept"); e1.printStackTrace(System.err); try { client.close(); } catch (Exception e2) { log.info("NERServer: couldn't close client"); e2.printStackTrace(System.err); } } } } //// Inner Classes /** * A single user session, accepting one request, processing it, and * sending back the results. */ private class Session extends Thread { //// Instance Fields /** * The socket to the client. */ private final Socket client; /** * The input stream from the client. */ private final BufferedReader in; /** * The output stream to the client. */ private PrintWriter out; //// Constructors private Session(Socket socket) throws IOException { client = socket; in = new BufferedReader(new InputStreamReader(client.getInputStream(), charset)); out = new PrintWriter(new OutputStreamWriter(client.getOutputStream(), charset)); start(); } //// Public Methods /** * Runs this session by reading a string, tagging it, and writing * back the result. The input should be a single line (no embedded * newlines), which represents a whole sentence or document. */ @Override public void run() { if (DEBUG) {log.info("Created new session");} String input = null; try { // TODO: why not allow for multiple lines of input? input = in.readLine(); if (DEBUG) { EncodingPrintWriter.err.println("Receiving: \"" + input + '\"', charset); } } catch (IOException e) { log.info("NERServer:Session: couldn't read input"); e.printStackTrace(System.err); } catch (NullPointerException npe) { log.info("NERServer:Session: connection closed by peer"); npe.printStackTrace(System.err); } try { if (! (input == null)) { String output = ner.classifyToString(input, ner.flags.outputFormat, !"slashTags".equals(ner.flags.outputFormat)); if (DEBUG) { EncodingPrintWriter.err.println("Sending: \"" + output + '\"', charset); } out.print(output); out.flush(); } } catch (RuntimeException | OutOfMemoryError e) { // ah well, guess they won't be hearing back from us after all if (DEBUG) { log.error("NERServer.Session: error classifying string."); log.error(e); } } finally { close(); } } /** * Terminates this session gracefully. */ private void close() { try { in.close(); out.close(); if (DEBUG) { log.info("Closing connection to client"); log.info(client.getInetAddress().getHostName()); } client.close(); } catch (Exception e) { log.info("NERServer:Session: can't close session"); e.printStackTrace(System.err); } } } // end class Session /** This example sends material to the NER server one line at a time. * Each line should be at least a whole sentence, or can be a whole * document. */ public static class NERClient { private NERClient() {} public static void communicateWithNERServer(String host, int port, String charset) throws IOException { System.out.println("Input some text and press RETURN to NER tag it, " + " or just RETURN to finish."); BufferedReader stdIn = new BufferedReader(new InputStreamReader(System.in, charset)); communicateWithNERServer(host, port, charset, stdIn, null, true); stdIn.close(); } public static void communicateWithNERServer(String host, int port, String charset, BufferedReader input, BufferedWriter output, boolean closeOnBlank) throws IOException { if (host == null) { host = "localhost"; } for (String userInput; (userInput = input.readLine()) != null; ) { if (userInput.matches("\\n?")) { if (closeOnBlank) { break; } else { continue; } } try { // TODO: why not keep the same socket for multiple lines? Socket socket = new Socket(host, port); PrintWriter out = new PrintWriter(new OutputStreamWriter(socket.getOutputStream(), charset), true); BufferedReader in = new BufferedReader(new InputStreamReader( socket.getInputStream(), charset)); // send material to NER to socket out.println(userInput); // Print the results of NER String result; while ((result = in.readLine()) != null) { if (output == null) { EncodingPrintWriter.out.println(result, charset); } else { output.write(result); output.newLine(); } } in.close(); socket.close(); } catch (UnknownHostException e) { log.info("Cannot find host: "); log.info(host); return; } catch (IOException e) { log.info("I/O error in the connection to: "); log.info(host); return; } } } } // end static class NERClient private static final String USAGE = "Usage: NERServer [-loadClassifier file|-loadJarClassifier resource|-client] -port portNumber"; /** * Starts this server on the specified port. The classifier used can be * either a default one stored in the jar file from which this code is * invoked or you can specify it as a filename or as another classifier * resource name, which must correspond to the name of a resource in the * /classifiers/ directory of the jar file. * <p> * Default port is 4465. * </p><p> * When run in server mode, additional properties can be specified * on the command line and will be passed to the model loaded. * </p><p> * Usage: <code>java edu.stanford.nlp.ie.NERServer [-loadClassifier file|-loadJarClassifier resource|-client] -port portNumber</code> * * @param args Command-line arguments (described above) * @throws Exception If file or Java class problems with serialized classifier */ @SuppressWarnings({"StringEqualsEmptyString"}) public static void main (String[] args) throws Exception { Properties props = StringUtils.argsToProperties(args); String loadFile = props.getProperty("loadClassifier"); String loadJarFile = props.getProperty("loadJarClassifier"); String client = props.getProperty("client"); String portStr = props.getProperty("port", "4465"); props.remove("port"); // so later code doesn't complain if (portStr == null || portStr.equals("")) { log.info(USAGE); return; } String charset = "utf-8"; String encoding = props.getProperty("encoding"); if (encoding != null && ! "".equals(encoding)) { charset = encoding; } int port; try { port = Integer.parseInt(portStr); } catch (NumberFormatException e) { log.info("Non-numerical port"); log.info(USAGE); return; } // default output format for if no output format is specified if (props.getProperty("outputFormat") == null) { props.setProperty("outputFormat", "slashTags"); } if (client != null && ! client.equals("")) { // run a test client for illustration/testing String host = props.getProperty("host"); NERClient.communicateWithNERServer(host, port, charset); } else { AbstractSequenceClassifier asc; if (loadFile != null && ! loadFile.equals("")) { asc = CRFClassifier.getClassifier(loadFile, props); } else if (loadJarFile != null && ! loadJarFile.equals("")) { asc = CRFClassifier.getJarClassifier(loadJarFile, props); } else { asc = CRFClassifier.getDefaultClassifier(props); } new NERServer(port, asc, charset).run(); } } }