package edu.stanford.nlp.pipeline; import edu.stanford.nlp.ling.CoreAnnotation; import edu.stanford.nlp.util.StreamGobbler; import edu.stanford.nlp.util.StringUtils; import edu.stanford.nlp.util.logging.Redwood; import java.io.*; import java.net.HttpURLConnection; import java.net.MalformedURLException; import java.net.URL; import java.time.Duration; import java.util.Collections; import java.util.Optional; import java.util.Set; import java.util.concurrent.TimeoutException; /** * A common base class for annotators that talk to other web servers. * * The important things to do to implement this is: * * <ol> * <li>Implement {@link #annotateImpl(Annotation)} with the code to actually call the server.</li> * <li>Implement {@link #ready(boolean initialTest)} with code to check if the server is available. {@link #ping(String)} may be useful for this.</li> * <li>Optionally implement {@link #startCommand()} with a command to start a local server. If this is specified, we will start * a local server before we start checking for readiness. * Note that the {@link #ready(boolean initialTest)} endpoint does still have to point to this local server in that case, or else * lifecycle won't be managed properly. * </li> * </ol> * * @author <a href="mailto:gabor@eloquent.ai">Gabor Angeli</a> */ public abstract class WebServiceAnnotator implements Annotator { /** A logger from this class. */ private static Redwood.RedwoodChannels log = Redwood.channels(WebServiceAnnotator.class); /** A timeout to wait for a server to boot up. Beyond this, we simply give up and throw an exception. */ private static long CONNECT_TIMEOUT = Duration.ofMinutes(15).toMillis(); /** * Thrown if we could not annotate, but there's hope to either reconnect or restart the server. * Will still only try to connect 3 times. * This is the usual exception. */ public static class ShouldRetryException extends Exception { public ShouldRetryException() {} } /** An exception thrown if we could not connect to the server, and shouldn't retry / recreate the server. */ @SuppressWarnings("unused") public static class PermanentlyFailedException extends Exception { public PermanentlyFailedException() {} public PermanentlyFailedException(Throwable t) { super(t); } } /** * A class encapsulating a running server process. */ private class RunningProcess { /** The actual running process. */ public final Process process; /** The output stream gobbler, redirecting the stream to stdout. */ public final StreamGobbler stdout; /** The error stream gobbler, redirecting the stream to stderr. */ public final StreamGobbler stderr; /** If true, the server is presumed ready to accept connections. */ public boolean ready = false; /** A shutdown hook to clean up this process on shutdown. */ private final Thread shutdownHoook; /** A straightforward constructor. */ private RunningProcess(Process process) { this.process = process; Writer errWriter = new BufferedWriter(new OutputStreamWriter(System.err)); this.stderr = new StreamGobbler(process.getErrorStream(), errWriter); this.stderr.start(); Writer outWriter = new BufferedWriter(new OutputStreamWriter(System.out)); this.stdout = new StreamGobbler(process.getErrorStream(), outWriter); this.stdout.start(); this.shutdownHoook = new Thread(() -> { log.info("Killing process " + WebServiceAnnotator.this); this.stdout.kill(); this.stderr.kill(); if (this.process.isAlive()) { this.process.destroy(); } this.ready = false; }); Runtime.getRuntime().addShutdownHook(this.shutdownHoook); } /** Kills this process, and kills the stream gobblers waiting on it. */ public void kill() { Runtime.getRuntime().removeShutdownHook(shutdownHoook); shutdownHoook.run(); } /** Make sure we clean up this annotator! */ protected void finalize() throws Throwable { try { super.finalize(); } finally { kill(); } } } /** If true, we have connected to the server at some point. */ protected boolean everLive = false; /** If true, the server was active last time checked */ protected boolean serverWasActive = false; /** The running server, if any. */ private Optional<RunningProcess> server = Optional.empty(); /** * The command to run to start the server, if any. * If no command is given, we assume it's being managed by someone else (e.g., an external * running service). * * @return The command we should start, or {@link Optional#empty()} if we don't want CoreNLP * to manage the server. */ protected abstract Optional<String[]> startCommand(); /** * An optional command provided to run to shut down the server. */ protected abstract Optional<String[]> stopCommand(); /** * Check if the server is ready to accept annotations. * This client will wait until the ready endpoint returns true. * * @param initialTest testing a server that has just been started? * * @return True if the server is ready to accept documents to annotate. */ protected abstract boolean ready(boolean initialTest); /** * Actually annotate a document with the server. * * @param ann The document to annotate. * * @throws ShouldRetryException Thrown if we could not annotate the document, but we could plausibly retry. * @throws PermanentlyFailedException Thrown if we could not annotate the document and should not retry. */ protected abstract void annotateImpl(Annotation ann) throws ShouldRetryException, PermanentlyFailedException; /** * Check if the server is live. Can be overwritten if it differs from {@link #ready(boolean initialTest)}. * * @return True if the server is live. */ protected boolean live() { return true; } /** * A utility to ping an endpoint. Useful for {@link #live()} and {@link #ready(boolean initialTest)}. * * @param uri The URL we are trying to ping. * * @return True if we got any non-5XX response from the endpoint. */ protected boolean ping(String uri) { try { URL url = new URL(uri); HttpURLConnection connection = (HttpURLConnection) url.openConnection(); connection.setRequestProperty("Accept-Charset", "UTF-8"); connection.setRequestMethod("GET"); connection.connect(); int code = connection.getResponseCode(); return code < 500 || code >= 600; } catch (MalformedURLException e) { log.warn("Could not parse URL: " + uri); return false; } catch (ClassCastException e) { log.warn("Not an HTTP URI"); return false; } catch (IOException e) { return false; } } /** * Start the actual server. * * @param command the command we are using to start the sever. * * @return True if the server was started; false otherwise. */ private boolean startServer(String[] command) { ProcessBuilder proc = new ProcessBuilder(command); try { synchronized (this) { this.server = Optional.of(new RunningProcess(proc.start())); } log.info("Started server " + StringUtils.join(command)); return true; } catch (IOException e) { log.error("Could not start process: " + StringUtils.join(command)); return false; } } /** * Ensure that the server we're trying to connect to exists. * This is certainly called from {@link #annotate(Annotation)}, but can also * be called from the constructor of the annotator to cache startup times. * * @throws TimeoutException Thrown if we could not connect to the server for the timeout period. * @throws IOException Thrown if we could not start the server process. */ protected void ensureServer() throws TimeoutException, IOException { long startTime = System.currentTimeMillis(); // if the server was active last time we checked, see if the server is still active if (serverWasActive) { if (ready(false)) return; } // 1. Start a server, if applicable boolean serverStarted = startCommand().map(this::startServer).orElse(true); if (!serverStarted) { throw new IOException("Could not start a local server!"); } // 2. Wait for the target server to come online while (!everLive) { if (System.currentTimeMillis() > startTime + CONNECT_TIMEOUT) { throw new TimeoutException("Could not connect to annotator: " + this); } if (!live()) { try { Thread.sleep(1000); } catch (InterruptedException ignored) {} } else { everLive = true; } } log.info("Got liveness from server for " + this); // 3. Wait for the target server to become ready synchronized (this) { if (this.server.isPresent()) { while (!this.server.get().ready) { if (System.currentTimeMillis() > startTime + CONNECT_TIMEOUT) { throw new TimeoutException("Never got readiness from annotator: " + this); } if (!ready(true)) { try { Thread.sleep(1000); } catch (InterruptedException ignored) { } } else { this.server.get().ready = true; } } } } log.info("Got readiness from server for " + this); serverWasActive = true; // 4. Server is ensured! We can continue } /** {@inheritDoc} */ public void unmount() { log.info("Unmounting server: " + this); synchronized (this) { if (this.server.isPresent()) { this.server.get().kill(); this.server = Optional.empty(); } // run optional stop script try { if (stopCommand().isPresent()) { ProcessBuilder proc = new ProcessBuilder(stopCommand().get()); proc.start(); } } catch (Exception e) { log.error("Error: problem with running stop command for WebServiceAnnotator"); } } } /** {@inheritDoc} */ public void annotate(Annotation annotation) { annotate(annotation, 0); } /** * The actual implementation of {@link Annotator#annotate(Annotation)}. * This calls {@link #annotateImpl(Annotation)}, which should actually make the server calls. * This method just handles starting/stopping the server, and waiting for readiness * * @param annotation The annotation to annotate. * @param tries The number of times we have tried to annotate this document. */ private void annotate(Annotation annotation, int tries) { try { // 1. Ensure that we have a server to annotate against synchronized(this) { ensureServer(); } try { // 2. Annotate the document annotateImpl(annotation); } catch (PermanentlyFailedException e) { // 3A. We've failed to annotate. Give up // 3A.1. Stop the server synchronized (this) { if (this.server.isPresent()) { this.server.get().kill(); this.server = Optional.empty(); } } // 3A.1. Throw an exception Throwable cause = e.getCause(); if (cause != null && cause instanceof RuntimeException) { throw (RuntimeException) cause; } else if (cause != null) { throw new RuntimeException(cause); } else { throw new RuntimeException(e); } } catch (ShouldRetryException e) { // 3B. We've failed to annotate, but should maybe retry // 3B.1. Stop the server, if this is our third try synchronized (this) { if (tries >= 2 && this.server.isPresent()) { this.server.get().kill(); this.server = Optional.empty(); } } // 3B.2. Retry if (tries < 3) { annotate(annotation, tries + 1); } else { throw new RuntimeException("Could not annotate document after 3 tries:", e); } } } catch (TimeoutException | IOException e) { throw new RuntimeException("Could not ensure a server:", e); } } /** * A quick script to debug server lifecycle. */ public static void main(String[] args) throws InterruptedException { WebServiceAnnotator annotator = new WebServiceAnnotator(){ @Override public Set<Class<? extends CoreAnnotation>> requirementsSatisfied() { return Collections.emptySet(); } @Override public Set<Class<? extends CoreAnnotation>> requires() { return Collections.emptySet(); } @Override protected Optional<String[]> startCommand() { return Optional.of(new String[]{"bash", "script.sh"}); } @Override protected Optional<String[]> stopCommand() { return Optional.empty(); } @Override protected boolean ready(boolean initialTest) { return this.ping("http://localhost:8000"); } @Override protected void annotateImpl(Annotation ann) throws ShouldRetryException, PermanentlyFailedException { log.info("Fake annotated! ping=" + this.ping("http://localhost:8000")); } public String toString() { return "<test WebServiceAnnotator>"; } }; Annotation ann = new Annotation(""); annotator.annotate(ann); } }