package edu.stanford.nlp.pipeline; import java.util.List; import java.util.concurrent.RejectedExecutionException; import edu.stanford.nlp.ling.CoreAnnotations; import edu.stanford.nlp.util.CoreMap; import edu.stanford.nlp.util.RuntimeInterruptedException; import edu.stanford.nlp.util.concurrent.InterruptibleMulticoreWrapper; import edu.stanford.nlp.util.concurrent.ThreadsafeProcessor; /** * A parent class for annotators which might want to analyze one * sentence at a time, possibly in a multithreaded manner. * * TODO: also factor out the POS * * @author John Bauer */ public abstract class SentenceAnnotator implements Annotator { protected class AnnotatorProcessor implements ThreadsafeProcessor<CoreMap, CoreMap> { final Annotation annotation; AnnotatorProcessor(Annotation annotation) { this.annotation = annotation; } @Override public CoreMap process(CoreMap sentence) { doOneSentence(annotation, sentence); return sentence; } @Override public ThreadsafeProcessor<CoreMap, CoreMap> newInstance() { return this; } } private InterruptibleMulticoreWrapper<CoreMap, CoreMap> buildWrapper(Annotation annotation) { InterruptibleMulticoreWrapper<CoreMap, CoreMap> wrapper = new InterruptibleMulticoreWrapper<>(nThreads(), new AnnotatorProcessor(annotation), true, maxTime()); return wrapper; } @Override public void annotate(Annotation annotation) { if (annotation.containsKey(CoreAnnotations.SentencesAnnotation.class)) { if (nThreads() != 1 || maxTime() > 0) { InterruptibleMulticoreWrapper<CoreMap, CoreMap> wrapper = buildWrapper(annotation); for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) { boolean success = false; // We iterate twice for each sentence so that if we fail for // a sentence once, we start a new queue and try again. // If the sentence fails a second time we give up. for (int attempt = 0; attempt < 2; ++attempt) { try { wrapper.put(sentence); success = true; break; } catch (RejectedExecutionException e) { // If we time out, for now, we just throw away all jobs which were running at the time. // Note that in order for this to be useful, the underlying job needs to handle Thread.interrupted() List<CoreMap> failedSentences = wrapper.joinWithTimeout(); if (failedSentences != null) { for (CoreMap failed : failedSentences) { doOneFailedSentence(annotation, failed); } } // We don't wait for termination here, and perhaps this // is a mistake. If the processor used does not respect // interruption, we could easily create many threads // which are all doing useless work. However, there is // no clean way to interrupt the thread and then // guarantee it finishes without running the risk of // waiting forever for the thread to finish, which is // exactly what we don't want with the timeout. wrapper = buildWrapper(annotation); } } if (!success) { doOneFailedSentence(annotation, sentence); } while (wrapper.peek()) { wrapper.poll(); } } List<CoreMap> failedSentences = wrapper.joinWithTimeout(); while (wrapper.peek()) { wrapper.poll(); } if (failedSentences != null) { for (CoreMap failed : failedSentences) { doOneFailedSentence(annotation, failed); } } } else { for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) { if (Thread.interrupted()) { throw new RuntimeInterruptedException(); } doOneSentence(annotation, sentence); } } } else { throw new RuntimeException("unable to find sentences in: " + annotation); } } protected abstract int nThreads(); /** * The maximum time to run this annotator for, in milliseconds. */ protected abstract long maxTime(); /** annotation is included in case there is global information we care about */ protected abstract void doOneSentence(Annotation annotation, CoreMap sentence); /** * Fills in empty annotations for trees, tags, etc if the annotator * failed or timed out. Not supposed to do major processing. * * @param annotation The whole Annotation object, in case it is needed for context. * @param sentence The particular sentence to process */ protected abstract void doOneFailedSentence(Annotation annotation, CoreMap sentence); }