package edu.stanford.nlp.pipeline;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.NoSuchElementException;
import java.util.Set;
import edu.stanford.nlp.util.Function;
import edu.stanford.nlp.util.Generics;
import edu.stanford.nlp.util.MutableLong;
import edu.stanford.nlp.util.StringUtils;
import edu.stanford.nlp.util.Timing;
import edu.stanford.nlp.util.logging.Redwood;
/**
* This class is designed to apply multiple Annotators to an Annotation. The idea is that you first build up the pipeline by adding Annotators, and
* then you take the objects you wish to annotate and pass them in and get back in return a fully annotated object. Please see the package level
* javadoc for sample usage and a more complete description.
*
* @author Jenny Finkel
*/
public class SzTEAnnotationPipeline implements Annotator {

  /** Compile-time switch: when {@code true}, the time spent in each annotator is accumulated. */
  protected static final boolean TIME = true;

  /** The annotators of this pipeline, in the order in which they are applied. */
  private final List<Annotator> annotators;

  /**
   * One accumulator per annotator, kept parallel to {@link #annotators} (same index, same order).
   * Only allocated when {@link #TIME} is set.
   * NOTE(review): these counters are updated without any synchronization visible in this class, so
   * the figures may be approximate when documents are annotated in parallel -- confirm.
   */
  private List<MutableLong> accumulatedTime;

  /**
   * Create a pipeline from an existing list of annotators.
   * The list is stored as is (it is not copied), so {@link #addAnnotator(Annotator)} appends to it.
   *
   * @param annotators
   *          The annotators to run, in order
   */
  public SzTEAnnotationPipeline(List<Annotator> annotators) {
    this.annotators = annotators;
    if (TIME) {
      int num = annotators.size();
      accumulatedTime = new ArrayList<MutableLong>(num);
      for (int i = 0; i < num; i++) {
        accumulatedTime.add(new MutableLong());
      }
    }
  }

  /**
   * Create an empty pipeline; annotators are added with {@link #addAnnotator(Annotator)}.
   */
  public SzTEAnnotationPipeline() {
    this(new ArrayList<Annotator>());
  }

  /**
   * Append an annotator to the end of the pipeline, together with its timing accumulator.
   *
   * @param annotator
   *          The annotator to add
   */
  public void addAnnotator(Annotator annotator) {
    annotators.add(annotator);
    if (TIME) {
      accumulatedTime.add(new MutableLong());
    }
  }

  /**
   * Run the pipeline, or a subset of it, on an input annotation. The annotation is modified in place.
   * An annotator is run when {@code subAnnotators} is {@code null}, or when it contains the annotator's
   * simple class name with every occurrence of {@code "Annotator"} removed (a class named
   * {@code FooAnnotator} is selected by {@code "Foo"}).
   *
   * @param annotation
   *          The input annotation
   * @param subAnnotators
   *          Names of the annotators to run, or {@code null} to run all of them
   */
  public void annotate(Annotation annotation, Set<String> subAnnotators) {
    Iterator<MutableLong> timers = TIME ? accumulatedTime.iterator() : null;
    Timing t = new Timing();
    for (Annotator annotator : annotators) {
      // Advance the timer cursor for EVERY annotator, including skipped ones, so that elapsed time is
      // always credited to the accumulator that belongs to the annotator that actually ran.
      MutableLong timer = TIME ? timers.next() : null;
      if (subAnnotators != null) {
        String shortName = annotator.getClass().getSimpleName().replace("Annotator", "");
        if (!subAnnotators.contains(shortName)) {
          continue;
        }
      }
      if (TIME) {
        t.start();
      }
      annotator.annotate(annotation);
      if (TIME) {
        // Keep the elapsed time at full width; a narrowing cast to int could truncate it.
        timer.incValue(t.stop());
      }
    }
  }

  // ->EXTENSION
  /**
   * Run the pipeline on an input annotation. The annotation is modified in place
   *
   * @param annotation
   *          The input annotation, usually a raw document
   */
  public void annotate(Annotation annotation) {
    annotate(annotation, null);
  }

  /**
   * Annotate a collection of input annotations IN PARALLEL, making use of all available cores.
   *
   * @param annotations
   *          The input annotations to process
   */
  public void annotate(Iterable<Annotation> annotations) {
    annotate(annotations, Runtime.getRuntime().availableProcessors());
  }

  /**
   * Annotate a collection of input annotations IN PARALLEL, making use of all available cores.
   *
   * @param annotations
   *          The input annotations to process
   * @param callback
   *          A function to be called when an annotation finishes. The return value of the callback is ignored.
   */
  public void annotate(final Iterable<Annotation> annotations, final Function<Annotation, Object> callback) {
    annotate(annotations, Runtime.getRuntime().availableProcessors(), callback);
  }

  /**
   * Annotate a collection of input annotations IN PARALLEL, making use of threads given in numThreads.
   *
   * @param annotations
   *          The input annotations to process
   * @param numThreads
   *          The number of threads to run on
   */
  public void annotate(final Iterable<Annotation> annotations, int numThreads) {
    // No callback requested: supply one that does nothing.
    annotate(annotations, numThreads, new Function<Annotation, Object>() {
      @Override
      public Object apply(Annotation in) {
        return null;
      }
    });
  }

  /**
   * Annotate a collection of input annotations IN PARALLEL, making use of threads given in numThreads.
   * When {@code numThreads} is 1 the annotations are processed sequentially on the calling thread and
   * no threads are spawned.
   *
   * @param annotations
   *          The input annotations to process
   * @param numThreads
   *          The number of threads to run on
   * @param callback
   *          A function to be called when an annotation finishes. The return value of the callback is ignored.
   */
  public void annotate(final Iterable<Annotation> annotations, int numThreads, final Function<Annotation, Object> callback) {
    // case: single thread (no point in spawning threads)
    if (numThreads == 1) {
      for (Annotation ann : annotations) {
        annotate(ann);
        callback.apply(ann);
      }
      // Done. Without this return every annotation would be processed (and called back) a second
      // time by the threaded path below.
      return;
    }
    // Lazily map each input annotation to a Runnable that annotates it and then fires the callback.
    Iterable<Runnable> threads = new Iterable<Runnable>() {
      @Override
      public Iterator<Runnable> iterator() {
        final Iterator<Annotation> iter = annotations.iterator();
        return new Iterator<Runnable>() {
          @Override
          public boolean hasNext() {
            return iter.hasNext();
          }

          @Override
          public Runnable next() {
            if (!iter.hasNext()) {
              throw new NoSuchElementException();
            }
            final Annotation input = iter.next();
            return new Runnable() {
              @Override
              public void run() {
                // (logging) label the track with at most the first 50 characters of the document
                String text = input.toString();
                String beginningOfDocument = text.substring(0, Math.min(50, text.length()));
                String trackName = "Annotating \"" + beginningOfDocument + "...\"";
                Redwood.startTrack(trackName);
                // (annotate)
                annotate(input);
                // (callback)
                callback.apply(input);
                // (logging again)
                Redwood.endTrack(trackName);
              }
            };
          }

          @Override
          public void remove() {
            iter.remove();
          }
        };
      }
    };
    // Thread
    Redwood.Util.threadAndRun(this.getClass().getSimpleName(), threads, numThreads);
  }

  /**
   * Return the total pipeline annotation time in milliseconds.
   *
   * @return The total pipeline annotation time in milliseconds
   */
  protected long getTotalTime() {
    long total = 0;
    for (MutableLong m : accumulatedTime) {
      total += m.longValue();
    }
    return total;
  }

  /**
   * Return a String that gives detailed human-readable information about how much time was spent by each annotator and by the entire annotation
   * pipeline. This String includes newline characters but does not end with one, and so it is suitable to be printed out with a {@code println()}.
   * The String is empty when {@link #TIME} is not set.
   *
   * @return Human readable information on time spent in processing.
   */
  public String timingInformation() {
    StringBuilder sb = new StringBuilder();
    if (TIME) {
      sb.append("Annotation pipeline timing information:\n");
      Iterator<MutableLong> it = accumulatedTime.iterator();
      long total = 0;
      for (Annotator annotator : annotators) {
        MutableLong m = it.next();
        sb.append(StringUtils.getShortClassName(annotator)).append(": ");
        sb.append(Timing.toSecondsString(m.longValue())).append(" sec.\n");
        total += m.longValue();
      }
      sb.append("TOTAL: ").append(Timing.toSecondsString(total)).append(" sec.");
    }
    return sb.toString();
  }

  /**
   * Return the union of the requirements satisfied by every annotator in this pipeline.
   *
   * @return A freshly built set of all satisfied requirements; empty for an empty pipeline
   */
  @Override
  public Set<Requirement> requirementsSatisfied() {
    Set<Requirement> satisfied = Generics.newHashSet();
    for (Annotator annotator : annotators) {
      satisfied.addAll(annotator.requirementsSatisfied());
    }
    return satisfied;
  }

  /**
   * Return the requirements of this pipeline, taken from its first annotator only; requirements of
   * later annotators are not consulted here.
   *
   * @return The first annotator's requirements, or an empty set for an empty pipeline
   */
  @Override
  public Set<Requirement> requires() {
    if (annotators.isEmpty()) {
      return Collections.emptySet();
    }
    return annotators.get(0).requires();
  }
}