package edu.stanford.nlp.pipeline;
import edu.stanford.nlp.util.PropertiesUtils;
import junit.framework.TestCase;
import java.io.File;
import java.io.IOException;
import java.io.StringWriter;
import java.util.ArrayList;
import java.util.List;
import edu.stanford.nlp.io.IOUtils;
public class ThreadedStanfordCoreNLPSlowITest extends TestCase {
static List<String> convertAnnotations(List<Annotation> annotations,
StanfordCoreNLP pipeline)
throws IOException
{
List<String> converted = new ArrayList<String>();
for (Annotation annotation : annotations) {
StringWriter out = new StringWriter();
pipeline.xmlPrint(annotation, out);
converted.add(out.toString());
}
return converted;
}
static List<String> getAnnotations(List<File> files,
StanfordCoreNLP pipeline)
throws IOException
{
List<Annotation> annotations = new ArrayList<>();
for (File file : files) {
String text = IOUtils.slurpFile(file);
Annotation annotation = pipeline.process(text);
annotations.add(annotation);
System.out.println("Processed " + annotations.size());
}
return convertAnnotations(annotations, pipeline);
}
static class CoreNLPThread extends Thread {
List<String> annotations;
private final List<File> files;
private final StanfordCoreNLP pipeline;
CoreNLPThread(List<File> files, StanfordCoreNLP pipeline) {
this.files = files;
this.pipeline = pipeline;
}
@Override
public void run() {
try {
annotations = getAnnotations(files, pipeline);
} catch (IOException e) {
throw new RuntimeException(e);
}
}
}
private static final int numThreads = 2;
private static final int numDocs = 10;
public void testTwoThreads() throws Exception {
StanfordCoreNLP pipeline = new StanfordCoreNLP(PropertiesUtils.asProperties("maxAdditionalKnownLCWords", "0"));
List<File> files = StanfordCoreNLPSlowITest.getFileList();
files = files.subList(0, numDocs);
List<String> baseline = getAnnotations(files, pipeline);
CoreNLPThread[] threads = new CoreNLPThread[numThreads];
for (int i = 0; i < numThreads; ++i) {
threads[i] = new CoreNLPThread(files, pipeline);
threads[i].start();
}
for (int i = 0; i < numThreads; ++i) {
threads[i].join();
assertEquals("Thread " + i + " did not produce " +
baseline.size() + " results",
baseline.size(), threads[i].annotations.size());
}
for (int i = 0; i < baseline.size(); ++i) {
//System.out.println("Baseline " + i + ":");
//System.out.println(baseline.get(i));
for (int j = 0; j < numThreads; ++j) {
//System.out.println("Thread " + j + " annotation " + i + ":");
//System.out.println(threads[j].annotations.get(i));
assertEquals("Annotating document " + i + ": thread " + j + " produced annotation:\n" +
threads[j].annotations.get(i) + "versus the baseline:\n" + baseline.get(i),
baseline.get(i), threads[j].annotations.get(i));
}
}
}
}