package edu.stanford.nlp.pipeline;

import edu.stanford.nlp.util.logging.StanfordRedwoodConfiguration;
import junit.framework.TestCase;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Properties;

/**
 * A slow itest that just runs the pipeline over a whole bunch of
 * documents, making sure it doesn't crash on any of them.  Not sure
 * what to do with the output, if anything.
 *
 * @author John Bauer
 * @author Gabor Angeli (parallelism test)
 */
public class StanfordCoreNLPSlowITest extends TestCase {

  /**
   * Collects all of the {@code .sgm} files found under the
   * {@code timex2norm} subdirectories of the ACE2005 English corpus.
   * Layout expected on disk: {@code <corpus>/<subdir>/timex2norm/*.sgm}.
   *
   * @return the list of SGML files to process (possibly empty)
   * @throws IllegalStateException if the corpus root (or an expected
   *         subdirectory) cannot be listed — {@code File.listFiles()}
   *         returns null on I/O error or when the path is not a directory
   */
  static List<File> getFileList() {
    List<File> files = new ArrayList<>();
    File pathFile = new File("/u/nlp/ACE2005/" +
                             "ACE2005_Multilingual_LDC2006T06/data/English");
    File[] subFiles = pathFile.listFiles();
    if (subFiles == null) {
      // Fail with a clear message instead of an NPE deep in the loop.
      throw new IllegalStateException("Cannot list corpus directory " + pathFile);
    }
    for (File subFile : subFiles) {
      if (!subFile.isDirectory()) {
        continue;
      }
      File[] subSubFiles = subFile.listFiles();
      if (subSubFiles == null) {
        throw new IllegalStateException("Cannot list directory " + subFile);
      }
      for (File subSubFile : subSubFiles) {
        if (!subSubFile.isDirectory() ||
            !subSubFile.getName().equals("timex2norm")) {
          continue;
        }
        File[] sgmlFiles = subSubFile.listFiles();
        if (sgmlFiles == null) {
          throw new IllegalStateException("Cannot list directory " + subSubFile);
        }
        for (File sgmlFile : sgmlFiles) {
          if (sgmlFile.isDirectory() || !sgmlFile.getName().endsWith(".sgm")) {
            continue;
          }
          files.add(sgmlFile);
        }
      }
    }
    return files;
  }

  /** Quiets Redwood logging so the itest output stays readable. */
  @Override
  public void setUp() {
    StanfordRedwoodConfiguration.minimalSetup();
  }

  /**
   * Builds a full-annotator pipeline whose serialized output goes to a
   * fresh temporary directory (deleted on JVM exit).
   *
   * @return a pipeline configured with tokenize through dcoref
   * @throws IOException if the temporary output directory cannot be created
   */
  private static StanfordCoreNLP buildPipeline() throws IOException {
    // createTempFile gives us a unique name; swap the file for a directory.
    File dir = File.createTempFile("StanfordCoreNLPSlowITest", "");
    if (!dir.delete() || !dir.mkdir()) {
      // Previously ignored; a silent failure here would send pipeline
      // output to a nonexistent directory.
      throw new IOException("Could not create temp directory " + dir.getPath());
    }
    dir.deleteOnExit();
    System.out.println("Temp path: " + dir.getPath());

    Properties props = new Properties();
    props.setProperty("outputDirectory", dir.getPath());
    props.setProperty("annotators",
        "tokenize, cleanxml, ssplit, pos, lemma, ner, parse, dcoref");
    props.setProperty("serializer", "AnnotationSerializer");
    return new StanfordCoreNLP(props);
  }

  /**
   * Runs the pipeline over every corpus file, one at a time, so a crash
   * can be attributed to the specific file that caused it.
   */
  public void testNoCrashes() throws IOException {
    StanfordCoreNLP pipeline = buildPipeline();
    for (File file : getFileList()) {
      try {
        // Process files one at a time and rethrow exceptions so that
        // we know which file caused the problem.
        pipeline.processFiles(Collections.singletonList(file));
      } catch (Exception e) {
        throw new RuntimeException("Failed to process file " + file, e);
      }
    }
  }

  /**
   * Runs the pipeline over the whole corpus with one worker thread per
   * available processor, exercising the parallel processFiles path.
   */
  public void testParallelism() throws IOException {
    StanfordCoreNLP pipeline = buildPipeline();
    pipeline.processFiles(getFileList(),
                          Runtime.getRuntime().availableProcessors());
  }
}