package edu.stanford.nlp.wordseg; import edu.stanford.nlp.ling.CoreAnnotations; import edu.stanford.nlp.ling.CoreLabel; import edu.stanford.nlp.sequences.SeqClassifierFlags; import junit.framework.TestCase; import java.util.ArrayList; import java.util.List; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.Future; /** @author KellenSunderland (public domain contribution) */ public class ChineseStringUtilsTest extends TestCase { private static final Integer SEGMENT_ATTEMPTS_PER_THREAD = 100; private static final Integer THREADS = 8; /** * A small test with stubbed data that is meant to expose multithreading initialization errors * in combineSegmentedSentence. * * In my testing this reliably reproduces the crash seen in the issue: * https://github.com/stanfordnlp/CoreNLP/issues/263 * * @throws Exception Various exceptions including Interrupted, all of which should be handled by * failing the test. */ public void testMultithreadedCombineSegmentedSentence() throws Exception { SeqClassifierFlags flags = createTestFlags(); List<CoreLabel> labels = createTestTokens(); List<Future<Boolean>> tasks = new ArrayList<>(THREADS); ExecutorService executor = Executors.newFixedThreadPool(THREADS); for (int v = 0; v < THREADS; v++) { Future<Boolean> f = executor.submit(() -> { for (int i = 0; i < SEGMENT_ATTEMPTS_PER_THREAD; i++) { ChineseStringUtils.combineSegmentedSentence(labels, flags); } return true; }); tasks.add(f); } for (Future<Boolean> task : tasks) { // This assert will fail by throwing a propagated exception, if exceptions due to // multithreading issues (generally NPEs) were thrown during the test. assert (task.get()); } } // Arbitrary test input. We just need to segment something on multiple threads to reproduce // the issue private static List<CoreLabel> createTestTokens() { CoreLabel token = new CoreLabel(); token.setWord("你好,世界"); token.setValue("你好,世界"); token.set(CoreAnnotations.ChineseSegAnnotation.class, "1"); token.set(CoreAnnotations.AnswerAnnotation.class, "0"); List<CoreLabel> labels = new ArrayList<>(); labels.add(token); return labels; } // Somewhat arbitrary flags. We're just picking flags that will execute the problematic code // path. private static SeqClassifierFlags createTestFlags() { SeqClassifierFlags flags = new SeqClassifierFlags(); flags.sighanPostProcessing = true; flags.usePk = true; flags.keepEnglishWhitespaces = false; flags.keepAllWhitespaces = false; return flags; } }