package semanticMarkup.ling.learn.knowledge;

import static org.junit.Assert.*;

import java.io.IOException;
import java.util.Arrays;
import java.util.HashSet;

import org.junit.Before;
import org.junit.Test;

import semanticMarkup.know.lib.WordNetPOSKnowledgeBase;
import semanticMarkup.ling.learn.Configuration;
import semanticMarkup.ling.learn.Learner;
import semanticMarkup.ling.learn.dataholder.DataHolder;
import semanticMarkup.ling.learn.utility.LearnerUtility;
import semanticMarkup.ling.transform.ITokenizer;
import semanticMarkup.ling.transform.lib.OpenNLPSentencesTokenizer;
import semanticMarkup.ling.transform.lib.OpenNLPTokenizer;

/**
 * Unit tests for {@link CoreBootstrappingLearner}, currently covering
 * {@code buildPattern(DataHolder, String[])}.
 */
public class CoreBootstrappingLearnerTest {

    private CoreBootstrappingLearner tester;
    private Configuration configuration;
    private LearnerUtility myLearnerUtility;

    /**
     * Builds a fresh learner before each test from a default
     * {@link Configuration}: OpenNLP sentence detector and tokenizer, a
     * WordNet POS knowledge base, and the {@link LearnerUtility} wrapping them.
     *
     * @throws IOException
     *             if the WordNet knowledge base cannot be created. The failure
     *             is propagated so that JUnit reports the real cause instead of
     *             the tests continuing with a {@code null} knowledge base.
     */
    @Before
    public void initialize() throws IOException {
        this.configuration = new Configuration();

        ITokenizer sentenceDetector = new OpenNLPSentencesTokenizer(
                configuration.getOpenNLPSentenceDetectorDir());
        ITokenizer tokenizer = new OpenNLPTokenizer(
                configuration.getOpenNLPTokenizerDir());
        WordNetPOSKnowledgeBase wordNetPOSKnowledgeBase = new WordNetPOSKnowledgeBase(
                configuration.getWordNetDictDir(), false);

        this.myLearnerUtility = new LearnerUtility(sentenceDetector, tokenizer,
                wordNetPOSKnowledgeBase);
        this.tester = new CoreBootstrappingLearner(myLearnerUtility, configuration);
    }

    /**
     * Checks the pattern returned by {@code buildPattern} for several word
     * sequences while the checked-word set of the data holder grows.
     */
    @Test
    public void testBuildPattern() {
        DataHolder myDataHolder = new DataHolder(this.configuration,
                myLearnerUtility.getConstant(),
                myLearnerUtility.getWordFormUtility());

        HashSet<String> wordSet = new HashSet<String>();
        wordSet.add("teeth");
        wordSet.add("unicuspid");
        wordSet.add("with");
        myDataHolder.setCheckedWordSet(wordSet);
        // NOTE(review): wordSet is mutated further below after being handed to
        // the data holder; whether those additions are visible to it depends on
        // setCheckedWordSet keeping the reference rather than copying - confirm.

        // No pattern is expected for "teeth ; 9".
        assertNull("buildPattern",
                tester.buildPattern(myDataHolder, "teeth ; 9".split(" ")));

        // Expected pattern mentions only "variously" and "arranged";
        // presumably "teeth" is left out because it is in the checked set.
        assertEquals(
                "buildPattern",
                "(?:^\\b(?:variously|arranged)\\b|^\\w+\\s\\b(?:variously|arranged)\\b|^\\w+\\s\\w+\\s\\b(?:variously|arranged)\\b).*$",
                tester.buildPattern(myDataHolder, "teeth variously arranged".split(" ")));

        wordSet.add("circuli");
        wordSet.add("present");
        wordSet.add("on");
        wordSet.add("hyohyoidei");
        wordSet.add("muscle");

        // None of the three input words is in the word set, and all three
        // appear in the expected pattern.
        assertEquals(
                "buildPattern",
                "(?:^\\b(?:does|not|cross)\\b|^\\w+\\s\\b(?:does|not|cross)\\b|^\\w+\\s\\w+\\s\\b(?:does|not|cross)\\b).*$",
                tester.buildPattern(myDataHolder, "does not cross".split(" ")));

        wordSet.addAll(Arrays.asList("lepidotrichia:of:passes:between:bases".split(":")));

        // Again "teeth" is absent from the expected pattern.
        assertEquals(
                "buildPattern",
                "(?:^\\b(?:ankylosed|to)\\b|^\\w+\\s\\b(?:ankylosed|to)\\b|^\\w+\\s\\w+\\s\\b(?:ankylosed|to)\\b).*$",
                tester.buildPattern(myDataHolder, "teeth ankylosed to".split(" ")));
    }
}