package semanticMarkup.ling.learn.knowledge; import static org.junit.Assert.*; import java.io.IOException; import java.util.Arrays; import java.util.HashSet; import java.util.Set; import org.junit.Before; import org.junit.Test; import semanticMarkup.know.lib.WordNetPOSKnowledgeBase; import semanticMarkup.ling.learn.Configuration; import semanticMarkup.ling.learn.Learner; import semanticMarkup.ling.learn.dataholder.DataHolder; import semanticMarkup.ling.learn.dataholder.SentenceStructure; import semanticMarkup.ling.learn.utility.LearnerUtility; import semanticMarkup.ling.learn.utility.WordFormUtility; import semanticMarkup.ling.transform.ITokenizer; import semanticMarkup.ling.transform.lib.OpenNLPSentencesTokenizer; import semanticMarkup.ling.transform.lib.OpenNLPTokenizer; public class UnknownWordBootstrappingTest { private UnknownWordBootstrappingLearner unknownWordBootstrappingModule; @Before public void initialize() { Configuration myConfiguration = new Configuration(); ITokenizer sentenceDetector = new OpenNLPSentencesTokenizer( myConfiguration.getOpenNLPSentenceDetectorDir()); ITokenizer tokenizer = new OpenNLPTokenizer(myConfiguration.getOpenNLPTokenizerDir()); WordNetPOSKnowledgeBase wordNetPOSKnowledgeBase = null; try { wordNetPOSKnowledgeBase = new WordNetPOSKnowledgeBase(myConfiguration.getWordNetDictDir(), false); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } LearnerUtility learnerUtility = new LearnerUtility(sentenceDetector, tokenizer, wordNetPOSKnowledgeBase); this.unknownWordBootstrappingModule = new UnknownWordBootstrappingLearner(learnerUtility); } @Test public void testUnknownWordBootstrapping(){ // 1. Preprocessing // Learner myTester1 = learnerFactory(); // myTester1.getDataHolder().add2Holder(DataHolder.UNKNOWNWORD, Arrays.asList("word1 unknown".split(" "))); // Set<String> expected = new HashSet<String>(); //// expected.add("") // assertEquals("unknownWordBootstrappingGetUnknownWord", expected , myTester1.unknownWordBootstrappingGetUnknownWord("(ee)")); // 3. Postprocessing DataHolder myDataHolder3 = dataholderFactory(); myDataHolder3.add2Holder(DataHolder.WORDPOS, Arrays.asList(new String[] {"word1", "p", "role", "0", "0", "", ""})); myDataHolder3.add2Holder(DataHolder.WORDPOS, Arrays.asList(new String[] {"word2", "b", "role", "0", "0", "", ""})); myDataHolder3.add2Holder(DataHolder.WORDPOS, Arrays.asList(new String[] {"word3", "s", "role", "0", "0", "", ""})); myDataHolder3.add2Holder(DataHolder.UNKNOWNWORD, Arrays.asList("word1 word1".split(" "))); myDataHolder3.add2Holder(DataHolder.UNKNOWNWORD, Arrays.asList("word2 unknown".split(" "))); myDataHolder3.add2Holder(DataHolder.UNKNOWNWORD, Arrays.asList("_wORd3 unknown".split(" "))); myDataHolder3.add2Holder(DataHolder.UNKNOWNWORD, Arrays.asList("word?_4 unknown".split(" "))); myDataHolder3.add2Holder(DataHolder.UNKNOWNWORD, Arrays.asList("nor unknown".split(" "))); myDataHolder3.add2Holder(DataHolder.UNKNOWNWORD, Arrays.asList("word_6 unknown".split(" "))); myDataHolder3.getSentenceHolder().add(new SentenceStructure(0, "src", "word1 word_6 word2", "osent","lead","status","tag","modifer","type")); myDataHolder3.getSentenceHolder().add(new SentenceStructure(1, "src", "word_6 word2", "osent","lead","status","tag","modifer","type")); myDataHolder3.getSentenceHolder().add(new SentenceStructure(2, "src", "word1 word6 word2", "osent","lead","status","tag","modifer","type")); unknownWordBootstrappingModule.unknownWordBootstrappingPostprocessing(myDataHolder3); assertEquals("unknownWordBootstrapping - Postprocessing", "word1 <B>word_6</B> word2", myDataHolder3.getSentence(0).getSentence()); assertEquals("unknownWordBootstrapping - Postprocessing", "<B>word_6</B> word2", myDataHolder3.getSentence(1).getSentence()); assertEquals("unknownWordBootstrapping - Postprocessing", "word1 word6 word2", myDataHolder3.getSentence(2).getSentence()); } @Test public void testIsVerbEnding(){ DataHolder myDataHolder = dataholderFactory(); myDataHolder.add2Holder(DataHolder.UNKNOWNWORD, Arrays.asList("bearing unknown".split(" "))); myDataHolder.add2Holder(DataHolder.UNKNOWNWORD, Arrays.asList("doubling unknown".split(" "))); assertEquals("isVerbEnding - case 1 - true", true, unknownWordBootstrappingModule.isVerbEnding(myDataHolder, "doubles")); assertEquals("isVerbEnding - case 1 - false", false, unknownWordBootstrappingModule.isVerbEnding(myDataHolder, "achenes")); assertEquals("isVerbEnding - case 2 - true", true, unknownWordBootstrappingModule.isVerbEnding(myDataHolder, "bears")); assertEquals("isVerbEnding - case 2 - false", false, unknownWordBootstrappingModule.isVerbEnding(myDataHolder, "Armenia")); } private DataHolder dataholderFactory() { DataHolder tester; Configuration myConfiguration = new Configuration(); WordNetPOSKnowledgeBase wordNetPOSKnowledgeBase = null; try { wordNetPOSKnowledgeBase = new WordNetPOSKnowledgeBase(myConfiguration.getWordNetDictDir(), false); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } WordFormUtility wordFormUtility = new WordFormUtility(wordNetPOSKnowledgeBase); Constant myConstant = new Constant(); tester = new DataHolder(myConfiguration, myConstant, wordFormUtility); return tester; } }