package semanticMarkup.ling.learn.knowledge; import static org.junit.Assert.*; import java.io.IOException; import org.junit.Before; import org.junit.Test; import semanticMarkup.know.lib.WordNetPOSKnowledgeBase; import semanticMarkup.ling.learn.Configuration; import semanticMarkup.ling.learn.Learner; import semanticMarkup.ling.learn.utility.LearnerUtility; import semanticMarkup.ling.transform.ITokenizer; import semanticMarkup.ling.transform.lib.OpenNLPSentencesTokenizer; import semanticMarkup.ling.transform.lib.OpenNLPTokenizer; public class InitializerTest { private Initializer tester; @Before public void initialize() { this.tester = InitiationFactory(); } private Initializer InitiationFactory() { Initializer tester; Configuration myConfiguration = new Configuration(); ITokenizer tokenizer = new OpenNLPTokenizer( myConfiguration.getOpenNLPTokenizerDir()); ITokenizer sentenceDetector = new OpenNLPSentencesTokenizer( myConfiguration.getOpenNLPSentenceDetectorDir()); WordNetPOSKnowledgeBase wordNetPOSKnowledgeBase = null; try { wordNetPOSKnowledgeBase = new WordNetPOSKnowledgeBase(myConfiguration.getWordNetDictDir(), false); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } LearnerUtility myLearnerUtility = new LearnerUtility(sentenceDetector, tokenizer, wordNetPOSKnowledgeBase); tester = new Initializer(myLearnerUtility, myConfiguration.getNumLeadWords()); return tester; } @Test public void testHandleSentence() { // handleString // null assertEquals("Result", null, tester.handleSentence(null)); // "" assertEquals("Result", "", tester.handleSentence("")); // remove (.a.) assertEquals("Result", "word word word word .", tester.handleSentence("word (.a.) word (a) word ( a ) word.")); // remove [.a.] assertEquals("Result", "word word word word .", tester.handleSentence("word [.a.] word [a] word [ a ] word.")); // remove {.a.} assertEquals("Result", "word word word word .", tester.handleSentence("word {.a.} word {a} word { a } word.")); // to fix basi- and hypobranchial assertEquals( "Result", "word cup_ shaped word cup_ shaped word cup_ shaped word .", tester.handleSentence("word cup --- shaped word cup-shaped word cup --------- shaped word.")); // multiple spaces => 1 space assertEquals("Result", "word word word .", tester.handleSentence("word word word.")); // remove multipe spaces at the beginning assertEquals("Result", "word word .", tester.handleSentence(" word word.")); // remove multipe spaces at the rear assertEquals("Result", "word word .", tester.handleSentence("word word. ")); } @Test public void testHandleText() { // handleTest (Fully finished - Dongye 01/08) // null assertEquals("Result", null, tester.handleText(null)); // "" assertEquals("Result", "", tester.handleText("")); // remove " and ' assertEquals("Result", "words word", tester.handleText("word's wo\"rd")); // plano - to assertEquals("Result", "word to word", tester.handleText("word -to word")); // assertEquals("Result", "word -shaped", tester.handleText("word ______shaped")); // unhide <i> assertEquals("Result", "word <i> word.", tester.handleText("word <i> word.")); // unhide </i> assertEquals("Result", "word </i> word.", tester.handleText("word </i> word.")); // remove 2a. (key marks) assertEquals("Result", "word", tester.handleText("7b. word")); // remove HTML entities assertEquals("Result", "word word", tester.handleText("word & word")); // " & " => " and " assertEquals("Result", "word and word.", tester.handleText("word & word.")); // "_" => "-" assertEquals("Result", "word-word.", tester.handleText("word_word.")); // absent ; => absent; assertEquals("Result", "word; word; word.", tester.handleText("word ;word ;word.")); // absent;blade => absent; blade assertEquals("Result", "word; word; word.", tester.handleText("word;word;word.")); assertEquals("Result", "word: word. word.", tester.handleText("word:word.word.")); // 1 . 5 => 1.5 assertEquals("Result", "word 1.5 word 384739.84 word.", tester.handleText("word 1 . 5 word 384739 . 84 word.")); // #diam . =>diam. assertEquals("Result", "word diam. word diam. word.", tester.handleText("word diam . word diam . word.")); // ca . =>ca. assertEquals("Result", "word ca. word ca. word.", tester.handleText("word ca . word ca . word.")); // cm|mm|dm|m assertEquals("Result", "word 12 cm[DOT] word 376 mm[DOT] word.", tester.handleText("word 12 cm . word 376 mm. word.")); } }