package semanticMarkup.ling.learn;

import static org.junit.Assert.*;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.junit.Before;
import org.junit.Test;

import semanticMarkup.know.lib.WordNetPOSKnowledgeBase;
import semanticMarkup.ling.Token;
import semanticMarkup.ling.learn.auxiliary.GetNounsAfterPtnReturnValue;
import semanticMarkup.ling.learn.auxiliary.StringAndInt;
import semanticMarkup.ling.learn.dataholder.DataHolder;
import semanticMarkup.ling.learn.dataholder.WordPOSKey;
import semanticMarkup.ling.learn.utility.LearnerUtility;
import semanticMarkup.ling.transform.ITokenizer;
import semanticMarkup.ling.transform.lib.OpenNLPSentencesTokenizer;
import semanticMarkup.ling.transform.lib.OpenNLPTokenizer;

/**
 * JUnit 4 tests for {@link LearnerUtility}.
 *
 * <p>Tests that only need the utility use the shared {@code tester} instance
 * rebuilt before every test by {@link #initialize()}. Tests that also need a
 * data holder obtain a fresh {@link Learner} from {@link #learnerFactory()}.
 */
public class LearnerUtilityTest {

    /** Utility under test; re-created before each test method. */
    private LearnerUtility tester;

    /** Builds a fresh {@link LearnerUtility} before each test. */
    @Before
    public void initialize() {
        this.tester = learnerUtilityFactory();
    }

    // populate sentence utilities

    /** getType maps a source file name to an integer code (1, 2, or 0 here). */
    @Test
    public void testGetType() {
        assertEquals("PopulateSent Helper - getType: character", 1,
                tester.getType("Brazeau_2009.xml_states737.txt"));
        assertEquals("PopulateSent Helper - getType: description", 2,
                tester.getType("Brazeau_2009.xml_states737_state739.txt"));
        assertEquals("PopulateSent Helper - getType: otherwise", 0,
                tester.getType("saf_saiflkds)dsljf_fls.txt"));
    }

    /** Punctuation inside brackets is replaced by placeholder tokens. */
    @Test
    public void testHideMarksInBrackets() {
        assertEquals("Result", null, tester.hideMarksInBrackets(null));
        assertEquals("Result", "", tester.hideMarksInBrackets(""));
        assertEquals("Result", "before (word[DOT] word) after",
                tester.hideMarksInBrackets("before (word. word) after"));
        assertEquals("Result", "before (word[QST] word) after",
                tester.hideMarksInBrackets("before (word? word) after"));
        assertEquals("Result", "before (word[SQL] word) after",
                tester.hideMarksInBrackets("before (word; word) after"));
        assertEquals("Result", "before (word[QLN] word) after",
                tester.hideMarksInBrackets("before (word: word) after"));
        assertEquals("Result", "before (word[EXM] word) after",
                tester.hideMarksInBrackets("before (word! word) after"));
    }

    /** Placeholder tokens inside brackets are turned back into punctuation. */
    @Test
    public void testRestoreMarksInBrackets() {
        assertEquals("Result", null, tester.restoreMarksInBrackets(null));
        assertEquals("Result", "", tester.restoreMarksInBrackets(""));
        assertEquals("Result", "before (word. word) after",
                tester.restoreMarksInBrackets("before (word[DOT] word) after"));
        assertEquals("Result", "before (word? word) after",
                tester.restoreMarksInBrackets("before (word[QST] word) after"));
        assertEquals("Result", "before (word; word) after",
                tester.restoreMarksInBrackets("before (word[SQL] word) after"));
        assertEquals("Result", "before (word: word) after",
                tester.restoreMarksInBrackets("before (word[QLN] word) after"));
        assertEquals("Result", "before (word! word) after",
                tester.restoreMarksInBrackets("before (word[EXM] word) after"));
    }

    /** Null/empty input yields an empty list; otherwise at most n leading words. */
    @Test
    public void testGetFirstNWords() {
        List<String> nWords = new ArrayList<String>();
        assertEquals("PopulateSent Helper - getFirstNWords: none", nWords,
                tester.getFirstNWords(null, -1));
        assertEquals("PopulateSent Helper - getFirstNWords: none", nWords,
                tester.getFirstNWords("", -1));
        assertEquals("PopulateSent Helper - getFirstNWords: none", nWords,
                tester.getFirstNWords(null, 1));
        assertEquals("PopulateSent Helper - getFirstNWords: none", nWords,
                tester.getFirstNWords("", 1));

        nWords.add("word1");
        nWords.add("word2");
        assertEquals("PopulateSent Helper - getFirstNWords: none", nWords,
                tester.getFirstNWords("word1 word2 word3 word4", 2));
        assertEquals("PopulateSent Helper - getFirstNWords: none", nWords,
                tester.getFirstNWords("word1 word2", 3));
    }

    /** Word counts from the sentence are added on top of the counts passed in. */
    @Test
    public void testGetAllWords() {
        Map<String, Integer> wordsBefore = new HashMap<String, Integer>();
        wordsBefore.put("word1", 1);
        wordsBefore.put("word2", 2);

        Map<String, Integer> wordsAfter = new HashMap<String, Integer>();
        wordsAfter.put("word1", 2);
        wordsAfter.put("word2", 4);
        wordsAfter.put("word3", 2);
        wordsAfter.put("word4", 1);
        wordsAfter.put("word5", 1);

        assertEquals("PopulateSent Helper - getAllWords", wordsAfter,
                tester.getAllWords("word1 word2 word3 word2 word3 word4 word5", wordsBefore));
    }

    /** A tagged word list is reduced to its one-character-per-word pattern. */
    @Test
    public void testGetSentencePtns() {
        Learner myTester = learnerFactory();

        Set<String> token = new HashSet<String>();
        token.addAll(Arrays.asList("and or nor".split(" ")));
        token.add("/");
        token.add("and / or");

        List<String> words = new ArrayList<String>();
        words.addAll(Arrays.asList(
                "distinct crown and <N>base</N> demarcated <B>by</B> <B>a</B> <N>constriction</N> <B>(</B> neck"
                        .split(" ")));

        String target = "qq&nqbbnbq";
        assertEquals("getSentencePtns", target,
                myTester.getLearnerUtility().getSentencePtn(myTester.getDataHolder(), token, 80, words));
    }

    /** Characters matching the given regex are surrounded by spaces. */
    @Test
    public void testAddSpace() {
        // null
        assertEquals("Result", null, tester.addSpace(null, null));
        // ""
        assertEquals("Result", "", tester.addSpace("", ""));
        assertEquals("Result", "word , word ; word : word ! word ? word . ",
                tester.addSpace("word,word;word:word!word?word.", "\\W"));
    }

    /** The head is the leading part of a sentence, cut at punctuation or after "of". */
    @Test
    public void testGetSentenceHead() {
        assertEquals("getSentenceHead - case 0.1 - null input", null,
                tester.getSentenceHead(null));
        assertEquals("getSentenceHead - case 0.2 - empty string input", "",
                tester.getSentenceHead(""));

        assertEquals("getSentenceHead - case 1", "word1 word2",
                tester.getSentenceHead("word1 word2 , word3"));
        assertEquals("getSentenceHead - case 1", "word1 word2",
                tester.getSentenceHead("word1 word2 : word3"));
        assertEquals("getSentenceHead - case 1", "word1 word2",
                tester.getSentenceHead("word1 word2 ; word3"));
        assertEquals("getSentenceHead - case 1", "word1 word2",
                tester.getSentenceHead("word1 word2 . word3"));
        assertEquals("getSentenceHead - case 1", "word1 word2",
                tester.getSentenceHead("word1 word2 [ word3"));
        assertEquals("getSentenceHead - case 1", "word1 word2",
                tester.getSentenceHead("word1 word2 ( word3"));

        assertEquals("getSentenceHead - case 2", "lepidotrichia",
                tester.getSentenceHead("lepidotrichia , of fin webs"));
        assertEquals("getSentenceHead - case 2", "bases of",
                tester.getSentenceHead("bases of tooth whorls"));

        assertEquals("getSentenceHead - case n", "word1 word2 word3",
                tester.getSentenceHead("word1 word2 word3"));
    }

    /** The period after a known abbreviation becomes {@code [DOT]}, case-insensitively. */
    @Test
    public void testHideAbbreviations() {
        assertEquals("hideAbbreviations", "Word1 jr[DOT] name word2.",
                tester.hideAbbreviations("Word1 jr. name word2."));
        assertEquals("hideAbbreviations", "Word1 Gen[DOT] name word2.",
                tester.hideAbbreviations("Word1 Gen. name word2."));
        assertEquals("hideAbbreviations", "Word1 uNiV[DOT] name word2.",
                tester.hideAbbreviations("Word1 uNiV. name word2."));
        assertEquals("hideAbbreviations", "Word1 blvd[DOT] name coRp[DOT] word 3 name word2.",
                tester.hideAbbreviations("Word1 blvd. name coRp. word 3 name word2."));
        assertEquals("hideAbbreviations", "Word1 bld[DOT] name coRp[DOT] word 3 name word2.",
                tester.hideAbbreviations("Word1 bld. name coRp. word 3 name word2."));
        assertEquals("hideAbbreviations", "Word1 uNiV[DOT] name coRp[DOT] word 3 name word2.",
                tester.hideAbbreviations("Word1 uNiV. name coRp. word 3 name word2."));
    }

    /** {@code [DOT]} placeholders after abbreviations are turned back into periods. */
    @Test
    public void testRestoreAbbreviations() {
        // Messages previously said "hideAbbreviations" (copy-paste), which
        // mislabelled failures of this test.
        assertEquals("restoreAbbreviations", "Word1 jr. name word2.",
                tester.restoreAbbreviations("Word1 jr[DOT] name word2."));
        assertEquals("restoreAbbreviations", "Word1 Gen. name word2.",
                tester.restoreAbbreviations("Word1 Gen[DOT] name word2."));
        assertEquals("restoreAbbreviations", "Word1 uNiV. name word2.",
                tester.restoreAbbreviations("Word1 uNiV[DOT] name word2."));
        assertEquals("restoreAbbreviations", "Word1 uNiV. name coRp. word 3 name word2.",
                tester.restoreAbbreviations("Word1 uNiV[DOT] name coRp[DOT] word 3 name word2."));
        assertEquals("restoreAbbreviations", "Word1 uNiV. name pde. word 3 name word2.",
                tester.restoreAbbreviations("Word1 uNiV[DOT] name pde[DOT] word 3 name word2."));
        assertEquals("restoreAbbreviations", "Word1 uNiV. name pd. word 3 name word2.",
                tester.restoreAbbreviations("Word1 uNiV[DOT] name pd[DOT] word 3 name word2."));
    }

    /** Sentence segmentation must not break after abbreviations such as "jr." or "DEPT.". */
    @Test
    public void testSegmentSentence() {
        String jrText = "This is jr. Gates. This is second sentence.";
        String deptText = "The Energy DEPT. is holding a work shop now. This is second sentence. ";
        String mrText = "Mr. Gates from the Energy DEPT. is holding a work shop now. This is second sentence. ";

        assertEquals("segmentSentence - handle abbreviations",
                new Token("This is jr. Gates."),
                tester.segmentSentence(jrText).get(0));
        assertEquals("segmentSentence - handle abbreviations",
                new Token("This is second sentence."),
                tester.segmentSentence(jrText).get(1));

        assertEquals("segmentSentence - handle abbreviations",
                new Token("The Energy DEPT. is holding a work shop now."),
                tester.segmentSentence(deptText).get(0));
        assertEquals("segmentSentence - handle abbreviations",
                new Token("This is second sentence."),
                tester.segmentSentence(deptText).get(1));

        assertEquals("segmentSentence - handle abbreviations",
                new Token("Mr. Gates from the Energy DEPT. is holding a work shop now."),
                tester.segmentSentence(mrText).get(0));
    }

    /** Elements are joined with '|' and regex metacharacters are escaped. */
    @Test
    public void testIterable2Pattern() {
        assertEquals("Iterable2Pattern - null", "", tester.Iterable2Pattern(null));
        assertEquals("Iterable2Pattern - empty input", "",
                tester.Iterable2Pattern(new LinkedList<String>()));

        Set<String> input = new HashSet<String>();
        input.add("word1");
        input.add("word2");
        input.add("word3");
        input.add("(");
        // NOTE(review): the expected string fixes an element order although the
        // input is a HashSet - confirm Iterable2Pattern orders its output.
        assertEquals("Iterable2Pattern - set input", "word1|word2|word3|\\(",
                tester.Iterable2Pattern(input));
    }

    /** A '|'-separated pattern is split back into the set of its alternatives. */
    @Test
    public void testPattern2Set() {
        assertEquals("pattern2Set - null input", new HashSet<String>(),
                LearnerUtility.Pattern2Set(null));
        assertEquals("pattern2Set - empty input", new HashSet<String>(),
                LearnerUtility.Pattern2Set(""));

        // The expected set used to be built with "word1|word2|word3".split("|").
        // String.split takes a regex and a bare "|" is an empty alternation, so
        // that produced single characters instead of the three words. Spell the
        // expected words out explicitly.
        Set<String> result = new HashSet<String>(Arrays.asList("word1", "word2", "word3"));
        assertEquals("pattern2Set - normal input", result,
                LearnerUtility.Pattern2Set("word2|word3|word1"));
    }

    /** Only word-like entries tagged "p" or "s" are returned; "(" is excluded. */
    @Test
    public void testGetPSWord() {
        Learner myTester = learnerFactory();
        DataHolder holder = myTester.getDataHolder();

        holder.add2Holder(DataHolder.WORDPOS,
                Arrays.asList(new String[] {"acrodin", "s", "role", "0", "0", null, null}));
        holder.add2Holder(DataHolder.WORDPOS,
                Arrays.asList(new String[] {"areas", "p", "role", "0", "0", null, null}));
        holder.add2Holder(DataHolder.WORDPOS,
                Arrays.asList(new String[] {"(", "p", "role", "0", "0", null, null}));

        Set<String> target = new HashSet<String>();
        target.add("acrodin");
        target.add("areas");

        assertEquals("getPSWords", target,
                myTester.getLearnerUtility().getPSWords(holder));
    }

    /** Of the six sentence tags added, only "tag1" and "tag2" are expected back. */
    @Test
    public void testGetO() {
        Learner myTester = learnerFactory();
        DataHolder holder = myTester.getDataHolder();

        // One sentence row per tag; every other column is a fixed dummy value.
        String[] tags = {"ignore", null, "taga tagb", "taga[tagb]", "tag1", "tag2"};
        for (String tag : tags) {
            holder.add2Holder(DataHolder.SENTENCE,
                    Arrays.asList(new String[] {"src", "sent", "osent", "lead", "status", tag, "m", "type"}));
        }

        Set<String> target = new HashSet<String>();
        target.add("tag1");
        target.add("tag2");

        assertEquals("getOs", target,
                myTester.getLearnerUtility().getOrgans(holder));
    }

    /** The backslash and null modifier entries are not expected in the result. */
    @Test
    public void testGetModifiers() {
        Learner myTester = learnerFactory();
        DataHolder holder = myTester.getDataHolder();

        String[] modifiers = {"basal", "endoskeletal", "\\", null};
        for (String modifier : modifiers) {
            holder.add2Holder(DataHolder.MODIFIER,
                    Arrays.asList(new String[] {modifier, "1", "false"}));
        }

        Set<String> target = new HashSet<String>();
        target.add("basal");
        target.add("endoskeletal");

        assertEquals("getModifiers", target,
                myTester.getLearnerUtility().getModifiers(holder));
    }

    /** Entries tagged "b" are returned as two sets: boundary words, then boundary marks. */
    @Test
    public void testGetBoundaries() {
        Learner myTester = learnerFactory();
        DataHolder holder = myTester.getDataHolder();

        String[] boundaryEntries = {
            "\\", ")", "[", "}", ".", "|", "+", "*", "?", ",", "about", "along"
        };
        for (String entry : boundaryEntries) {
            holder.add2Holder(DataHolder.WORDPOS,
                    Arrays.asList(new String[] {entry, "b", "role", "0", "0", null, null}));
        }
        // A non-boundary ("s") entry that must not show up in either set.
        holder.add2Holder(DataHolder.WORDPOS,
                Arrays.asList(new String[] {"acrodin", "s", "role", "0", "0", null, null}));

        Set<String> targetWords = new HashSet<String>();
        targetWords.addAll(Arrays.asList("about along".split(" ")));

        // NOTE(review): "," is added above but is absent from both expected
        // sets - confirm that is the intended behaviour of getBoundaries.
        Set<String> targetMarks = new HashSet<String>();
        targetMarks.addAll(Arrays.asList(") \\ [ } . | * + ?".split(" ")));

        List<Set<String>> target = new LinkedList<Set<String>>();
        target.add(targetWords);
        target.add(targetMarks);

        assertEquals("getBoundaries", target,
                myTester.getLearnerUtility().getBoundaries(holder));
    }

    /** Only word-like entries tagged "z" are returned; "(" is excluded. */
    @Test
    public void testGetProperNouns() {
        Learner myTester = learnerFactory();
        DataHolder holder = myTester.getDataHolder();

        holder.add2Holder(DataHolder.WORDPOS,
                Arrays.asList(new String[] {"propernoun1", "z", "*", "0", "0", "", null}));
        holder.add2Holder(DataHolder.WORDPOS,
                Arrays.asList(new String[] {"acrodin", "s", "role", "0", "0", null, null}));
        holder.add2Holder(DataHolder.WORDPOS,
                Arrays.asList(new String[] {"propernoun2", "z", "role", "0", "0", null, null}));
        holder.add2Holder(DataHolder.WORDPOS,
                Arrays.asList(new String[] {"(", "z", "role", "0", "0", null, null}));

        Set<String> target = new HashSet<String>();
        target.add("propernoun1");
        target.add("propernoun2");

        assertEquals("getProperNouns", target,
                myTester.getLearnerUtility().getProperNouns(holder));
    }

    /**
     * Creates a fresh {@link Learner} whose {@link LearnerUtility} shares the
     * learner's tokenizer instance.
     *
     * @return a new learner built from a default {@link Configuration}
     */
    private Learner learnerFactory() {
        Configuration myConfiguration = new Configuration();
        ITokenizer tokenizer = new OpenNLPTokenizer(myConfiguration.getOpenNLPTokenizerDir());
        LearnerUtility myLearnerUtility = buildLearnerUtility(myConfiguration, tokenizer);
        return new Learner(myConfiguration, tokenizer, myLearnerUtility);
    }

    /** Tags are stripped and hyphenated tokens are normalised with underscores. */
    @Test
    public void testTagAllSentence() {
        assertEquals("tagAllSentenceHelper", "word1 word2",
                tester.tagAllSentencesHelper("word1 <tag> word2"));
        assertEquals("tagAllSentenceHelper", "3_nerved , cup_shaped , 3 - 5 ( - 7 ) _nerved",
                tester.tagAllSentencesHelper(" 3 - nerved, cup- shaped, 3-5 (-7) -nerved "));
    }

    /** Boundary marks and boundary words are wrapped in tags; helper2 strips matched tag pairs. */
    @Test
    public void testAnnotateSentence() {
        // Test Case 1: See testUnknownWordBootstrapping - Postprocessing
        // Test Case 2:
        String input = "stems usually erect , sometimes prostrate to ascending ( underground stems sometimes woody caudices or rhizomes , sometimes fleshy ) .";
        String expected1 = "stems usually erect , sometimes prostrate to ascending <B>(</B> underground stems sometimes woody caudices or rhizomes , sometimes fleshy <B>)</B> .";
        String expected2 = "stems <B>usually</B> <B>erect</B> , sometimes prostrate to ascending <B>(</B> underground stems sometimes woody caudices or rhizomes , sometimes fleshy <B>)</B> .";

        Set<String> boundaryWords = new HashSet<String>();
        Set<String> boundaryMarks = new HashSet<String>();
        boundaryMarks.addAll(Arrays.asList("( ) [ ] { }".split(" ")));
        boundaryWords.addAll(Arrays.asList("under up upward usually erect villous was weakly".split(" ")));

        assertEquals("annotateSentenceHelper1", expected1,
                tester.annotateSentenceHelper(input, boundaryMarks, "B", false));
        assertEquals("annotateSentenceHelper1", expected2,
                tester.annotateSentenceHelper(expected1, boundaryWords, "B", true));

        assertEquals("annotateSentenceHelper2", " word ",
                tester.annotateSentenceHelper2("<B> </B> word <N> </N>"));
        assertEquals("annotateSentenceHelper2", "<B> </C> word ",
                tester.annotateSentenceHelper2("<B> </C> word <B> </B>"));
        assertEquals("annotateSentenceHelper2", "and",
                tester.annotateSentenceHelper2("<B>and</B>"));
        assertEquals("annotateSentenceHelper2", "and</B>",
                tester.annotateSentenceHelper2("and</B>"));
    }

    // ------------------------------------------------------------------
    // Disabled tests. They call methods on Learner (doItCaseHandle,
    // doItMarkup, getNounsAfterPtn, getPOSptn, isFollowedByNoun,
    // tagSentence) rather than on LearnerUtility and are kept here,
    // commented out, for reference only.
    // ------------------------------------------------------------------

//    @Test
//    public void testDoItCaseHandle(){
//        // case x: boundary case
//        Learner myTesterBoundary = learnerFactory();
//        assertEquals("CaseHandle - boundary case", null, myTesterBoundary.doItCaseHandle(null, null));
//        assertEquals("CaseHandle - boundary case", new StringAndInt("",0), myTesterBoundary.doItCaseHandle("", ""));
//
//        // case 1
//        Learner myTester1 = learnerFactory();
//        myTester1.getDataHolder().add2Holder(DataHolder.WORDPOS,
//                Arrays.asList(new String[] {"submandibular", "s", "", "0", "0", null, null}));
//        myTester1.getDataHolder().add2Holder(DataHolder.WORDPOS,
//                Arrays.asList(new String[] {"submandibulars", "p", "", "0", "0", null, null}));
//
//        assertEquals("CaseHandle - case 1", new StringAndInt("submandibulars",0), myTester1.doItCaseHandle("submandibulars", "submandibulars"));
//
//        // case 2
//        Learner myTester2 = learnerFactory();
//        myTester2.getDataHolder().add2Holder(DataHolder.SENTENCE,
//                Arrays.asList(new String[] {"src",
//                        "<N>stems</N> <B>usually</B> erect , sometimes prostrate to ascending <B>(</B> underground <N>stems</N> sometimes woody <O>caudices</O> or rhizomes , sometimes fleshy <B>)</B> . ",
//                        "Stems usually erect, sometimes prostrate to ascending (underground stems sometimes woody caudices or rhizomes, sometimes fleshy ).",
//                        "lead","status",null,"m","type"}));
//        myTester2.getDataHolder().add2Holder(DataHolder.WORDPOS,
//                Arrays.asList(new String[] {"stems", "p", "", "0", "0", null, null}));
//        myTester2.getDataHolder().add2Holder(DataHolder.WORDPOS,
//                Arrays.asList(new String[] {"stem", "s", "", "0", "0", null, null}));
//        myTester2.getDataHolder().add2Holder(DataHolder.WORDPOS,
//                Arrays.asList(new String[] {"usually", "s", "", "0", "0", null, null}));
//        String sentence =
//                "stems usually erect , sometimes prostrate to ascending ( underground stems sometimes woody caudices or rhizomes , sometimes fleshy ) .";
//        String lead = "stems usually erect";
//        assertEquals("CaseHandle - case 2", new StringAndInt("stems",1), myTester2.doItCaseHandle(sentence, lead));
//        assertEquals("CaseHandle - case 2, updatePOS - case 2.1, resolveConfict, changePOS - case 2", true, myTester2.getDataHolder().getWordPOSHolder().containsKey(new WordPOSKey("usually", "b")));
//        assertEquals("CaseHandle - case 2, discountPOS - all", false, myTester2.getDataHolder().getWordPOSHolder().containsKey(new WordPOSKey("usually", "s")));
//
//        // case 3.2
//        // This also tests method markKnown() - case 1.1
//        Learner myTester32 = learnerFactory();
//        myTester32.getDataHolder().add2Holder(DataHolder.WORDPOS,
//                Arrays.asList(new String[] {"teeth", "p", "role", "1", "1", "", ""}));
//        myTester32.getDataHolder().add2Holder(DataHolder.WORDPOS,
//                Arrays.asList(new String[] {"with", "b", "role", "1", "1", "", ""}));
//
//        myTester32.getDataHolder().add2Holder(DataHolder.UNKNOWNWORD, Arrays.asList(new String[] {"bicuspid", "unknown"}));
//        myTester32.getDataHolder().add2Holder(DataHolder.UNKNOWNWORD, Arrays.asList(new String[] {"multicuspid", "unknown"}));
//        myTester32.getDataHolder().add2Holder(DataHolder.UNKNOWNWORD, Arrays.asList(new String[] {"tricuspid", "unknown"}));
//
//        myTester32.getDataHolder().add2Holder(DataHolder.SINGULAR_PLURAL, Arrays.asList(new String[] {"tooth", "teeth"}));
//
//        assertEquals("CaseHandle - case 3.2", new StringAndInt("teeth",4),
//                myTester32.doItCaseHandle("teeth unicuspid with crowns posteriorly curved along the main axis of the mandible , organized into a long series of equally_ sized teeth",
//                        "teeth unicuspid with"));
//
//        // case 4
//        // case 4.2
//        Learner myTester42 = learnerFactory();
//        // test case 1
//        myTester42.getDataHolder().add2Holder(DataHolder.WORDPOS,
//                Arrays.asList(new String[] {"teeth", "p", "role", "1", "1", "", ""}));
//        myTester42.getDataHolder().add2Holder(DataHolder.WORDPOS,
//                Arrays.asList(new String[] {"variously", "b", "role", "0", "0", "", ""}));
//
//        myTester42.getDataHolder().add2Holder(DataHolder.SINGULAR_PLURAL, Arrays.asList(new String[] {"tooth", "teeth"}));
//        myTester42.getDataHolder().add2Holder(DataHolder.SINGULAR_PLURAL, Arrays.asList(new String[] {"base", "bases"}));
//
//        assertEquals("CaseHandle - case 4.2", new StringAndInt("teeth",0),
//                myTester42.doItCaseHandle("teeth variously arranged , but never very numerous , equally_ sized and regularly curved posteriorly along main axis of mandible",
//                        "teeth variously arranged"));
//
//        //case 4.2 - test case 2
//        myTester42.getDataHolder().add2Holder(DataHolder.WORDPOS,
//                Arrays.asList(new String[] {"muscle", "s", "role", "0", "0", "", ""}));
//        myTester42.getDataHolder().add2Holder(DataHolder.WORDPOS,
//                Arrays.asList(new String[] {"with", "b", "role", "0", "0", "", ""}));
//        assertEquals("CaseHandle - case 4.2", new StringAndInt("hyohyoidei muscle",1),
//                myTester42.doItCaseHandle("hyohyoidei muscle with a broad origin across the entire ventral surface and lateral margins of the ventrolateral wings of the urohyal",
//                        "hyohyoidei muscle with"));
//
//        //case 4.2 - test case 2
//        myTester42.getDataHolder().add2Holder(DataHolder.WORDPOS,
//                Arrays.asList(new String[] {"bases", "p", "role", "0", "0", "", ""}));
//        myTester42.getDataHolder().add2Holder(DataHolder.WORDPOS,
//                Arrays.asList(new String[] {"of", "b", "role", "2", "2", "", ""}));
//        assertEquals("CaseHandle - case 4.2", new StringAndInt("bases",0),
//                myTester42.doItCaseHandle("bases of tooth whorls", "bases of"));
//
//
//        // case 5.1.3 and case x
//        Learner myTester513x = learnerFactory();
//        myTester513x.getDataHolder().add2Holder(DataHolder.WORDPOS,
//                Arrays.asList(new String[] {"styles", "p", "role", "1", "1", "", ""}));
//        myTester513x.getDataHolder().add2Holder(DataHolder.WORDPOS,
//                Arrays.asList(new String[] {"style", "s", "role", "1", "1", "", ""}));
//        myTester513x.getDataHolder().add2Holder(DataHolder.WORDPOS,
//                Arrays.asList(new String[] {"branches", "p", "role", "23", "23", "", ""}));
//        myTester513x.getDataHolder().add2Holder(DataHolder.WORDPOS,
//                Arrays.asList(new String[] {"branch", "s", "role", "23", "23", "", ""}));
//        StringAndInt result513x = myTester513x.doItCaseHandle("styles branches :", "styles branches");
//        StringAndInt target513x = new StringAndInt("branches",1);
//        assertEquals("CaseHandle - case 5.1.3 and case x", result513x, target513x);
//
//        Learner myTester52 = learnerFactory();
//        myTester52.getDataHolder().add2Holder(DataHolder.WORDPOS,
//                Arrays.asList(new String[] {"basal", "b", "role", "30", "30", "", ""}));
//        myTester52.getDataHolder().add2Holder(DataHolder.WORDPOS,
//                Arrays.asList(new String[] {"leaf", "s", "role", "0", "0", "", ""}));
//        myTester52.getDataHolder().add2Holder(DataHolder.WORDPOS,
//                Arrays.asList(new String[] {"blades", "p", "role", "63", "63", "", ""}));
//        myTester52.getDataHolder().add2Holder(DataHolder.WORDPOS,
//                Arrays.asList(new String[] {"linear_lanceolate", "b", "role", "2", "2", "", ""}));
//        myTester52.getDataHolder().add2Holder(DataHolder.MODIFIER,
//                Arrays.asList(new String[] {"basal", "1", "false"}));
//        StringAndInt result52 = myTester52.doItCaseHandle(
//                "basal leaf blades linear_lanceolate , 3 ?10 cm , margins entire or with remote linear lobes , apices acute ;",
//                "basal leaf blades");
//        StringAndInt target52 = new StringAndInt("basal leaf blades", 0);
//        assertEquals("CaseHandle - case 5.2", result52, target52);
//
//
//        // case 6.2
//        Learner myTester62 = learnerFactory();
//
//        myTester62.getDataHolder().add2Holder(DataHolder.WORDPOS,
//                Arrays.asList(new String[] {"cauline", "b", "role", "1", "1", "", ""}));
//        myTester62.getDataHolder().add2Holder(DataHolder.WORDPOS,
//                Arrays.asList(new String[] {"much", "s", "role", "1", "1", "", ""}));
//
//        myTester62.getDataHolder().add2Holder(DataHolder.WORDPOS,
//                Arrays.asList(new String[] {"reduced", "b", "role", "11", "11", "", ""}));
//        myTester62.getDataHolder().add2Holder(DataHolder.WORDPOS,
//                Arrays.asList(new String[] {"distally", "b", "role", "2", "2", "", ""}));
//
//        StringAndInt returnedValue62 = myTester62.doItCaseHandle(
//                "principal cauline much reduced distally , sessile , bases decurrent or not , as spiny wings ;",
//                "principal cauline much");
//        assertEquals("CaseHandle - case 6.2", "principal cauline much", returnedValue62.getString());
//
//
////        assertEquals(myTester7.doItCase7Helper("^s(\\?)$", "s?");
//
////        // case 7
////        Learner myTester7 = new Learner(myConfiguration, myUtility);
////        assertEquals(myTester7.doItCase7Helper("^s(\\?)$", "s?");
//
//        // case 9
//        Learner myTester9 = learnerFactory();
//        myTester9.getDataHolder().add2Holder(DataHolder.WORDPOS,
//                Arrays.asList(new String[] {"basal", "b", "role", "24", "24", "", ""}));
//        myTester9.getDataHolder().add2Holder(DataHolder.WORDPOS,
//                Arrays.asList(new String[] {"leaves", "p", "role", "112", "112", "", ""}));
//        myTester9.getDataHolder().add2Holder(DataHolder.WORDPOS,
//                Arrays.asList(new String[] {"leaf", "s", "role", "112", "112", "", ""}));
//        assertEquals("CaseHandle - case 9", new StringAndInt("basal leaves",0),
//                myTester9.doItCaseHandle("basal leaves :", "basal leaves"));
//
//        // case 10
//        // case 10.1.1
//        Learner myTester10_1_1 = learnerFactory();
//        myTester10_1_1.getDataHolder().add2Holder(DataHolder.WORDPOS,
//                Arrays.asList(new String[] {"of", "b", "role", "0", "0", "", ""}));
//        assertEquals("CaseHandle - case 10.1.1", new StringAndInt("teeth",2),
//                myTester10_1_1.doItCaseHandle("teeth of dentary",
//                        "teeth of"));
//
//        myTester10_1_1.getDataHolder().add2Holder(DataHolder.WORDPOS,
//                Arrays.asList(new String[] {"on", "b", "role", "4", "4", "", ""}));
//        assertEquals("CaseHandle - case 10.1.1", new StringAndInt("foramina",2),
//                myTester10_1_1.doItCaseHandle("foramina on external surface of lower jaw",
//                        "foramina on"));
//        // case 10.1.2
//        Learner myTester10_1_2 = learnerFactory();
//        myTester10_1_2.finiteSetsLoader.run(myTester10_1_2.getDataHolder());
//
//        assertEquals("CaseHandle - case 10.1.1", new StringAndInt("stems",2),
//                myTester10_1_2.doItCaseHandle("stems 1 ?several , erect or ascending , densely gray_tomentose ",
//                        "stems NUM several"));
//
//
//        // case 10.2
//        Learner myTester10_2 = learnerFactory();
//        myTester10_2.getDataHolder().add2Holder(DataHolder.WORDPOS,
//                Arrays.asList(new String[] {"between", "b", "role", "0", "0", "", ""}));
//        myTester10_2.getDataHolder().add2Holder(DataHolder.WORDPOS,
//                Arrays.asList(new String[] {"scales", "p", "role", "0", "0", "", ""}));
//        myTester10_2.getDataHolder().add2Holder(DataHolder.SINGULAR_PLURAL, Arrays.asList(new String[] {"scale", "scales"}));
//
//        assertEquals("CaseHandle - case 10.2", new StringAndInt("",0),
//                myTester10_2.doItCaseHandle("passes between scales",
//                        "passes between"));
//
//        // case 0
//        Learner myTester0 = learnerFactory();
//
//        myTester0.getDataHolder().add2Holder(DataHolder.WORDPOS,
//                Arrays.asList(new String[] {"does", "b", "role", "0", "0", "", ""}));
//        myTester0.getDataHolder().add2Holder(DataHolder.WORDPOS,
//                Arrays.asList(new String[] {"not", "b", "role", "0", "0", "", ""}));
//
//        assertEquals("CaseHandle - case 0", new StringAndInt("",0),
//                myTester0.doItCaseHandle("does not cross over the anterodorsal corner of opercular bone",
//                        "does not cross"));
//
//    }

//    @Test
//    public void testDoItMarkup() {
//        Learner myTester = learnerFactory();
//
//        myTester.getDataHolder().add2Holder(DataHolder.SENTENCE, Arrays.asList(new String[] {
//                "src", "sent nor", "osent","lead","status",null,"m","type"}));
//        myTester.getDataHolder().add2Holder(DataHolder.SENTENCE, Arrays.asList(new String[] {
//                "src", "sent and", "osent","lead","status","","m","type"}));
//        myTester.getDataHolder().add2Holder(DataHolder.SENTENCE, Arrays.asList(new String[] {
//                "src", "sent", "osent","lead","status","unknown","m","type"}));
////        assertEquals("doItMarkup - case 1", 0, myTester.doItMarkup());
//
//        assertEquals("doItMarkup - Helper - true", true, myTester.doItMarkupHelper(null));
//        assertEquals("doItMarkup - Helper - true", true, myTester.doItMarkupHelper(""));
//        assertEquals("doItMarkup - Helper - true", true, myTester.doItMarkupHelper("unknown"));
//        assertEquals("doItMarkup - Helper - false", false, myTester.doItMarkupHelper("abc"));
//
//        assertEquals("doItMarkup - case 1 - true", true, myTester.doItMarkupCase1Helper("postcleithra 2 and 3 fused into a single ossification"));
//        assertEquals("doItMarkup - case 1 - false", false, myTester.doItMarkupCase1Helper("postcleithra 2 3 fused into a single ossification"));
//
//        assertEquals("doItMarkup - case 2 - true", true, myTester.doItMarkupCase2Helper("ossified as autogenous units"));
//        assertEquals("doItMarkup - case 2 - false", false, myTester.doItMarkupCase2Helper("ossified autogenous units"));
//
//    }

//    @Test
//    public void testGetNounsAfterPtn() {
//        Learner myTester = learnerFactory();
//
//        myTester.getDataHolder().add2Holder(DataHolder.WORDPOS, Arrays.asList(new String[] {"margins", "p", "role", "0", "0", null, null}));
//        myTester.getDataHolder().add2Holder(DataHolder.WORDPOS, Arrays.asList(new String[] {"often", "b", "role", "0", "0", null, null}));
//        myTester.getDataHolder().add2Holder(DataHolder.WORDPOS, Arrays.asList(new String[] {"??", "b", "role", "0", "0", null, null}));
//        myTester.getDataHolder().add2Holder(DataHolder.WORDPOS, Arrays.asList(new String[] {"deeply", "b", "role", "0", "0", null, null}));
//
//        List<String> nouns = new ArrayList<String>();
//        nouns.add("margins");
//        List<String> nounPtn = new ArrayList<String>();
//        nounPtn.add("p");
//        String bWord = "often";
//        GetNounsAfterPtnReturnValue target = new GetNounsAfterPtnReturnValue(nouns, nounPtn, bWord);
//
//        assertEquals("getNounsAfterPtn", target, myTester.getNounsAfterPtn("proximal blade margins often ?? deeply lobed , ( spiny in c . benedicta ) , distal ?smaller , often entire , faces glabrous or ?tomentose , sometimes also villous , strigose , or puberulent , often glandular_punctate .", 2));
//
//    }

//    @Test
//    public void testGetPOSptn(){
//        Learner myTester = learnerFactory();
//
//        myTester.getDataHolder().add2Holder(DataHolder.WORDPOS,
//                Arrays.asList(new String[] {"teeth", "p", "role", "1", "1", "", ""}));
//
//        myTester.getDataHolder().add2Holder(DataHolder.WORDPOS,
//                Arrays.asList(new String[] {"unicuspid", "p", "role", "1", "3", "", ""}));
//
//        myTester.getDataHolder().add2Holder(DataHolder.WORDPOS,
//                Arrays.asList(new String[] {"with", "b", "role", "1", "1", "", ""}));
//
//        assertEquals("getPOSptn", "p?b", myTester.getPOSptn(Arrays.asList("teeth unicuspid with".split(" "))));
//    }

//    @Test
//    public void testIsFollowedByNoun() {
//        Learner myTester = learnerFactory();
//
//        myTester.getDataHolder().add2Holder(DataHolder.WORDPOS, Arrays.asList(new String[] {"rhombic", "b", "role", "0", "0", null, null}));
//        myTester.getDataHolder().add2Holder(DataHolder.WORDPOS, Arrays.asList(new String[] {"bones", "p", "role", "0", "0", null, null}));
//        myTester.getDataHolder().add2Holder(DataHolder.WORDPOS, Arrays.asList(new String[] {"radial", "s", "role", "0", "0", null, null}));
//
//        assertEquals("isFollowedByNoun - null case", false, myTester.isFollowedByNoun(null, null));
//        assertEquals("isFollowedByNoun - empty case", false, myTester.isFollowedByNoun("", ""));
//        assertEquals("isFollowedByNoun", true, myTester.isFollowedByNoun("foramina on dermal cheek bones", "foramina on"));
//        assertEquals("isFollowedByNoun", true, myTester.isFollowedByNoun("foramina on bones", "foramina on"));
//        assertEquals("isFollowedByNoun", false, myTester.isFollowedByNoun("teeth of dentary", "teeth of"));
//    }

//    @Test
//    public void testTagSentence() {
//        Learner myTester = learnerFactory();
//        myTester.getConfiguration().setMaxTagLength(10);
//
//        myTester.getDataHolder().add2Holder(DataHolder.SENTENCE, Arrays.asList(new String[] {"src", "sent", "osent","lead","status","tag","m","type"}));
//
//        assertEquals("tagIt - case 1", false, myTester.tagSentence(0, ""));
//        assertEquals("tagIt - case 2", false, myTester.tagSentence(0, "page"));
//        assertEquals("tagIt - case 3", true, myTester.tagSentence(0, "teeth"));
//        assertEquals("tagIt - max tag length", "teeth", myTester.getDataHolder().getSentenceHolder().get(0).getTag());
//
//        assertEquals("tagIt - case 3", true, myTester.tagSentence(0, "abcdefghijkl"));
//        //myTester.tagSentence(0, "abcdefghijkl");
//        assertEquals("tagIt - max tag length", "abcdefghij", myTester.getDataHolder().getSentenceHolder().get(0).getTag());
//    }

    /**
     * Creates a stand-alone {@link LearnerUtility} from a default
     * {@link Configuration}.
     *
     * @return a new utility with its own tokenizer and sentence detector
     */
    public LearnerUtility learnerUtilityFactory() {
        Configuration myConfiguration = new Configuration();
        ITokenizer tokenizer = new OpenNLPTokenizer(myConfiguration.getOpenNLPTokenizerDir());
        return buildLearnerUtility(myConfiguration, tokenizer);
    }

    /**
     * Shared fixture construction used by {@link #learnerFactory()} and
     * {@link #learnerUtilityFactory()}.
     *
     * @param configuration source of the sentence-detector and WordNet directories
     * @param tokenizer tokenizer handed to the utility (callers may reuse it)
     * @return a new utility wired to the given tokenizer
     */
    private static LearnerUtility buildLearnerUtility(Configuration configuration, ITokenizer tokenizer) {
        ITokenizer sentenceDetector =
                new OpenNLPSentencesTokenizer(configuration.getOpenNLPSentenceDetectorDir());

        WordNetPOSKnowledgeBase wordNetPOSKnowledgeBase = null;
        try {
            wordNetPOSKnowledgeBase =
                    new WordNetPOSKnowledgeBase(configuration.getWordNetDictDir(), false);
        } catch (IOException e) {
            // NOTE(review): the failure is only printed and the utility is then
            // built with a null knowledge base, exactly as before. Confirm this
            // best-effort setup is intended, or fail the fixture here instead.
            e.printStackTrace();
        }

        return new LearnerUtility(sentenceDetector, tokenizer, wordNetPOSKnowledgeBase);
    }
}