package semanticMarkup.ling.learn;
import static org.junit.Assert.*;
import java.io.IOException;
import java.util.AbstractCollection;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.junit.Before;
import org.junit.Test;
import semanticMarkup.know.lib.WordNetPOSKnowledgeBase;
import semanticMarkup.ling.learn.auxiliary.GetNounsAfterPtnReturnValue;
import semanticMarkup.ling.learn.auxiliary.StringAndInt;
import semanticMarkup.ling.learn.dataholder.DataHolder;
import semanticMarkup.ling.learn.dataholder.SentenceStructure;
import semanticMarkup.ling.learn.dataholder.WordPOSKey;
import semanticMarkup.ling.learn.knowledge.Constant;
import semanticMarkup.ling.learn.utility.LearnerUtility;
import semanticMarkup.ling.transform.ITokenizer;
import semanticMarkup.ling.transform.lib.OpenNLPSentencesTokenizer;
import semanticMarkup.ling.transform.lib.OpenNLPTokenizer;
public class LearnerTest {
// Learner instance used by the tests that do not build their own via learnerFactory().
private Learner tester;
/**
 * Stores the result of learnerFactory() in {@code tester}. Annotated with
 * {@code @Before}, so JUnit invokes it ahead of every test method.
 */
@Before
public void initialize() {
this.tester = learnerFactory();
}
// @Test
// public void testLearn() {
// Configuration myConfiguration = new Configuration();
// Utility myUtility = new Utility(myConfiguration);
// DataHolder results = new DataHolder(myConfiguration, myUtility);
//
// Map<String, String> myHeuristicNounTable = results
// .getHeuristicNounTable();
// myHeuristicNounTable.put("word1", "type1");
//
// List<Sentence> mySentenceTable = results.getSentenceHolder();
// mySentenceTable.add(new Sentence(0, "source1", "sentence1",
// "originalSentence", "lead1", "status1", "tag1", "modifier1",
// "type1"));
//
// // Learner tester = new Learner("plain","res/WordNet/WordNet-3.0/dict");
//
// // assertEquals ("learner", results, tester.Learn(tms));
//
// // results = tester.Learn(tms);
//
// // assertEquals ("learner", results, tester.Learn(tms));
// }
// @Test
// public void testPopulateUnknownWordsTable() {
// fail("Not yet implemented");
// }
/**
 * Placeholder: the body contains no statements, so this test always passes.
 * The notes below point at "doItCaseHandle case 2" -- presumably the scenario
 * that exercises discountPOS; confirm before relying on it for coverage.
 */
@Test
public void testDiscountPOS() {
// case "all"
// see doItCaseHandle case 2
}
/**
 * Placeholder: the body contains no statements, so this test always passes.
 * The note below points at "doItCaseHandle case 2" -- presumably the scenario
 * that exercises conflict resolution; confirm before relying on it for coverage.
 * NOTE(review): the method name is spelled "Confict"; "Conflict" is probably intended.
 */
@Test
public void testResolveConfict() {
// see doItCaseHandle case 2
}
/**
 * Placeholder: the body contains no statements, so this test always passes.
 * The note below points at "doItCaseHandle case 2" -- presumably the scenario
 * that exercises changePOS; confirm before relying on it for coverage.
 */
@Test
public void testChangePOS(){
// see doItCaseHandle case 2
}
/**
 * Placeholder: the body contains no statements, so this test always passes.
 * The note below points at "doItCaseHandle case 2" -- presumably the scenario
 * that exercises updatePOS; confirm before relying on it for coverage.
 */
@Test
public void testUpdatePOS(){
// see doItCaseHandle case 2
}
// @Test
// public void testGetParentSentenceTag() {
// fail("Not yet implemented");
// }
// @Test
// public void testTagSentWithMT() {
// fail("Not yet implemented");
// }
// @Test
// public void testProcessNewWord() {
// fail("Not yet implemented");
// }
// @Test
// public void testSingularPluralVariations() {
// fail("Not yet implemented");
// }
// @Test
// public void testUpdateUnknownWords() {
// fail("Not yet implemented");
// }
// @Test
// public void testAddHeuristicsNouns() {
// fail("Not yet implemented");
// }
//
// @Test
// public void testAddDescriptors() {
// fail("Not yet implemented");
// }
//
// @Test
// public void testAddNouns() {
// fail("Not yet implemented");
// }
// @Test
// public void testGetHeuristicsNouns() {
// fail("Not yet implemented");
// }
/**
 * getHeuristicsNounsHelper: the candidate "septa[s]", checked against a word
 * set that holds "septa", "word1" and "septum", is expected to come back as
 * "septa[p]".
 */
@Test
public void testGetHeuristicsNounsHelper() {
    HashSet<String> knownWords = new HashSet<String>(
            Arrays.asList("septa", "word1", "septum"));
    final String candidate = "septa[s]";
    final String expected = "septa[p]";
    assertEquals("getHeuristicsNouns - handleSpecialCase 1", expected,
            tester.getHeuristicsNounsHelper(candidate, knownWords));
}
/**
 * Table-driven check of getPresentAbsentNouns. Each row holds the assertion
 * message, the expected return value and the sentence handed to the method;
 * rows are evaluated in the order listed.
 */
@Test
public void testGetPresentAbsentNouns() {
    String[][] cases = {
        // { message, expected, input sentence }
        {"getPresentAbsentNouns - no present/absent", "",
            "only one pair of abcly presen"},
        {"getPresentAbsentNouns - and|or|to", "",
            "only one pair of and present"},
        {"getPresentAbsentNouns - STOP words", "",
            "only one pair of without absent"},
        {"getPresentAbsentNoun - always|often|seldom|sometimes|[a-z]+lys", "",
            "only one pair of abcly present"},
        {"getPresentAbsentNouns - PENDINGS", "circuli[p]",
            "only one pair of circuli absent"},
        {"getPresentAbsentNouns - end with ss", "glass[s]",
            "only one pair of glass absent"},
        {"getPresentAbsentNouns - end with none ss", "computers[p]",
            "only one pair of computers absent"},
        {"getPresentAbsentNouns - teeth", "teeth[p]",
            "only one pair of teeth present"},
        {"getPresentAbsentNouns - not SENDINGS", "serum[s]",
            "only one pair of serum absent"},
        {"getPresentAbsentNouns - SENDINGS", "computer[s]",
            "only one pair of computer absent"},
    };
    for (String[] row : cases) {
        assertEquals(row[0], row[1], tester.getPresentAbsentNouns(row[2]));
    }
}
// @Test
// public void testCharacterHeuristics() {
// fail("Not yet implemented");
// }
//
// @Test
// public void testAdd2HeuristicNounTable() {
// fail("Not yet implemented");
// }
//
// @Test
// public void testFilterOutDescriptors() {
// fail("Not yet implemented");
// }
/**
 * Nouns rule 0 (taxon name nouns): getTaxonNameNouns is expected to return an
 * empty set when the text has no italic markup or the markup is empty, and
 * otherwise each marked-up phrase together with its individual words.
 */
@Test
public void testGetTaxonNameNouns() {
    Set<String> nothing = new HashSet<String>();
    assertEquals("getTaxonNameNouns - not match", nothing,
            tester.getTaxonNameNouns("word word word"));
    assertEquals("getTaxonNameNouns - empty taxon name", nothing,
            tester.getTaxonNameNouns("< i >< / i >"));

    Set<String> expectedNames = new HashSet<String>(Arrays.asList(
            "word1 word2 word3", "word1", "word2", "word3",
            "word4 word5", "word4", "word5"));
    String marked = "< i >word1 word2 word3< / i>, < i >word4 word5< /i>";
    assertEquals("getTaxonNameNouns - match", expectedNames,
            tester.getTaxonNameNouns(marked));
}
/**
 * Nouns rule 0.5: getNounsMecklesCartilage is expected to return an empty set
 * for text without "Meckel#s", and the three lower-cased variants
 * "meckel#s", "meckels" and "meckel" when it is present.
 *
 * The assertion messages name the method under test; they previously read
 * "getTaxonNameNouns", which made a failure point at the wrong method.
 */
@Test
public void testGetNounsMecklesCartilage() {
    Set<String> nouns = new HashSet<String>();
    assertEquals("getNounsMecklesCartilage - not match", nouns,
            tester.getNounsMecklesCartilage("word word word"));
    nouns.add("meckel#s");
    nouns.add("meckels");
    nouns.add("meckel");
    assertEquals("getNounsMecklesCartilage - match", nouns,
            tester.getNounsMecklesCartilage("word Meckel#s word"));
}
/**
 * getNounsRule1: for the given source name, the original sentence "term1" and
 * an empty descriptor map, the expected result is the single noun "term1".
 */
@Test
public void testGetNounsRule1() {
    final String source =
            "Chang_2004.xml_ ffa60eb1-4320-4e69-b151-75a2615dca4b_29482156-8083-430c-91f4-e80209b50138.txt-0";
    HashMap<String, Boolean> emptyDescriptorMap = new HashMap<String, Boolean>();
    Set<String> expected = new HashSet<String>(Arrays.asList("term1"));
    assertEquals("getNounsRule1", expected,
            tester.getNounsRule1(source, "term1", emptyDescriptorMap));
}
/**
 * getNounsRule2: checks one sentence expected to yield no nouns and three
 * sentences expected to yield "nouna" and, from the second match onwards,
 * "nounb" as well.
 *
 * Each assertion carries its own message; the last two previously shared
 * "match 2", so a failure could not be attributed to a specific sentence.
 */
@Test
public void testGetNounsRule2() {
    Set<String> nouns2 = new HashSet<String>();
    assertEquals("getNounsRule2 - not match", nouns2,
            tester.getNounsRule2("word word word soe width nea"));
    nouns2.add("nouna");
    assertEquals("getNounsRule2 - match 1", nouns2,
            tester.getNounsRule2("word word word some nouna"));
    nouns2.add("nounb");
    assertEquals(
            "getNounsRule2 - match 2",
            nouns2,
            tester.getNounsRule2("word some nouna near word some width near word third nounb near end"));
    // Same expectation as match 2: the extra trailing words must not add a noun.
    assertEquals(
            "getNounsRule2 - match 3",
            nouns2,
            tester.getNounsRule2("word some nouna near word some width near word third nounb near end nounc abction of end"));
}
/**
 * getNounsRule3Helper: the first sentence is expected to yield the tokens
 * "II" and "IX", the second the capitalised word "Meckelian".
 */
@Test
public void testGetNounsRule3Helper() {
    Set<String> expectedNumerals = new HashSet<String>(Arrays.asList("II", "IX"));
    String numeralSentence =
            "posterior and dorsal to foramen for nerve II (i.e. a posterior oblique myodome IX)";
    assertEquals("getNounsRule3", expectedNumerals,
            tester.getNounsRule3Helper(numeralSentence));

    Set<String> expectedCapitalised = new HashSet<String>(Arrays.asList("Meckelian"));
    String capitalisedSentence = "Pronounced dorsal process on Meckelian element";
    assertEquals("getNounsRule3", expectedCapitalised,
            tester.getNounsRule3Helper(capitalisedSentence));
}
/**
 * getNounsRule4: a word followed by a spelled-out number ("noun one") is
 * expected to yield nothing; "nouna 1" is expected to yield "nouna"; and in
 * the last sentence only "NoUnb 2" is expected to yield a noun, lower-cased
 * to "nounb" ("page 1" and "above 2" are expected to be skipped).
 *
 * The second and third assertions expect a match, so their messages now say
 * so; all three previously read "not match".
 */
@Test
public void testGetNounsRule4() {
    Set<String> nouns4 = new HashSet<String>();
    assertEquals("getNounsRule4 - not match", nouns4,
            tester.getNounsRule4("word word word noun one"));
    nouns4.add("nouna");
    assertEquals("getNounsRule4 - match 1", nouns4,
            tester.getNounsRule4("word word word nouna 1"));
    nouns4.remove("nouna");
    nouns4.add("nounb");
    assertEquals(
            "getNounsRule4 - match 2",
            nouns4,
            tester.getNounsRule4("word word word page 1 word above 2 word NoUnb 2 end"));
}
/**
 * getDescriptorsRule1: "absent" and "present" are expected to be returned as
 * descriptors when the noun set is empty, and "present" is expected to be
 * dropped when it is already listed in the noun set.
 */
@Test
public void testGetDescriptorsRule1() {
    final String absentSource = "Brazeau_2009.xml_states200_state202.txt-0";
    final String presentSource = "Brazeau_2009.xml_states200_state203.txt-0";

    Set<String> expectedAbsent = new HashSet<String>(Arrays.asList("absent"));
    assertEquals("getDescriptorsRule1", expectedAbsent,
            tester.getDescriptorsRule1(absentSource, "absent", new HashSet<String>()));

    // "present" already known as a noun: nothing should be reported.
    Set<String> knownNouns = new HashSet<String>(Arrays.asList("present"));
    assertEquals("getDescriptorsRule1", new HashSet<String>(),
            tester.getDescriptorsRule1(presentSource, "present", knownNouns));

    Set<String> expectedPresent = new HashSet<String>(Arrays.asList("present"));
    assertEquals("getDescriptorsRule1", expectedPresent,
            tester.getDescriptorsRule1(presentSource, "present", new HashSet<String>()));
}
// @Test
// public void testGetDescriptorsRule2() {
// fail("Not yet implemented");
// }
/**
 * Despite its name, this test exercises filterOutDescriptors: given a noun set
 * and a descriptor set that share "descriptor2", the expected result is the
 * noun set without that shared entry.
 */
@Test
public void testIsDescriptor() {
    Set<String> candidateNouns = new HashSet<String>(
            Arrays.asList("noun1", "descriptor2", "noun2"));
    Set<String> descriptors = new HashSet<String>(
            Arrays.asList("descriptor1", "descriptor2", "descriptor3"));
    Set<String> expected = new HashSet<String>(Arrays.asList("noun1", "noun2"));
    assertEquals("filterOutDescriptors", expected,
            tester.filterOutDescriptors(candidateNouns, descriptors));
}
/**
 * isMatched: starting from a descriptor map where "term1" maps to false, a
 * sentence containing "was term1" is expected to match, and the call is
 * expected to flip the map entry for "term1" to true.
 */
@Test
public void testIsMatched() {
    final String term = "term1";
    final String sentence = "begin word word was term1 word word end";
    Map<String, Boolean> descriptorMap = new HashMap<String, Boolean>();
    descriptorMap.put(term, false);

    // Precondition: the entry starts out false.
    assertEquals("isMatched", false, descriptorMap.get(term));
    assertEquals("isMatched", true,
            tester.isMatched(sentence, term, descriptorMap));
    // Side effect: the entry is now true.
    assertEquals("isMatched", true, descriptorMap.get(term));
}
// @Test
// public void testAddStopWords() {
// fail("Not yet implemented");
// }
//
// @Test
// public void testAddCharacters() {
// fail("Not yet implemented");
// }
//
// @Test
// public void testAddNumbers() {
// fail("Not yet implemented");
// }
//
// @Test
// public void testAddClusterstrings() {
// fail("Not yet implemented");
// }
//
// @Test
// public void testAddProperNouns() {
// fail("Not yet implemented");
// }
//
/**
 * Smoke test: calls posBySuffix() on the shared tester and makes no
 * assertions, so it only fails if the call throws. The two patterns noted
 * below are presumably the ones posBySuffix applies -- verify against Learner.
 */
@Test
public void testPosBySuffix() {
// Pattern 1: ^[a-z_]+(er|est|fid|form|ish|less|like|ly|merous|most|shaped)$
// Pattern 2: ^[._.][a-z]+
tester.posBySuffix();
}
/**
 * posBySuffixCase1Helper: "approximately" and "bifid" are expected to match,
 * "bigger" and "per" are not. Words are checked in the order listed.
 */
@Test
public void testPosBySuffixCase1Helper(){
    String[] words = {"approximately", "bigger", "bifid", "per"};
    boolean[] shouldMatch = {true, false, true, false};
    for (int i = 0; i < words.length; i++) {
        String message = shouldMatch[i]
                ? "posBySuffix Case1 - match"
                : "posBySuffix Case1 - not match";
        assertEquals(message, shouldMatch[i], tester.posBySuffixCase1Helper(words[i]));
    }
}
/**
 * posBySuffixCase2Helper: a word with a leading underscore ("_nerved") is
 * expected to match, the same word without it ("nerved") is not.
 */
@Test
public void testPosBySuffixCase2Helper(){
    final String withPrefix = "_nerved";
    final String withoutPrefix = "nerved";
    assertEquals("posBySuffix Case2 - match", true,
            tester.posBySuffixCase2Helper(withPrefix));
    assertEquals("posBySuffix Case2 - not match", false,
            tester.posBySuffixCase2Helper(withoutPrefix));
}
/**
 * Table-driven check of containSuffix(word, base, suffix). Each row of
 * {@code inputs} holds the assertion message followed by the three arguments;
 * {@code expected} holds the matching result at the same index. Rows are
 * evaluated in the order listed. Cases 3.1.2 and 3.3.3 are not covered.
 */
@Test
public void testContainSuffix() {
    String[][] inputs = {
        // { message, word, base, suffix }
        {"containSuffix less", "less", "", "less"},
        {"containSuffix ly", "slightly", "slight", "ly"},
        {"containSuffix er", "fewer", "few", "er"},
        {"containSuffix est", "fastest", "fast", "est"},
        {"containSuffix base is in WN", "platform", "plat", "form"},
        {"containSuffix sole adj", "scalelike", "scale", "like"},
        {"containSuffix 111", "anterolaterally", "anterolateral", "ly"},
        {"containSuffix 121", "mesially", "mesial", "ly"},
        {"containSuffix 122", "per", "p", "er"},
        {"containSuffix 212", "border", "bord", "er"},
        {"containSuffix 212", "bigger", "bigg", "er"},
        {"containSuffix 221", "anteriorly", "anterior", "ly"},
        {"containSuffix 222", "corner", "corn", "er"},
        {"containSuffix 222", "lower", "low", "er"},
        {"containSuffix 223", "bifid", "bi", "fid"},
    };
    boolean[] expected = {
        true, true, false, true, true, true,   // less .. scalelike
        false, false, false,                   // 111, 121, 122
        false, false,                          // 212 (border, bigger)
        true,                                  // 221
        false, true,                           // 222 (corner, lower)
        true,                                  // 223
    };
    for (int i = 0; i < inputs.length; i++) {
        String[] row = inputs[i];
        assertEquals(row[0], expected[i],
                tester.containSuffix(row[1], row[2], row[3]));
    }
}
/**
 * markupByPattern: a holder with one sentence whose original text starts with
 * "x=" is expected to end up with that sentence tagged "chromosome" and its
 * modifier emptied; the other fields are expected to be unchanged.
 */
@Test
public void testMarkupByPattern() {
    Learner learner = learnerFactory();
    List<String> row = Arrays.asList("source1", "sentence1", "x=word word word",
            "lead1", "status1", "tag1", "modifier1", "type1");
    learner.getDataHolder().add2Holder(DataHolder.SENTENCE, row);

    learner.markupByPattern();

    List<SentenceStructure> expected = new LinkedList<SentenceStructure>();
    expected.add(new SentenceStructure(0, "source1", "sentence1", "x=word word word",
            "lead1", "status1", "chromosome", "", "type1"));
    assertEquals("markupByPattern", expected,
            learner.getDataHolder().getSentenceHolder());
}
/**
 * markupByPatternHelper, seven cases. {@code inputs[i]} is passed to the
 * helper and then compared with {@code expected[i]}. Cases 1-5 (original
 * text starting with "x=", "2n=", "x ", "2n " and "2 n") expect the tag
 * "chromosome", case 6 ("fl. ") expects "flowerTime" and case 7 ("fr.")
 * expects "fruitTime"; every case expects an empty modifier.
 */
@Test
public void testMarkupByPatternHelper(){
    SentenceStructure[] inputs = {
        new SentenceStructure(0, "source1", "sentence1", "x=word word word", "lead1", "status1", "tag1", "modifier1", "type1"),
        new SentenceStructure(1, "source2", "sentence2", "2n=abc...", "lead2", "status2", "tag2", "modifier2", null),
        new SentenceStructure(2, "source", "sentence", "x word word", "lead", "status", "tag", "modifier", null),
        new SentenceStructure(3, "source", "sentence", "2n word word", "lead", null, "tag", "modifier", null),
        new SentenceStructure(4, "source", "sentence", "2 nword word", "lead", "status", "tag", "modifier", ""),
        new SentenceStructure(5, "source", "sentence", "fl. word word", "lead", "status", null, null, ""),
        new SentenceStructure(6, "source", "sentence", "fr.word word", "lead", "status", null, "", ""),
    };
    SentenceStructure[] expected = {
        new SentenceStructure(0, "source1", "sentence1", "x=word word word", "lead1", "status1", "chromosome", "", "type1"),
        new SentenceStructure(1, "source2", "sentence2", "2n=abc...", "lead2", "status2", "chromosome", "", null),
        new SentenceStructure(2, "source", "sentence", "x word word", "lead", "status", "chromosome", "", null),
        new SentenceStructure(3, "source", "sentence", "2n word word", "lead", null, "chromosome", "", null),
        new SentenceStructure(4, "source", "sentence", "2 nword word", "lead", "status", "chromosome", "", ""),
        new SentenceStructure(5, "source", "sentence", "fl. word word", "lead", "status", "flowerTime", "", ""),
        new SentenceStructure(6, "source", "sentence", "fr.word word", "lead", "status", "fruitTime", "", ""),
    };
    for (int i = 0; i < inputs.length; i++) {
        tester.markupByPatternHelper(inputs[i]);
        // Case numbers in the messages are 1-based.
        assertEquals("markupByPatternHelper - case " + (i + 1), expected[i], inputs[i]);
    }
}
/**
 * markupIgnore: a holder with one sentence whose original text is "IGNOREPTN"
 * is expected to end up with that sentence tagged "ignore" and its modifier
 * emptied; the other fields are expected to be unchanged.
 */
@Test
public void testMarkupIgnore() {
    Learner learner = learnerFactory();
    List<String> row = Arrays.asList("source1", "sentence1", "IGNOREPTN",
            "lead1", "status1", "tag1", "modifier1", "type1");
    learner.getDataHolder().add2Holder(DataHolder.SENTENCE, row);

    learner.markupIgnore();

    List<SentenceStructure> expected = new LinkedList<SentenceStructure>();
    expected.add(new SentenceStructure(0, "source1", "sentence1", "IGNOREPTN",
            "lead1", "status1", "ignore", "", "type1"));
    assertEquals("markupIgnore", expected,
            learner.getDataHolder().getSentenceHolder());
}
/**
 * markupIgnoreHelper: a sentence whose original text is "IGNOREPTN", with or
 * without a leading space, is expected to receive the tag "ignore".
 */
@Test
public void testMarkupIgnoreHelper() {
    SentenceStructure[] inputs = {
        new SentenceStructure(0, "source", "sentence", "IGNOREPTN", "lead", "status", null, "", ""),
        new SentenceStructure(1, "source", "sentence", " IGNOREPTN", "lead", "status", null, "", ""),
    };
    SentenceStructure[] expected = {
        new SentenceStructure(0, "source", "sentence", "IGNOREPTN", "lead", "status", "ignore", "", ""),
        new SentenceStructure(1, "source", "sentence", " IGNOREPTN", "lead", "status", "ignore", "", ""),
    };
    for (int i = 0; i < inputs.length; i++) {
        tester.markupIgnoreHelper(inputs[i]);
        assertEquals("markupIgnoreHelper", expected[i], inputs[i]);
    }
}
// @Test
// public void testDiscover() {
// fail("Not yet implemented");
// }
//
// @Test
// public void testRuleBasedLearn() {
// fail("Not yet implemented");
// }
//
// @Test
// public void testDoIt() {
// fail("Not yet implemented");
// }
//
// @Test
// public void testGetPOSptn() {
// fail("Not yet implemented");
// }
//
// @Test
// public void testCheckPOSInfo() {
// fail("Not yet implemented");
// }
//
// @Test
// public void testTagIt() {
// fail("Not yet implemented");
// }
//
// @Test
// public void testMatchPattern() {
// fail("Not yet implemented");
// }
/**
 * hasHead(head, list): expected to be false for a null head and for a head
 * that is not a prefix of the list, and true for an empty head, a proper
 * prefix, and a head identical to the list.
 */
@Test
public void testHasHead(){
    assertEquals("hasHead - null", false,
            tester.hasHead(null,
                    Arrays.asList("passing", "through", "most")));
    // Second word of the list is "throug", so the head does not line up.
    assertEquals("hasHead - not has", false,
            tester.hasHead(Arrays.asList("passing", "through"),
                    Arrays.asList("passing", "throug", "most")));
    assertEquals("hasHead - empty head", true,
            tester.hasHead(new ArrayList<String>(),
                    Arrays.asList("passing", "through", "most")));
    assertEquals("hasHead - has", true,
            tester.hasHead(Arrays.asList("passing", "through"),
                    Arrays.asList("passing", "through", "most")));
    assertEquals("hasHead - head same as list", true,
            tester.hasHead(Arrays.asList("passing", "through", "most"),
                    Arrays.asList("passing", "through", "most")));
}
/**
 * Currently disabled: every statement in the body is commented out, so the
 * test executes nothing and always passes. The commented-out code sketches
 * two wrapupMarkup scenarios (sentence and word-POS fixtures followed by
 * assertions on the resulting sentence tags).
 */
@Test
public void testWrapupMarkup() {
// // case 1
// Learner myTester1 = learnerFactory();
//
// myTester1.getDataHolder().getSentenceHolder().add(new SentenceStructure(7, "src", "sent", "osent","sensory line not null","status","notnull","modifer","type"));
// myTester1.getDataHolder().getSentenceHolder().add(new SentenceStructure(192, "src", "sent", "osent","sensory line ignore","status","ignore","modifer","type"));
// myTester1.getDataHolder().getSentenceHolder().add(new SentenceStructure(193, "src", "sent", "osent","sensory line canal","status",null,"modifer","type"));
// myTester1.getDataHolder().getSentenceHolder().add(new SentenceStructure(267, "src", "sent", "osent","sensory line canals","status",null,"modifer","type"));
// myTester1.getDataHolder().getSentenceHolder().add(new SentenceStructure(269, "src", "sent", "osent","opening via tubular","status",null,"modifer","type"));
//
// myTester1.getDataHolder().add2Holder(DataHolder.WORDPOS, Arrays.asList(new String[] {"line", "s", "*", "1", "1", "", null}));
// myTester1.getDataHolder().add2Holder(DataHolder.WORDPOS, Arrays.asList(new String[] {"canals", "p", "*", "1", "1", "", null}));
//
// myTester1.wrapupMarkup();
//
// assertEquals("wrapupmarkup - case 1 - tag sentence", "sensory line canal", myTester1.getDataHolder().getSentence(193).getTag());
// assertEquals("wrapupmarkup - case 1 - tag sentence", "sensory line", myTester1.getDataHolder().getSentence(267).getTag());
//
// // case 2
// Learner myTester2 = learnerFactory();
//
// myTester2.getDataHolder().getSentenceHolder().add(new SentenceStructure(115, "src", "sent", "osent","midsagittal fontanel absent","status",null,"modifer","type"));
// myTester2.getDataHolder().getSentenceHolder().add(new SentenceStructure(116, "src", "sent", "osent","midsagittal fontanel present","status",null,"modifer","type"));
//
// myTester2.getDataHolder().add2Holder(DataHolder.WORDPOS, Arrays.asList(new String[] {"fontanel", "s", "*", "1", "1", "", null}));
// myTester2.getDataHolder().add2Holder(DataHolder.WORDPOS, Arrays.asList(new String[] {"absent", "b", "*", "1", "1", "", null}));
// myTester2.getDataHolder().add2Holder(DataHolder.WORDPOS, Arrays.asList(new String[] {"present", "b", "*", "1", "1", "", null}));
//
// myTester2.wrapupMarkup();
//
// assertEquals("wrapupmarkup - case 2 - tag sentence", "midsagittal fontanel", myTester2.getDataHolder().getSentence(115).getTag());
// assertEquals("wrapupmarkup - case 2 - tag sentence", "midsagittal fontanel", myTester2.getDataHolder().getSentence(116).getTag());
}
/**
 * oneLeadWordMarkup: seven sentences with ids 0-6 are added to the holder,
 * differing only in lead and tag. After the call with the holder's current
 * tags, sentence 3 (single-word lead "tagx", initially untagged) is expected
 * to carry the tag "tagx".
 */
@Test
public void testOneLeadMarkup(){
    Learner learner = learnerFactory();
    // Row i describes sentence i: { lead, tag }.
    String[][] leadAndTag = {
        {"lead1 lead2", "tag tag"},
        {"midsagittal fontanel present", null},
        {"midsagittal fontanel present", "tag1"},
        {"tagx", null},
        {"tagx tagx", null},
        {"midsagittal fontanel present", "tagx"},
        {"midsagittal fontanel", "tag2"},
    };
    for (int id = 0; id < leadAndTag.length; id++) {
        learner.getDataHolder().getSentenceHolder().add(
                new SentenceStructure(id, "src", "sent", "osent",
                        leadAndTag[id][0], "status", leadAndTag[id][1], "modifer", "type"));
    }

    learner.oneLeadWordMarkup(learner.getDataHolder().getCurrentTags());

    assertEquals("oneLeadMarkup", "tagx",
            learner.getDataHolder().getSentence(3).getTag());
}
/**
 * unknownWordBootstrappingPostprocessing: with three word-POS rows, six
 * unknown-word rows and three sentences loaded, the call is expected to wrap
 * "word_6" in {@code <B>} tags in sentences 0 and 1 and to leave sentence 2
 * (which contains "word6", not "word_6") untouched. The method is then
 * invoked a second time without further assertions.
 */
@Test
public void testUnknownWordBootstrapping(){
    // // 1. Preprocessing
    // Learner myTester1 = learnerFactory();
    // myTester1.getDataHolder().add2Holder(DataHolder.UNKNOWNWORD, Arrays.asList("word1 unknown".split(" ")));
    // Set<String> expected = new HashSet<String>();
    //// expected.add("")
    // assertEquals("unknownWordBootstrappingGetUnknownWord", expected , myTester1.unknownWordBootstrappingGetUnknownWord("(ee)"));

    // 3. Postprocessing
    Learner learner = learnerFactory();

    String[][] wordPosRows = {
        {"word1", "p", "role", "0", "0", "", ""},
        {"word2", "b", "role", "0", "0", "", ""},
        {"word3", "s", "role", "0", "0", "", ""},
    };
    for (String[] row : wordPosRows) {
        learner.getDataHolder().add2Holder(DataHolder.WORDPOS, Arrays.asList(row));
    }

    String[][] unknownWordRows = {
        {"word1", "word1"},
        {"word2", "unknown"},
        {"_wORd3", "unknown"},
        {"word?_4", "unknown"},
        {"nor", "unknown"},
        {"word_6", "unknown"},
    };
    for (String[] row : unknownWordRows) {
        learner.getDataHolder().add2Holder(DataHolder.UNKNOWNWORD, Arrays.asList(row));
    }

    String[] sentences = {"word1 word_6 word2", "word_6 word2", "word1 word6 word2"};
    for (int id = 0; id < sentences.length; id++) {
        learner.getDataHolder().getSentenceHolder().add(
                new SentenceStructure(id, "src", sentences[id], "osent",
                        "lead", "status", "tag", "modifer", "type"));
    }

    learner.unknownWordBootstrappingPostprocessing();

    String[] expectedSentences = {
        "word1 <B>word_6</B> word2",
        "<B>word_6</B> word2",
        "word1 word6 word2",
    };
    for (int id = 0; id < expectedSentences.length; id++) {
        assertEquals("unknownWordBootstrapping - Postprocessing", expectedSentences[id],
                learner.getDataHolder().getSentence(id).getSentence());
    }

    learner.unknownWordBootstrappingPostprocessing();
}
/**
 * dittoHelper: called with sentence id 0, a tagged sentence and the noun- and
 * modifier-phrase patterns. The three sentences below are expected to return
 * 0, 1 and 21 respectively.
 */
@Test
public void testDittoHelper() {
    final String nounPhrase = "(?:<[A-Z]*[NO]+[A-Z]*>[^<]+?<\\/[A-Z]*[NO]+[A-Z]*>\\s*)+";
    final String modifierPhrase = "(?:<[A-Z]*M[A-Z]*>[^<]+?<\\/[A-Z]*M[A-Z]*>\\s*)+";
    Learner learner = learnerFactory();

    String[] taggedSentences = {
        "prismatic calcified <N>cartilage</N>",
        "<B>absent</B>",
        "<B>in</B> tubes below visceral surface <B>of</B> <M>dermal</M> <N>bone</N>",
    };
    int[] expectedCodes = {0, 1, 21};
    for (int i = 0; i < taggedSentences.length; i++) {
        assertEquals("ditto helper", expectedCodes[i],
                learner.dittoHelper(learner.getDataHolder(), 0, taggedSentences[i],
                        nounPhrase, modifierPhrase));
    }
}
/**
 * phraseClauseHelper: the first tagged sentence is expected to yield an empty
 * list; the second and third are expected to yield a two-element list of an
 * empty string followed by "heads" and "decurrent" respectively.
 *
 * The assertion messages spell the method name correctly; they previously
 * read "phraseChauseHelper".
 */
@Test
public void testPhraseClauseHelper() {
    Learner myTester = learnerFactory();

    String sentence = "mid and distal <B>progressively</B> smaller , <B>becoming</B> <B>sessile</B> , <B>narrower</B> , <N>bases</N> obtuse to acuminate , <M><B>cauline</B></M> <B>usually</B> 15 or fewer <B>.</B>";
    assertEquals("phraseClauseHelper - empty return", new ArrayList<String>(), myTester.phraseClauseHelper(sentence));

    sentence = "<M><B>cauline</B></M> <B>linear</B> or <B>oblong</B> , <B>crowded</B> or well separated , <B>usually</B> <B>not</B> surpassing <N>heads</N> <B>.</B>";
    List<String> target = new ArrayList<String>(2);
    target.add("");
    target.add("heads");
    assertEquals("phraseClauseHelper", target, myTester.phraseClauseHelper(sentence));

    sentence = "distal <M><B>cauline</B></M> <B>sessile</B> , ?<N>decurrent</N> <B>.</B>";
    target.clear();
    target.add("");
    target.add("decurrent");
    assertEquals("phraseClauseHelper", target, myTester.phraseClauseHelper(sentence));
}
/**
 * pronounCharacterSubjectHelper(lead, sentence, modifier, tag): one call with
 * a null modifier and tag that is expected to return null, followed by four
 * cases whose expected result is a two-element list (first and second
 * element as listed per case).
 */
@Test
public void testPronounCharacterSubjectHelper() {
    Learner learner = learnerFactory();

    // null modifier and tag
    String nullModifier = null;
    String nullTag = null;
    assertEquals("pronounCharacterSubjectHelper null", null,
            learner.pronounCharacterSubjectHelper(
                    "prismatic calcified cartilage",
                    "prismatic calcified <N>cartilage</N>",
                    nullModifier, nullTag));

    // case 1.1.1
    List<String> expected111 = new ArrayList<String>(Arrays.asList("", "lateral"));
    assertEquals("pronounCharacterSubjectHelper case 1.1.1", expected111,
            learner.pronounCharacterSubjectHelper(
                    "size of",
                    "<B>size</B> <B>of</B> <N>lateral</N> <B>gular</B>",
                    "", "ditto"));

    // case 1.2.1.1
    List<String> expected1211 = new ArrayList<String>(Arrays.asList("body ", "scale"));
    assertEquals("pronounCharacterSubjectHelper case 1.2.1.1", expected1211,
            learner.pronounCharacterSubjectHelper(
                    "body scale profile",
                    "<M>body</M> <N>scale</N> <B>profile</B>",
                    "body", "scale"));

    // case 1.2.1.2
    List<String> expected1212 = new ArrayList<String>(Arrays.asList("", "ditto"));
    assertEquals("pronounCharacterSubjectHelper case 1.2.1.2", expected1212,
            learner.pronounCharacterSubjectHelper(
                    "lyre_ shaped",
                    "<N>lyre_</N> <B>shaped</B>",
                    "", "lyre_"));

    // case 1.2.2
    List<String> expected122 = new ArrayList<String>(Arrays.asList("", "ditto"));
    assertEquals("pronounCharacterSubjectHelper case 1.2.2", expected122,
            learner.pronounCharacterSubjectHelper(
                    "shape of",
                    "<B>shape</B> <B>of</B> opercular <N>ossification</N>",
                    "", "ditto"));
}
/**
 * pronounCharacterSubjectHelper4: with a null modifier and a null tag the
 * call is expected to return null. A second scenario ("skull shape") is kept
 * below in commented-out form; {@code target} is only referenced there.
 */
@Test
public void testPronounCharacterSubjectHelper4() {
    Learner learner = learnerFactory();
    List<String> target = new ArrayList<String>(2);

    // null modifier and tag
    String lead = "prismatic calcified cartilage";
    String sentence = "prismatic calcified <N>cartilage</N>";
    String modifier = null;
    String tag = null;
    assertEquals("pronounCharacterSubjectHelper null", null,
            learner.pronounCharacterSubjectHelper4(lead, sentence, modifier, tag));

    //
    // lead = "skull shape";
    // sentence = "<N>skull</N> <B>shape</B>";
    // modifier = "";
    // tag = "skull";
    // target.clear();
    // target.add("");
    // target.add("skull");
    // assertEquals("pronounCharacterSubjectHelper4", target, myTester.pronounCharacterSubjectHelper(lead, sentence, modifier, tag));
}
/**
 * andOrTagCase1Helper(pattern, wPattern, words, token): two cases, each
 * comparing the returned list of lists against {@code target}.
 *
 * {@code target} holds eight sub-lists in fixed order: mPatterns, mSegments,
 * sPatterns, sSegments, tagAndModifier1, tagAndModifier2, update1, update2.
 * The same list instances are cleared and refilled for the second case, so
 * {@code target} itself is never rebuilt.
 */
@Test
public void testAndOrTagCase1Helper() {
Learner myTester = learnerFactory();
// NOTE(review): sPattern is assigned but not used anywhere in this method.
String sPattern = Constant.SEGANDORPTN;
String wPattern = Constant.ANDORPTN;
// Token set passed to the helper: "and", "or", "nor", a single backslash and "and / or".
Set<String> token = new HashSet<String>();
token.addAll(Arrays.asList("and or nor".split(" ")));
token.add("\\");
token.add("and / or");
// test case 1
String pattern = "qqn&p";
List<String> words = new ArrayList<String>();
words.addAll(Arrays.asList("smaller undifferentiated <N>plates</N> or tesserae".split(" ")));
List<List<String>> target = new ArrayList<List<String>>();
List<String> mPatterns = new ArrayList<String>();
mPatterns.add("qq");
List<String> mSegments = new ArrayList<String>();
mSegments.add("smaller undifferentiated");
List<String> sPatterns = new ArrayList<String>();
sPatterns.addAll(Arrays.asList("n p".split(" ")));
List<String> sSegments = new ArrayList<String>();
sSegments.addAll(Arrays.asList("<N>plates</N> tesserae".split(" ")));
List<String> tagAndModifier1 = new ArrayList<String>();
tagAndModifier1.add("");
tagAndModifier1.add("smaller undifferentiated plates or tesserae");
List<String> tagAndModifier2 = new ArrayList<String>();
List<String> update1 = new ArrayList<String>();
List<String> update2 = new ArrayList<String>();
update2.add("tesserae");
// Assemble the expected result; the order of the sub-lists matters for equality.
target.add(mPatterns);
target.add(mSegments);
target.add(sPatterns);
target.add(sSegments);
target.add(tagAndModifier1);
target.add(tagAndModifier2);
target.add(update1);
target.add(update2);
assertEquals("andOrTagCase1Helper", target, myTester.andOrTagCase1Helper(pattern, wPattern, words, token));
// List<List<String>> returned = myTester.andOrTagCase1Helper(pattern, wPattern, words, token);
// System.out.println(returned);
// test case 2
// The sub-lists already inside target are emptied and refilled in place.
pattern = "n&qqnbq";
words.clear();
words.addAll(Arrays.asList("<N>perforate</N> or fenestrate anterodorsal <N>portion</N> <B>of</B> palatoquadrate".split(" ")));
mPatterns.clear();
mSegments.clear();
sPatterns.clear();
sSegments.clear();
mPatterns.add("qq");
mSegments.add("fenestrate anterodorsal");
sPatterns.addAll(Arrays.asList("n n".split(" ")));
sSegments.addAll(Arrays.asList("<N>perforate</N> <N>portion</N>".split(" ")));
tagAndModifier1.clear();
tagAndModifier1.add("");
tagAndModifier1.add("perforate or fenestrate anterodorsal portion");
tagAndModifier2.clear();
update1.clear();
update2.clear();
assertEquals("andOrTagCase1Helper", target, myTester.andOrTagCase1Helper(pattern, wPattern, words, token));
}
/**
 * finalizeCompoundModifier, case 1: for the modifier, tag and tagged sentence
 * below, the method is expected to return the modifier unchanged.
 */
@Test
public void testFinalizeCompoundModifier() {
    Learner learner = learnerFactory();

    // case 1
    final String compoundModifier = "maxillary and [dentary] tooth_ bearing";
    final String headTag = "elements";
    final String taggedSentence = "maxillary and dentary <B>tooth_</B> bearing <N>elements</N>";
    assertEquals("finalizeCompoundModifier case 1", compoundModifier,
            learner.finalizeCompoundModifier(learner.getDataHolder(),
                    compoundModifier, headTag, taggedSentence));
}
/**
 * Stores six tagged sentences in the learner's data holder and checks the
 * value {@code getMCount} reports for the word "marginal".
 */
@Test
public void testGetMCount(){
    final Learner learner = learnerFactory();
    final DataHolder holder = learner.getDataHolder();

    // Only the tagged sentence text varies between the fixture rows. The
    // first five contain <M><B>marginal</B></M>; the last one does not.
    final String[] taggedSentences = {
        "<B>number</B> <B>of</B> <M><B>marginal</B></M> <N>bones</N> <B>alongside</B> postparietal",
        "through <M><B>marginal</B></M> <N>bones</N> <B>alongside</B> postparietal",
        "<M><B>marginal</B></M> <N>teeth</N> <B>on</B> dentary",
        "<B>broad</B> <M><B>marginal</B></M> <N>tooth</N> <B>field</B>",
        "<B>narrow</B> <M><B>marginal</B></M> <N>tooth</N> <N>row</N>",
        "anterodorsal <B>peg_</B> like <N>process</N> <B>on</B> <N>scale</N>"
    };

    for (String taggedSentence : taggedSentences) {
        holder.add2Holder(DataHolder.SENTENCE,
                Arrays.asList("source1", taggedSentence, "o1", "lead1", "status1", "tag1", "modifier1", "type1"));
    }

    assertEquals("getMCount", 5, learner.getMCount(holder, "marginal"));
}
/**
 * Checks {@code normalizeItem} on three inputs. Each assertion carries its
 * own case number so a failure report identifies which input broke.
 */
@Test
public void testNormalizeItem() {
    Learner myTester = learnerFactory();

    // case 1: "general" is expected back unchanged
    assertEquals("normalizeItem case 1", "general", myTester.normalizeItem("general"));

    // case 2: "fins" is expected to become "fin"
    assertEquals("normalizeItem case 2", "fin", myTester.normalizeItem("fins"));

    // case 3: only the trailing "bones" is expected to change, to "bone"
    assertEquals("normalizeItem case 3", "squamosal and quadratojugal and bone",
            myTester.normalizeItem("squamosal and quadratojugal and bones"));
}
/**
 * Stores four sentences in the learner's data holder, then checks that
 * {@code adjectiveSubjectsPart1}, called with an empty modifier set,
 * returns exactly the words "open", "anterior" and "paired".
 */
@Test
public void testAdjectiveSubjectsHelper(){
    final Learner learner = learnerFactory();
    final DataHolder holder = learner.getDataHolder();

    // Fixture rows differ only in the tagged sentence text.
    final String[] taggedSentences = {
        "endolymphatic <N>ducts</N> <M><B>open</B></M> <B>in</B> <M>dermal</M> <N>skull</N> roof",
        "restricted to <B>the</B> <M>anterior</M> <B>third</B> <B>of</B> <B>the</B> <N>jaw</N>",
        "<B>series</B> <B>of</B> <M>paired</M> <B>median</B> <N>skull</N> roofng <N>bones</N> <B>that</B> meet <B>at</B> <B>the</B> <M>dorsal</M> midline <B>of</B> <B>the</B> <N>skull</N>",
        "anterior dorsal fontanelle"
    };
    for (String taggedSentence : taggedSentences) {
        holder.add2Holder(
                DataHolder.SENTENCE,
                Arrays.asList("src", taggedSentence, "osent", "lead", "status", "", "structure3", "type"));
    }

    final Set<String> expected = new HashSet<String>(
            Arrays.asList("open anterior paired".split(" ")));
    final Set<String> typeModifiers = new HashSet<String>();

    assertEquals("adjectiveSubjectsHelper", expected,
            learner.adjectiveSubjectsPart1(holder, typeModifiers));
}
/**
 * Checks {@code adjectiveSubjectsPart2Helper1} on four tagged sentences
 * against the modifier set {open, paired, anterior, through}.
 *
 * The two sentences expected to yield {@code true} tag one of those words
 * with {@code <M>}. The two expected to yield {@code false} do not.
 */
@Test
public void testAdjectiveSubjectsPart2Helper1(){
    Learner myTester = learnerFactory();
    Set<String> typeModifiers = new HashSet<String>();
    typeModifiers.addAll(Arrays.asList("open|paired|anterior|through".split("\\|")));

    // "anterior" is wrapped in <M>
    assertEquals("AdjectiveSubjectsPart2Helper1", true, myTester.adjectiveSubjectsPart2Helper1("restricted to <B>the</B> <M>anterior</M> <B>third</B> <B>of</B> <B>the</B> <N>jaw</N>", typeModifiers));
    // same sentence with the <M>anterior</M> token removed
    assertEquals("AdjectiveSubjectsPart2Helper1", false, myTester.adjectiveSubjectsPart2Helper1("restricted to <B>the</B> <B>third</B> <B>of</B> <B>the</B> <N>jaw</N>", typeModifiers));
    // "paired" is wrapped in <M>
    assertEquals("AdjectiveSubjectsPart2Helper1", true, myTester.adjectiveSubjectsPart2Helper1("<B>series</B> <B>of</B> <M>paired</M> <B>median</B> <N>skull</N> roofng <N>bones</N> <B>that</B> meet <B>at</B> <B>the</B> <M>dorsal</M> midline <B>of</B> <B>the</B> <N>skull</N>", typeModifiers));
    // "paired" sits inside a <B> span; the only <M> word left is "dorsal", which is not in the set
    assertEquals("AdjectiveSubjectsPart2Helper1", false, myTester.adjectiveSubjectsPart2Helper1("<B>series</B> <B>of paired median</B> <N>skull</N> roofng <N>bones</N> <B>that</B> meet <B>at</B> <B>the</B> <M>dorsal</M> midline <B>of</B> <B>the</B> <N>skull</N>", typeModifiers));
}
/**
 * Builds a fresh {@code Learner} for a test. The OpenNLP tokenizer, the
 * OpenNLP sentence detector and the WordNet knowledge base are all located
 * through a newly created {@code Configuration}.
 *
 * @return a newly constructed learner
 * @throws IllegalStateException if the WordNet knowledge base cannot be
 *         created; the underlying {@code IOException} is kept as the cause
 */
private Learner learnerFactory() {
    Configuration myConfiguration = new Configuration();
    ITokenizer tokenizer = new OpenNLPTokenizer(
            myConfiguration.getOpenNLPTokenizerDir());
    ITokenizer sentenceDetector = new OpenNLPSentencesTokenizer(
            myConfiguration.getOpenNLPSentenceDetectorDir());

    WordNetPOSKnowledgeBase wordNetPOSKnowledgeBase;
    try {
        wordNetPOSKnowledgeBase = new WordNetPOSKnowledgeBase(myConfiguration.getWordNetDictDir(), false);
    } catch (IOException e) {
        // Fail fast: continuing with a null knowledge base would only hide
        // the real problem behind a later, unrelated-looking failure.
        throw new IllegalStateException("Unable to create the WordNet POS knowledge base", e);
    }

    LearnerUtility myLearnerUtility = new LearnerUtility(sentenceDetector,
            tokenizer, wordNetPOSKnowledgeBase);
    return new Learner(myConfiguration, tokenizer, myLearnerUtility);
}
}