package semanticMarkup.ling.learn;
import static org.junit.Assert.*;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.junit.Before;
import org.junit.Test;
import semanticMarkup.know.lib.WordNetPOSKnowledgeBase;
import semanticMarkup.ling.learn.auxiliary.POSInfo;
import semanticMarkup.ling.learn.dataholder.DataHolder;
import semanticMarkup.ling.learn.dataholder.DiscountedKey;
import semanticMarkup.ling.learn.dataholder.ModifierTableValue;
import semanticMarkup.ling.learn.dataholder.SentenceStructure;
import semanticMarkup.ling.learn.dataholder.SingularPluralPair;
import semanticMarkup.ling.learn.dataholder.WordPOSKey;
import semanticMarkup.ling.learn.dataholder.WordPOSValue;
import semanticMarkup.ling.learn.knowledge.Constant;
import semanticMarkup.ling.learn.utility.WordFormUtility;
import semanticMarkup.ling.transform.ITokenizer;
import semanticMarkup.ling.transform.lib.OpenNLPSentencesTokenizer;
import semanticMarkup.ling.transform.lib.OpenNLPTokenizer;
public class DataHolderTest {
private DataHolder tester;
/**
 * Builds a fresh {@link DataHolder} from a default {@link Configuration}, a new
 * {@link Constant} and a {@link WordFormUtility} backed by a
 * {@link WordNetPOSKnowledgeBase}.
 *
 * @return a newly constructed data holder, independent of the {@code tester} field
 * @throws IllegalStateException if the WordNet knowledge base cannot be created
 */
private DataHolder dataHolderFactory() {
    Configuration myConfiguration = new Configuration();
    WordNetPOSKnowledgeBase wordNetPOSKnowledgeBase;
    try {
        wordNetPOSKnowledgeBase = new WordNetPOSKnowledgeBase(
                myConfiguration.getWordNetDictDir(), false);
    } catch (IOException e) {
        // Fail fast: continuing with a null knowledge base would only surface later
        // as an unrelated-looking failure inside the tests that use this fixture.
        throw new IllegalStateException(
                "Unable to load the WordNet dictionary from "
                        + myConfiguration.getWordNetDictDir(), e);
    }
    WordFormUtility wordFormUtility = new WordFormUtility(wordNetPOSKnowledgeBase);
    Constant myConstant = new Constant();
    return new DataHolder(myConfiguration, myConstant, wordFormUtility);
}
/**
 * JUnit fixture hook: replaces the shared {@code tester} field with a newly
 * built {@link DataHolder}.
 */
@Before
public void initialize() {
    this.tester = this.dataHolderFactory();
}
/**
 * Expects {@code updateDataHolder} to return 0 both for an empty word and for
 * the word "to".
 */
@Test
public void testUpdateTable() {
    final String emptyWordCase = "updateDataHolder - empty word";
    final String forbiddenWordCase = "updateDataHolder - forbidden word";

    assertEquals(emptyWordCase, 0, tester.updateDataHolder("", "", "", "", 0));
    assertEquals(forbiddenWordCase, 0, tester.updateDataHolder("to", "", "", "", 0));
}
/**
 * Expects {@code markKnown} to return 0 for the word "and", then calls it for
 * two further words without asserting on the outcome.
 */
@Test
public void testMarkKnown() {
    assertEquals("markKnown - forbidden word", 0, tester.markKnown("and", "", "", "", 0));

    // Disabled check carried over unchanged:
    // assertEquals("markKnown - stop word", 0,
    //         tester.markKnown("page", "", "", "", 0));

    // "dentinous" covers cases 1 & 2, "lamentous" covers case 2; results are not asserted.
    final String[] boundaryWords = {"dentinous", "lamentous"};
    for (String boundaryWord : boundaryWords) {
        tester.markKnown(boundaryWord, "b", "", "wordpos", 1);
    }
}
/**
 * Expects {@code updatePOS} to return 0 for four word/POS combinations, and to
 * leave exactly one entry in the word-POS holder after adding ("word1", "n").
 */
@Test
public void testUpdatePOS() {
    DataHolder myTester = dataHolderFactory();

    // word / POS combinations for which updatePOS is expected to report 0
    final String[][] noUpdateCases = {
        {"NUM", "n"},
        {"two", "s"},
        {"series", "p"},
        {"heights", "n"},
    };
    for (String[] wordAndPos : noUpdateCases) {
        assertEquals("updatePOS - no update", 0,
                myTester.updatePOS(wordAndPos[0], wordAndPos[1], "", 1));
    }

    Map<WordPOSKey, WordPOSValue> expectedHolder = new HashMap<WordPOSKey, WordPOSValue>();
    WordPOSKey expectedKey = new WordPOSKey("word1", "n");
    WordPOSValue expectedValue = new WordPOSValue("role1", 2, 0, null, null);
    expectedHolder.put(expectedKey, expectedValue);

    myTester.updatePOS("word1", "n", "role1", 2);
    assertEquals("updatePOS - add", expectedHolder, myTester.getWordPOSHolder());
}
/**
 * Placeholder for a changePOS test. The body has no active statements, so this
 * test currently passes without exercising any code.
 */
@Test
public void testChangePOS() {
// TODO: the intended check below is disabled; restore it or remove this test.
//DataHolder myTester = dataHolderFactory();
//assertEquals("changePOS", "", myTester.getDataHolder().changePOS("newWord", "oldPOS", "newPOS", "newRole", 3));
}
/**
 * Exercises {@code addSingularPluralPair} in three situations: the pair is
 * absent, only the singular form is already stored (with an empty plural), and
 * the exact pair is already stored. The first two are expected to return
 * {@code true}, the last {@code false}.
 */
@Test
public void testAddSingularPluralPair() {
    assertTrue("addSingularPluralPair - pair not exist",
            tester.addSingularPluralPair("sword", "pword"));

    // Seed the holder directly so the singular form is already known.
    tester.getSingularPluralHolder().add(new SingularPluralPair("sword2", ""));
    assertTrue("addSingularPluralPair - one word exist",
            tester.addSingularPluralPair("sword2", "pword2"));

    // Seed the holder with the complete pair before trying to add it again.
    tester.getSingularPluralHolder().add(new SingularPluralPair("sword3", "pword3"));
    assertFalse("addSingularPluralPair - pair exists",
            tester.addSingularPluralPair("sword3", "pword3"));
}
/**
 * Expects {@code isInSingularPluralPair} to be false for a word that was never
 * stored, and true once the word is stored as a singular form, as a plural
 * form, or as both.
 */
@Test
public void testIsInSingularPluralPair() {
    assertEquals("inSingularPluralPair - null", false, tester.isInSingularPluralPair("word"));

    SingularPluralPair singularOnly = new SingularPluralPair("word1", "");
    tester.getSingularPluralHolder().add(singularOnly);
    assertEquals("inSingularPluralPair - singular match", true,
            tester.isInSingularPluralPair("word1"));

    SingularPluralPair pluralOnly = new SingularPluralPair("", "word2");
    tester.getSingularPluralHolder().add(pluralOnly);
    assertEquals("inSingularPluralPair - plural match", true,
            tester.isInSingularPluralPair("word2"));

    SingularPluralPair sameBothWays = new SingularPluralPair("word3", "word3");
    tester.getSingularPluralHolder().add(sameBothWays);
    assertEquals("inSingularPluralPair - both match", true,
            tester.isInSingularPluralPair("word3"));
}
/**
 * Calls {@code addModifier} twice for the same word and compares the modifier
 * holder with the expected table after each call: first when the word is new,
 * then when the word is already present.
 */
@Test
public void testAddModifier() {
    Map<String, ModifierTableValue> target = new HashMap<String, ModifierTableValue>();

    // First call: "word" is not in the holder yet.
    target.put("word", new ModifierTableValue(1, false));
    tester.addModifier("word", 10);
    assertEquals("addModifier - add", target, tester.getModifierHolder());

    // Second call: "word" already exists, so the stored entry is expected to change.
    target.put("word", new ModifierTableValue(10, false));
    tester.addModifier("word", 9);
    assertEquals("addModifier - update existing word", target, tester.getModifierHolder());
}
/**
 * Seeds the unknown-word holder with "word" -> "unknown" and expects
 * {@code updateUnknownWord("word", "word")} to leave it as "word" -> "word".
 */
@Test
public void testUpdateUnknownWord() {
    final String word = "word";

    Map<String, String> expectedHolder = new HashMap<String, String>();
    expectedHolder.put(word, word);

    tester.getUnknownWordHolder().put(word, "unknown");
    tester.updateUnknownWord(word, word);

    assertEquals("updateUnknownWord - add", expectedHolder, tester.getUnknownWordHolder());
}
/**
 * Stores three sentences that differ in one word and in their seventh
 * constructor argument ("ignore", "nonignore", {@code null}), then checks which
 * POS {@code resolveConflict} picks for each sentence's distinguishing word.
 */
@Test
public void testResolveConflict() {
    DataHolder myTester = dataHolderFactory();
    myTester.getSentenceHolder().add(new SentenceStructure(0, "source", "word branches word1 end", "word branches word1 end", "lead", "status", "ignore", null, null));
    myTester.getSentenceHolder().add(new SentenceStructure(1, "source", "word branches word2 end", "word branches word2 end", "lead", "status", "nonignore", null, null));
    myTester.getSentenceHolder().add(new SentenceStructure(2, "source", "word branches word3 end", "word branches word3 end", "lead", "status", null, null, null));

    // Each message names the word under test and the POS expected to win.
    assertEquals("resolveConflict - word1 -> otherPOS", "otherPOS",
            myTester.resolveConflict("word1", "bPOS", "otherPOS"));
    assertEquals("resolveConflict - word2 -> bPOS", "bPOS",
            myTester.resolveConflict("word2", "bPOS", "otherPOS"));
    assertEquals("resolveConflict - word3 -> bPOS", "bPOS",
            myTester.resolveConflict("word3", "bPOS", "otherPOS"));
}
/**
 * Populates the unknown-word, word-POS and singular/plural holders, runs
 * {@code discountPOS} once in "all" mode and once in "notAll" mode, and compares
 * the three holders with hand-built expectations.
 */
@Test
public void testDiscountPOS() {
    DataHolder myTester = dataHolderFactory();

    // --- fixture: unknown words ---
    final String[][] unknownWordRows = {
        {"word1", "flag1"},
        {"word2", "unknown"},
        {"word3", "flag1"},
        {"word4", "flag2"},
        {"word5", "flag1"},
    };
    for (String[] row : unknownWordRows) {
        myTester.add2Holder(DataHolder.UNKNOWNWORD, Arrays.asList(row));
    }

    // --- fixture: word/POS entries ---
    final String[][] wordPosRows = {
        {"word1", "s", "role1", "1", "1", null, null},
        {"word2", "p", "role1", "2", "1", null, null},
    };
    for (String[] row : wordPosRows) {
        myTester.add2Holder(DataHolder.WORDPOS, Arrays.asList(row));
    }

    // --- fixture: singular/plural pairs ---
    final String[][] singularPluralRows = {
        {"word1", "word1plural"},
        {"word1singular", "word1"},
        {"word2singular", "word2plural"},
    };
    for (String[] row : singularPluralRows) {
        myTester.add2Holder(DataHolder.SINGULAR_PLURAL, Arrays.asList(row));
    }

    // --- expectation: unknown-word holder ---
    final String[][] expectedUnknownRows = {
        {"word1", "unknown"},
        {"word2", "unknown"},
        {"word3", "flag1"},
        {"word4", "flag2"},
        {"word5", "flag1"},
    };
    Map<String, String> targetUnknownWordHolder = new HashMap<String, String>();
    for (String[] row : expectedUnknownRows) {
        targetUnknownWordHolder =
                myTester.add2UnknowWordHolder(targetUnknownWordHolder, Arrays.asList(row));
    }

    // --- expectation: word-POS holder, built through a second, independent DataHolder ---
    DataHolder targetDataHolder = dataHolderFactory();
    targetDataHolder.add2WordPOSHolder(
            Arrays.asList("word2", "p", "role1", "1", "1", null, null));
    Map<WordPOSKey, WordPOSValue> targetWordPOSHolder = targetDataHolder.getWordPOSHolder();

    // --- expectation: singular/plural holder ---
    Set<SingularPluralPair> targetSingularPluralHolder = new HashSet<SingularPluralPair>();
    targetSingularPluralHolder = myTester.add2SingularPluralHolder(
            targetSingularPluralHolder, Arrays.asList("word2singular", "word2plural"));

    // NOTE(review): this expectation is built but never asserted below - confirm whether
    // a check against the discounted holder was intended.
    Map<DiscountedKey, String> targetDiscountedHolder = new HashMap<DiscountedKey, String>();
    targetDiscountedHolder = myTester.add2DiscountedHolder(
            targetDiscountedHolder, Arrays.asList("word2", "p", "newPOS"));

    // --- exercise ---
    myTester.discountPOS("word1", "s", "newPOS", "all");
    myTester.discountPOS("word2", "p", "newPOS", "notAll");

    // --- verify ---
    assertEquals("discountPOS - delete - UnknownWord",
            targetUnknownWordHolder, myTester.getUnknownWordHolder());
    assertEquals("discountPOS - delete - WordPOS",
            targetWordPOSHolder, myTester.getWordPOSHolder());
    assertEquals("discountPOS - delete - SingularPlural",
            targetSingularPluralHolder, myTester.getSingularPluralHolder());
}
/**
 * Table-driven check of {@code mergeRole}: each row lists the assertion
 * message, the expected result and the two roles passed in.
 */
@Test
public void testMergeRole() {
    final String[][] cases = {
        // message,             expected, first role, second role
        {"mergeRole - case 1", "",  "*", ""},
        {"mergeRole - case 2", "",  "",  "*"},
        {"mergeRole - case 3", "-", "",  "-"},
        {"mergeRole - case 4", "-", "-", ""},
        {"mergeRole - case 5", "+", "-", "_"},
        {"mergeRole - case 6", "-", "-", "-"},
    };
    for (String[] row : cases) {
        assertEquals(row[0], row[1], tester.mergeRole(row[2], row[3]));
    }
}
/**
 * Loads five sentence rows into a fresh holder and checks the value returned by
 * {@code getParentSentenceTag} for sentence ids 0, 1 and 4.
 */
@Test
public void testGetParentSentenceTag() {
    DataHolder myTester = dataHolderFactory();

    final String[][] sentenceRows = {
        {"src0", "s0", "begin with lowercase", "l0", "s0", null, "m0", "t0"},
        {"src1", "s1", "Begin with lowercase", "l1", "s1", "ignore", "m1", "t1"},
        {"src2", "s2", "Begin with uppercase", "l2", "s2", "ignore", "m2", "t2"},
        {"src3", "s3", "end with colon: ", "l3", "s3", null, "m[3][", "t3"},
        {"src4", "s4", "begin with lowercase", "l4", "s4", "t4", "m4", "t4"},
    };
    for (String[] row : sentenceRows) {
        myTester.add2Holder(DataHolder.SENTENCE, Arrays.asList(row));
    }

    final String label = "getParentSentenceTag";
    assertEquals(label, "[parenttag]", myTester.getParentSentenceTag(0));
    assertEquals(label, "[parenttag]", myTester.getParentSentenceTag(1));
    assertEquals(label, "[m3 ]", myTester.getParentSentenceTag(4));
}
/**
 * Expects {@code getMTFromParentTag} to return two empty strings for the input
 * "[modifier_ta", and ("modifier", "tag") for "modifier tag" with or without
 * surrounding brackets.
 */
@Test
public void testGetMTFromParentTag() {
    List<String> emptyPair = new ArrayList<String>(Arrays.asList("", ""));
    assertEquals("getMTFromParentTag - case 0: fail", emptyPair,
            tester.getMTFromParentTag("[modifier_ta"));

    List<String> modifierAndTag = new ArrayList<String>(Arrays.asList("modifier", "tag"));
    assertEquals("getMTFromParentTag - case 1: with []", modifierAndTag,
            tester.getMTFromParentTag("[modifier tag]"));
    assertEquals("getMTFromParentTag - case 2: without []", modifierAndTag,
            tester.getMTFromParentTag("modifier tag"));
}
/**
 * Registers "word1ly" with POS "b" and "word2ly" with POS "*", then checks that
 * {@code tagSentWithMTRemoveLyEndingBoundary} drops only "word1ly" from the
 * sentences it is given.
 */
@Test
public void testRemoveLyEndingBoundary() {
    DataHolder myTester = dataHolderFactory();

    final String[][] wordPosRows = {
        {"word1ly", "b", "role1", "1", "1", null, null},
        {"word2ly", "*", "role1", "1", "1", null, null},
    };
    for (String[] row : wordPosRows) {
        myTester.add2Holder(DataHolder.WORDPOS, Arrays.asList(row));
    }

    final String label = "RemoveLyEndingBoundary";
    assertEquals(label, "word2",
            myTester.tagSentWithMTRemoveLyEndingBoundary("word1ly word2"));
    assertEquals(label, "word2ly word2",
            myTester.tagSentWithMTRemoveLyEndingBoundary("word2ly word2"));
}
/**
 * Checks {@code tagSentWithMTPreProcessing} on a fresh holder: a {@code null}
 * input is expected to yield {@code null}, and three sample sentences are
 * expected to be reduced to the strings asserted below.
 */
@Test
public void testTagSentWithMTPreProcessing() {
    DataHolder myTester = dataHolderFactory();

    assertNull("tagSentWithMTPreProcessing - null",
            myTester.tagSentWithMTPreProcessing(null));
    // Messages name the method under test so a failure points at the right place.
    assertEquals("tagSentWithMTPreProcessing - remove <>", "word1 word3",
            myTester.tagSentWithMTPreProcessing("word1 <word2> word3"));
    assertEquals("tagSentWithMTPreProcessing - remove beginning stop words", "word",
            myTester.tagSentWithMTPreProcessing("after <word2> after above word"));
    assertEquals("tagSentWithMTPreProcessing - remove ending -ly words", "word1",
            myTester.tagSentWithMTPreProcessing("word1 <word2> word3ly word4ly"));
}
/**
 * Stores five word-POS rows for "target" and one for "word", then expects
 * {@code getSumCertaintyU("target")} to return 5.
 */
@Test
public void testGetSumCertaintyU() {
    DataHolder myTester = dataHolderFactory();

    // {word, POS} in insertion order; every row shares the same remaining columns.
    final String[][] wordAndPos = {
        {"target", "pos1"},
        {"target", "pos2"},
        {"target", "pos3"},
        {"word", "pos"},
        {"target", "pos4"},
        {"target", "pos5"},
    };
    for (String[] entry : wordAndPos) {
        myTester.add2Holder(DataHolder.WORDPOS,
                Arrays.asList(entry[0], entry[1], "role", "1", "5", null, null));
    }

    assertEquals("getSumCertaintyU", 5, myTester.getSumCertaintyU("target"));
}
/**
 * Checks the "a|b" string produced by {@code singularPluralVariations} for four
 * words looked up in a three-pair table.
 */
@Test
public void testSingularPluralVariations() {
    Set<SingularPluralPair> pairs = new HashSet<SingularPluralPair>(Arrays.asList(
            new SingularPluralPair("vertebra", "vertebrae"),
            new SingularPluralPair("curimatidae", "curimatida"),
            new SingularPluralPair("bone", "bones")));

    // {queried word, expected variations}
    final String[][] expectations = {
        {"vertebra", "vertebra|vertebrae"},
        {"curimatidae", "curimatidae|curimatida"},
        {"curimatida", "curimatida|curimatidae"},
        {"bones", "bones|bone"},
    };
    for (String[] expectation : expectations) {
        assertEquals("singularPluralVariations", expectation[1],
                tester.singularPluralVariations(expectation[0], pairs));
    }
}
/**
 * Stores three POS rows each for "word1" and "word2", then checks
 * {@code checkPOSInfo} for a digit string, an unknown word, and a word with
 * several stored POS entries.
 */
@Test
public void testCheckPOSInfo() {
    DataHolder myTester = dataHolderFactory();

    // {word, POS, third numeric column}; insertion order is kept as-is.
    final String[][] storedRows = {
        {"word1", "pos3", "1"},
        {"word2", "pos3", "1"},
        {"word1", "pos1", "3"},
        {"word2", "pos1", "3"},
        {"word1", "pos2", "2"},
        {"word2", "pos2", "2"},
    };
    for (String[] row : storedRows) {
        myTester.add2Holder(DataHolder.WORDPOS,
                Arrays.asList(row[0], row[1], "role", row[2], "4", "", ""));
    }

    List<POSInfo> expectedForDigits = new ArrayList<POSInfo>();
    expectedForDigits.add(new POSInfo("123", "b", "", 1, 1));
    assertEquals("checkPOSInfo - digit", expectedForDigits, myTester.checkPOSInfo("123"));

    List<POSInfo> expectedForUnknown = new ArrayList<POSInfo>();
    assertEquals("checkPOSInfo - not found", expectedForUnknown, myTester.checkPOSInfo("abc"));

    List<POSInfo> expectedForWord1 = new ArrayList<POSInfo>(Arrays.asList(
            new POSInfo("word1", "pos1", "role", 3, 4),
            new POSInfo("word1", "pos2", "role", 2, 4),
            new POSInfo("word1", "pos3", "role", 1, 4)));
    assertEquals("checkPOSInfo - found multiple", expectedForWord1,
            myTester.checkPOSInfo("word1"));
}
/**
 * Expects {@code updateDataHolderNNConditionHelper} to accept the plain input
 * "word" and to reject three other sample inputs.
 */
@Test
public void testUpdateTableNNConditionHelper() {
    final boolean accepted = true;
    final boolean rejected = false;

    assertEquals("updateDataHolderNN case 0 - true", accepted,
            tester.updateDataHolderNNConditionHelper("word"));
    assertEquals("updateDataHolderNN case 1 stop words - false", rejected,
            tester.updateDataHolderNNConditionHelper(" page"));
    assertEquals("updateDataHolderNN case 2 -ly ending words - false", rejected,
            tester.updateDataHolderNNConditionHelper("hello abcly "));
    assertEquals("updateDataHolderNN case 3 forbidden words - false", rejected,
            tester.updateDataHolderNNConditionHelper("nor $%^iwopf0-v"));
}
/**
 * Expects {@code updateDataHolderNN(0, 2, words)} to return 1 for the two-word
 * list ["hyohyoidei", "muscle"].
 */
@Test
public void testUpdateTableNN() {
    // Mutable list, matching the original fixture, in case the method modifies its input.
    List<String> input1 = new ArrayList<String>(Arrays.asList("hyohyoidei", "muscle"));
    assertEquals("updateDataHolderNN - hyohyoidei muscle", 1,
            tester.updateDataHolderNN(0, 2, input1));
}
/**
 * Stores eleven words flagged "unknown" and expects
 * {@code getWordsFromUnknownWord} to return the six listed below when called
 * with the word pattern and flag pattern built in this test.
 */
@Test
public void testGetWordsFromUnknownWord() {
    DataHolder myTester = this.dataHolderFactory();

    // Insertion order is kept as-is.
    final String[] storedWords = {
        "word3", "cheek", "cross", "deep", "denticles", "word4",
        "endocranium", "word5", "lepidotrichia", "word1", "word2",
    };
    for (String storedWord : storedWords) {
        myTester.add2Holder(DataHolder.UNKNOWNWORD, Arrays.asList(storedWord, "unknown"));
    }

    Set<String> expectedWords = new HashSet<String>(Arrays.asList(
            "cheek", "cross", "deep", "denticles", "endocranium", "lepidotrichia"));

    String wordPattern = "((" + Constant.PLENDINGS + "|ium)$)|(ee)";
    String flagPattern = "^unknown$";
    assertEquals("getWordsFromUnknownWord", expectedWords,
            myTester.getWordsFromUnknownWord(wordPattern, true, flagPattern, true));
}
}