package semanticMarkup.ling.learn;

import org.junit.Before;
import org.junit.Test;
import static org.junit.Assert.assertEquals;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.HashMap;
//import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;

import semanticMarkup.io.input.lib.db.ParentTagProvider;
import semanticMarkup.ling.learn.UnsupervisedClauseMarkup;
import semanticMarkup.ling.learn.dataholder.DataHolder;
import semanticMarkup.ling.learn.dataholder.SentenceStructure;
import semanticMarkup.ling.transform.ITokenizer;
import semanticMarkup.ling.transform.lib.OpenNLPSentencesTokenizer;
import semanticMarkup.ling.transform.lib.OpenNLPTokenizer;

/**
 * Unit tests for the {@code read*} accessors of {@link UnsupervisedClauseMarkup}.
 *
 * <p>Each test populates the {@link DataHolder} of a freshly built
 * {@code UnsupervisedClauseMarkup} and compares the value returned by one
 * {@code read*} method against a hand-written expectation.
 */
public class UnsupervisedClauseMarkupTest {

    /** Instance under test; rebuilt by {@link #initialize()} before every test method. */
    UnsupervisedClauseMarkup tester;

    /** Builds a new instance under test so that no holder contents leak between tests. */
    @Before
    public void initialize() {
        tester = createClauseMarkup();
    }

    /**
     * With the four sample sentences loaded, {@code readAdjNouns()} is expected
     * to return the list {@code [modifier3, modifier2]}.
     */
    @Test
    public void testReadAdjNouns() {
        addSampleSentences(tester.getDataHolder(), "source");

        List<String> expected = new ArrayList<String>(
                Arrays.asList("modifier3", "modifier2"));

        assertEquals("Method readAdjNouns", expected, tester.readAdjNouns());
    }

    /**
     * With the four sample sentences loaded, {@code readAdjNounSent()} is
     * expected to map {@code [tag2}, {@code [tag3} and {@code [tag4} to
     * {@code modifier2}, {@code modifier2} and {@code modifier3}.
     */
    @Test
    public void testReadAdjNounSent() {
        addSampleSentences(tester.getDataHolder(), "source");

        Map<String, String> expected = new HashMap<String, String>();
        expected.put("[tag2", "modifier2");
        expected.put("[tag3", "modifier2");
        expected.put("[tag4", "modifier3");

        // The failure message now names the method actually being exercised.
        assertEquals("Method readAdjNounSent", expected, tester.readAdjNounSent());
    }

    /**
     * Adds two rows through {@code add2Holder(DataHolder.SENTENCE, ...)} and
     * expects {@code readBracketTags()} to return the single element
     * {@code "end"}.
     */
    @Test
    public void testReadBracketTags() {
        DataHolder dataHolder = tester.getDataHolder();
        dataHolder.add2Holder(DataHolder.SENTENCE, Arrays.asList(
                "src", "sent", "osent", "lead", "status", "tag", "start", "type"));
        dataHolder.add2Holder(DataHolder.SENTENCE, Arrays.asList(
                "src", "sent", "osent", "lead", "status", "[tag]", "start end", "type"));

        Set<String> expected = new HashSet<String>();
        expected.add("end");

        assertEquals("Method readBracketTags", expected, tester.readBracketTags());
    }

    /**
     * Loads the sample sentences with sources {@code source.ignore.1} to
     * {@code source.ignore.4} and expects {@code readWordToSources()} to map
     * every word to the sources {@code source.N} of the sentences containing it.
     *
     * <p>The method name keeps its historical spelling so existing test
     * reports and filters continue to match.
     */
    @Test
    public void testReadWordToSoures() {
        addSampleSentences(tester.getDataHolder(), "source.ignore.");

        Map<String, Set<String>> expected = new HashMap<String, Set<String>>();
        expected.put("word1", sourceSet("source.1", "source.4"));
        expected.put("word2", sourceSet("source.1", "source.2"));
        expected.put("word3", sourceSet("source.2", "source.3", "source.4"));
        expected.put("word4", sourceSet("source.4"));

        assertEquals("Method readWordToSources", expected, tester.readWordToSources());
    }

    /**
     * Puts two entries into the heuristic noun holder and expects
     * {@code readHeuristicNouns()} to return an equal map.
     */
    @Test
    public void testReadHeuristicNouns() {
        Map<String, String> heuristicNouns = tester.getDataHolder().getHeuristicNounHolder();
        heuristicNouns.put("word1", "type1");
        heuristicNouns.put("word2", "type2");

        Map<String, String> expected = new HashMap<String, String>();
        expected.put("word2", "type2");
        expected.put("word1", "type1");

        assertEquals("Method readHeuristicNouns", expected, tester.readHeuristicNouns());
    }

    /**
     * Appends the four sample sentences shared by several tests to the sentence
     * holder of {@code dataHolder}.
     *
     * @param dataHolder   holder whose sentence list receives the rows
     * @param sourcePrefix text placed before the running number 1 to 4 to form
     *                     the second constructor argument of each row
     */
    private static void addSampleSentences(DataHolder dataHolder, String sourcePrefix) {
        List<SentenceStructure> sentences = dataHolder.getSentenceHolder();
        sentences.add(new SentenceStructure(0, sourcePrefix + "1", "word1 word2",
                "", "", "", "tag1", "modifier1", ""));
        sentences.add(new SentenceStructure(1, sourcePrefix + "2", "word2 word3",
                "", "", "", "[tag2", " modifier2[abc]", ""));
        sentences.add(new SentenceStructure(2, sourcePrefix + "3", "word3",
                "", "", "", "[tag3", "[abc]modifier2 ", ""));
        sentences.add(new SentenceStructure(3, sourcePrefix + "4", "word1 word3 word4",
                "", "", "", "[tag4", " mo[123]difier3", ""));
    }

    /** Returns a new mutable set holding exactly the given source names. */
    private static Set<String> sourceSet(String... sources) {
        return new HashSet<String>(Arrays.asList(sources));
    }

    /**
     * Builds the object under test with the {@code "plain"} setting, OpenNLP
     * tokenizers taken from a default {@code Configuration}, a new
     * {@link ParentTagProvider} and an empty set of selected sources.
     *
     * @return a newly constructed {@code UnsupervisedClauseMarkup}
     */
    private UnsupervisedClauseMarkup createClauseMarkup() {
        Configuration configuration = new Configuration();
        ParentTagProvider parentTagProvider = new ParentTagProvider();
        ITokenizer sentenceDetector = new OpenNLPSentencesTokenizer(
                configuration.getOpenNLPSentenceDetectorDir());
        ITokenizer tokenizer = new OpenNLPTokenizer(
                configuration.getOpenNLPTokenizerDir());
        Set<String> selectedSources = new HashSet<String>();

        return new UnsupervisedClauseMarkup("plain", null, parentTagProvider,
                selectedSources, sentenceDetector, tokenizer);
    }
}