package semanticMarkup.ling.learn.knowledge; import java.util.regex.Matcher; import semanticMarkup.ling.learn.dataholder.DataHolder; import semanticMarkup.ling.learn.dataholder.SentenceStructure; import semanticMarkup.ling.learn.utility.LearnerUtility; import semanticMarkup.ling.learn.utility.StringUtility; /** * Annotate any clause which share its subject with an previous clause by * "ditto". * * @author Dongye * */ public class DittoAnnotator implements IModule { private LearnerUtility myLearnerUtility; public DittoAnnotator(LearnerUtility learnerUtility) { this.myLearnerUtility = learnerUtility; } @Override public void run(DataHolder dataholderHandler) { this.ditto(dataholderHandler); } public void ditto(DataHolder dataholderHandler) { String nPhrasePattern = "(?:<[A-Z]*[NO]+[A-Z]*>[^<]+?<\\/[A-Z]*[NO]+[A-Z]*>\\s*)+"; String mPhrasePattern = "(?:<[A-Z]*M[A-Z]*>[^<]+?<\\/[A-Z]*M[A-Z]*>\\s*)+"; for (SentenceStructure sentenceItem : dataholderHandler .getSentenceHolder()) { if (sentenceItem.getTag() == null) { int sentenceID = sentenceItem.getID(); String sentence = sentenceItem.getSentence(); this.dittoHelper(dataholderHandler, sentenceID, sentence, nPhrasePattern, mPhrasePattern); } } } public int dittoHelper(DataHolder dataholderHandler, int sentenceID, String sentence, String nPhrasePattern, String mPhrasePattern) { int res = 0; String sentenceCopy = "" + sentence; sentenceCopy = sentenceCopy.replaceAll("></?", ""); String modifier = ""; Matcher m2 = StringUtility.createMatcher(sentenceCopy, "(.*?)" + nPhrasePattern); if (!StringUtility.isMatchedNullSafe(sentence, "<[NO]>")) { String tag = "ditto"; dataholderHandler.tagSentenceWithMT(sentenceID, sentence, "", tag, "ditto-no-N"); res = 1; } else if (m2.find()) { String head = m2.group(1); String pattern21 = String .format("\\b(%s)\\b", this.myLearnerUtility.getConstant().PREPOSITION); if (StringUtility.isMatchedNullSafe(head, pattern21)) { String tag = "ditto"; dataholderHandler.tagSentenceWithMT(sentenceID, sentence, modifier, tag, "ditto-proposition"); res = 21; } else if (StringUtility.isMatchedNullSafe(head, ",<\\/B>\\s*$")) { String tag = "ditto"; dataholderHandler.tagSentenceWithMT(sentenceID, sentence, modifier, tag, "ditto-,-N"); res = 22; } } return res; } }