package semanticMarkup.ling.learn.knowledge;

import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;

import semanticMarkup.ling.learn.dataholder.DataHolder;
import semanticMarkup.ling.learn.dataholder.SentenceStructure;
import semanticMarkup.ling.learn.utility.LearnerUtility;
import semanticMarkup.ling.learn.utility.StringUtility;

/**
 * Module that directly annotates the still-untagged clauses which are phrases
 * ending in a head noun: every sentence whose tag is {@code null} is run
 * through {@link #phraseClauseHelper(String)}, and when a modifier/tag pair
 * comes back the sentence is tagged with it.
 *
 * @author Dongye
 *
 */
public class PhraseClauseAnnotator implements IModule {

	/**
	 * Whole-sentence pattern: (1) a lazily matched leading part, (2) zero or
	 * more elements whose tag name contains {@code M}, (3) one or more
	 * elements whose tag name contains {@code N} or {@code O}, and finally a
	 * {@code <B>} element holding one of {@code , : . ;} at the end of the
	 * sentence.
	 */
	private static final String PHRASE_CLAUSE_PATTERN = "^(.*?)((?:<[A-Z]*M[A-Z]*>[^<]*?<\\/[A-Z]*M[A-Z]*>\\s*)*)((?:<[A-Z]*[NO]+[A-Z]*>[^<]*?<\\/[A-Z]*[NO]+[A-Z]*>\\s*)+)<B>[,:\\.;]<\\/B>\\s*$";

	/** Splits a tag section into "everything before" and the text of its trailing {@code <N>} element. */
	private static final String LAST_NOUN_PATTERN = "(.*?)<N>([^<]+)<\\/N>\\s*$";

	/** Matches a single piece of markup (angle brackets around non-whitespace). */
	private static final String MARKUP_PATTERN = "<\\S+?>";

	/** Matches whitespace at the very start or the very end of a string. */
	private static final String EDGE_WHITESPACE_PATTERN = "(^\\s*|\\s*$)";

	private LearnerUtility myLearnerUtility;

	/**
	 * @param learnerUtility
	 *            utility whose constants supply the preposition alternation
	 *            used by {@link #phraseClauseHelper(String)}
	 */
	public PhraseClauseAnnotator(LearnerUtility learnerUtility) {
		this.myLearnerUtility = learnerUtility;
	}

	/**
	 * Module entry point; simply delegates to
	 * {@link #phraseClause(DataHolder)}.
	 */
	@Override
	public void run(DataHolder dataholderHandler) {
		this.phraseClause(dataholderHandler);
	}

	/**
	 * Walks the sentence holder and, for each sentence without a tag, asks
	 * {@link #phraseClauseHelper(String)} for a modifier/tag pair. Only when
	 * exactly two values are returned is the sentence tagged, using the
	 * source label {@code "phraseclause"}.
	 *
	 * @param dataholderHandler
	 *            holder providing the sentences and receiving the tagging
	 */
	public void phraseClause(DataHolder dataholderHandler) {
		for (SentenceStructure sentenceItem : dataholderHandler
				.getSentenceHolder()) {
			// Sentences that already carry a tag are left untouched.
			if (sentenceItem.getTag() != null) {
				continue;
			}

			int sentenceID = sentenceItem.getID();
			String sentence = sentenceItem.getSentence();

			List<String> modifierAndTag = this.phraseClauseHelper(sentence);
			if (modifierAndTag == null || modifierAndTag.size() != 2) {
				continue;
			}

			String modifier = modifierAndTag.get(0);
			String tag = modifierAndTag.get(1);
			dataholderHandler.tagSentenceWithMT(sentenceID, sentence,
					modifier, tag, "phraseclause");
		}
	}

	/**
	 * Tries to read a sentence as "modifiers + head noun(s) + closing
	 * punctuation" and extracts the modifier and the tag from it.
	 *
	 * @param sentence
	 *            the marked-up sentence; may be {@code null}
	 * @return {@code null} when {@code sentence} is {@code null}; an empty
	 *         list when the sentence does not match the pattern, or when the
	 *         leading part contains a preposition or {@code </N>}, or when
	 *         the modifier part contains a preposition; otherwise a
	 *         two-element list holding the modifier followed by the tag,
	 *         both with markup removed and edge whitespace stripped
	 */
	public List<String> phraseClauseHelper(String sentence) {
		if (sentence == null) {
			return null;
		}

		List<String> result = new ArrayList<String>(2);

		// Drop every ">" that is immediately followed by "<" or "</", which
		// fuses directly adjacent markup before the pattern is applied.
		String fused = sentence.replaceAll("></?", "");

		Matcher sentenceMatcher = StringUtility.createMatcher(fused,
				PHRASE_CLAUSE_PATTERN);
		if (!sentenceMatcher.find()) {
			return result;
		}

		String head = sentenceMatcher.group(1);
		String modifier = sentenceMatcher.group(2);
		String tag = sentenceMatcher.group(3);

		// Built only after a successful match, as the learner utility is not
		// needed otherwise.
		String prepositionPattern = String.format("\\b(%s)\\b",
				this.myLearnerUtility.getConstant().PREPOSITION);

		// Reject when a preposition or a closed <N> element precedes the
		// phrase, or when a preposition sits among the modifiers.
		if (StringUtility.isMatchedNullSafe(head, prepositionPattern)) {
			return result;
		}
		if (StringUtility.isMatchedNullSafe(head, "<\\/N>")) {
			return result;
		}
		if (StringUtility.isMatchedNullSafe(modifier, prepositionPattern)) {
			return result;
		}
		if (tag == null) {
			return result;
		}

		// When the tag section ends in an <N> element, that element's text
		// becomes the tag and whatever precedes it is appended to the modifier.
		Matcher lastNounMatcher = StringUtility.createMatcher(tag,
				LAST_NOUN_PATTERN);
		if (lastNounMatcher.find()) {
			modifier = modifier + lastNounMatcher.group(1);
			tag = lastNounMatcher.group(2);
		}

		String cleanTag = stripMarkupAndEdges(tag);
		String cleanModifier = stripMarkupAndEdges(modifier);

		result.add(cleanModifier);
		result.add(cleanTag);
		return result;
	}

	/** Removes all markup from {@code text}, then strips leading and trailing whitespace. */
	private static String stripMarkupAndEdges(String text) {
		String withoutMarkup = text.replaceAll(MARKUP_PATTERN, "");
		return withoutMarkup.replaceAll(EDGE_WHITESPACE_PATTERN, "");
	}
}