/**
 * 
 */
package rainbownlp.machinelearning.featurecalculator.link;

import java.sql.SQLException;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;

import rainbownlp.analyzer.sentenceclause.Clause;
import rainbownlp.analyzer.sentenceclause.SentenceClauseManager;
import rainbownlp.core.Artifact;
import rainbownlp.core.FeatureValuePair;
import rainbownlp.core.Phrase;
import rainbownlp.core.PhraseLink;
import rainbownlp.core.FeatureValuePair.FeatureName;
import rainbownlp.machinelearning.IFeatureCalculator;
import rainbownlp.machinelearning.MLExample;
import rainbownlp.machinelearning.MLExampleFeature;
import rainbownlp.parser.DependencyLine;
import rainbownlp.util.HibernateUtil;
import rainbownlp.util.StanfordDependencyUtil;
import rainbownlp.util.StringUtil;

/**
 * Computes dependency-parse based features for a link between two phrases:
 * the preposition related to each phrase, the governing verb of each phrase,
 * the auxiliaries of those verbs, and whether the two governing verbs are
 * directly connected in the sentence's dependency lines.
 *
 * <p>Throughout this class an artifact's word index is incremented by one
 * before it is used as a {@code clauseMap} key, and dependency/clause offsets
 * are decremented by one before being compared with word indexes.
 * NOTE(review): presumably word indexes are 0-based and parser offsets are
 * 1-based - confirm against {@code Artifact} and {@code DependencyLine}.
 *
 * @author Azadeh
 */
public class ParseDependencyFeatures implements IFeatureCalculator {

    /** Prefix of the dependency relation names that carry a preposition. */
    private static final String PREP_RELATION_PREFIX = "prep_";

    /** Feature value stored when no preposition is found for a phrase. */
    private static final String NO_PREPOSITION = "no-preposition";

    /** Feature value stored when no governing verb is found for a phrase. */
    private static final String GOV_VERB_NOT_FOUND = "#notFound#";

    /**
     * Manual driver. The body is intentionally empty; the historic usage
     * sketch is kept below for reference.
     */
    public static void main(String[] args) throws Exception {
        // List<MLExample> trainExamples =
        //     MLExample.getAllExamples(LinkExampleBuilder.ExperimentGroupTimexEvent, true);
        // List<MLExample> trainExamples2 =
        //     MLExample.getAllExamples(LinkExampleBuilder.ExperimentGroupEventEvent, true, 100);
        // List<MLExample> all_train_examples = new ArrayList<MLExample>();
        // all_train_examples.addAll(trainExamples);
        // all_train_examples.addAll(trainExamples2);
        // for (MLExample example : all_train_examples) {
        //     if (example.getExampleId() != 4893) continue;
        //     ParseDependencyFeatures lbf = new ParseDependencyFeatures();
        //     lbf.calculateFeatures(example);
        // }
    }

    /**
     * Calculates all dependency features for the phrase link of the given
     * example and attaches each of them to the example.
     *
     * @param exampleToProcess example whose related phrase link is analysed
     * @throws Exception if any of the underlying lookups fails
     */
    @Override
    public void calculateFeatures(MLExample exampleToProcess) throws Exception {
        PhraseLink phraseLink = exampleToProcess.getRelatedPhraseLink();
        Phrase phrase1 = Phrase.getInstance(phraseLink.getFromPhrase().getPhraseId());
        Phrase phrase2 = Phrase.getInstance(phraseLink.getToPhrase().getPhraseId());

        // The sentence of the "from" phrase drives the shared clause manager.
        Artifact parent_sent = phrase1.getStartArtifact().getParentArtifact();
        SentenceClauseManager clauseManager = new SentenceClauseManager(parent_sent);
        ArrayList<DependencyLine> dep_lines = clauseManager.sentDepLines;

        // Preposition related to the "from" phrase.
        String rel_prep1 = getRelPrepositionToPhrase(phrase1, dep_lines);
        if (rel_prep1 == null) {
            rel_prep1 = NO_PREPOSITION;
        }
        FeatureValuePair fromPhraserelPrep =
                FeatureValuePair.getInstance(FeatureName.FromPhraseRelPrep, rel_prep1, "1");
        MLExampleFeature.setFeatureExample(exampleToProcess, fromPhraserelPrep);

        // Preposition related to the "to" phrase.
        String rel_prep2 = getRelPrepositionToPhrase(phrase2, dep_lines);
        if (rel_prep2 == null) {
            rel_prep2 = NO_PREPOSITION;
        }
        FeatureValuePair toPhraserelPrep =
                FeatureValuePair.getInstance(FeatureName.ToPhraseRelPrep, rel_prep2, "1");
        MLExampleFeature.setFeatureExample(exampleToProcess, toPhraserelPrep);

        // Connectivity is evaluated before getGovernorVerb(), which may cache
        // a governing verb on the phrases; the original order is preserved.
        boolean are_gov_connected = areGovVerbsDirectlyConnected(phrase1, phrase2, clauseManager);
        FeatureValuePair gov_connected_feature =
                FeatureValuePair.getInstance(FeatureName.areGovVerbsConnected, are_gov_connected ? "1" : "0");
        MLExampleFeature.setFeatureExample(exampleToProcess, gov_connected_feature);

        // Governing verbs (lemmatised) of both phrases.
        String gov_verb1 = getGovernorVerb(phrase1, clauseManager);
        String gov_verb2 = getGovernorVerb(phrase2, clauseManager);
        if (gov_verb1 == null) {
            gov_verb1 = GOV_VERB_NOT_FOUND;
        }
        if (gov_verb2 == null) {
            gov_verb2 = GOV_VERB_NOT_FOUND;
        }
        FeatureValuePair fromGovVerb =
                FeatureValuePair.getInstance(FeatureName.FromPhraseGovVerb, gov_verb1, "1");
        MLExampleFeature.setFeatureExample(exampleToProcess, fromGovVerb);
        FeatureValuePair toGovVerb =
                FeatureValuePair.getInstance(FeatureName.ToPhraseGovVerb, gov_verb2, "1");
        MLExampleFeature.setFeatureExample(exampleToProcess, toGovVerb);

        // Auxiliaries of the governing verbs: one feature per auxiliary.
        List<String> from_verb_auxs = getVerbAuxilaries(phrase1, clauseManager);
        for (String verb_aux : from_verb_auxs) {
            FeatureValuePair fromverbAuxFeatures =
                    FeatureValuePair.getInstance(FeatureName.FromPhraseGovVerbAux, verb_aux, "1");
            MLExampleFeature.setFeatureExample(exampleToProcess, fromverbAuxFeatures);
        }
        List<String> to_verb_auxs = getVerbAuxilaries(phrase2, clauseManager);
        for (String verb_aux : to_verb_auxs) {
            FeatureValuePair toverbAuxFeatures =
                    FeatureValuePair.getInstance(FeatureName.toPhraseGovVerbAux, verb_aux, "1");
            MLExampleFeature.setFeatureExample(exampleToProcess, toverbAuxFeatures);
        }
    }

    /**
     * Finds the preposition related to a phrase.
     *
     * <p>The first {@code prep_*} dependency line that has the phrase head as
     * either endpoint wins, and the (lower-cased) part after {@code prep_} is
     * returned. If no such line exists, the content of the artifact directly
     * preceding the phrase is returned when its POS tag is {@code IN} or
     * {@code DT} (case-insensitive).
     * NOTE(review): accepting a determiner ({@code DT}) as a "preposition" is
     * kept from the original implementation - confirm it is intended.
     *
     * @param pPhrase   phrase to inspect
     * @param dep_lines dependency lines of the phrase's sentence; when
     *                  {@code null} they are parsed from the sentence's stored
     *                  Stanford dependency string
     * @return the related preposition, or {@code null} if none was found
     * @throws SQLException propagated from the underlying lookups
     */
    public static String getRelPrepositionToPhrase(Phrase pPhrase, ArrayList<DependencyLine> dep_lines)
            throws SQLException {
        String related_prep = null;

        if (dep_lines == null) {
            dep_lines = StanfordDependencyUtil.parseDepLinesFromString(
                    pPhrase.getStartArtifact().getParentArtifact().getStanDependency());
        }

        int head_offset = pPhrase.getHeadArtifact().getWordIndex();

        // Look for prep_ relations that have the phrase head as governor or dependent.
        if (dep_lines != null) {
            for (DependencyLine dep : dep_lines) {
                // Locale.ROOT keeps the lower-casing independent of the JVM default locale.
                String rel_name = dep.relationName.toLowerCase(Locale.ROOT);
                boolean touches_head =
                        head_offset == dep.secondOffset - 1 || head_offset == dep.firstOffset - 1;
                if (touches_head && rel_name.startsWith(PREP_RELATION_PREFIX)) {
                    related_prep = rel_name.substring(PREP_RELATION_PREFIX.length());
                    break;
                }
            }
        }

        // Fall back to the token right before the phrase in the original sentence.
        if (related_prep == null) {
            Artifact prev_artifact = pPhrase.getStartArtifact().getPreviousArtifact();
            if (prev_artifact != null) {
                String prev_pos = prev_artifact.getPOS();
                if ("dt".equalsIgnoreCase(prev_pos) || "in".equalsIgnoreCase(prev_pos)) {
                    related_prep = prev_artifact.getContent();
                }
            }
        }
        return related_prep;
    }

    /**
     * Returns the lemma of the verb governing the given phrase.
     *
     * <p>Resolution order: (1) the governing verb already stored on the
     * phrase; (2) the main verb of the clause mapped to the phrase head,
     * accepted only if the artifact at the verb's offset has a POS tag
     * starting with {@code VB}; (3) {@code Phrase.calclateGovVerb()}. When
     * (2) or (3) succeeds the verb artifact is stored on the phrase and the
     * phrase is saved through {@code HibernateUtil}.
     *
     * @param pPhrase        phrase whose governing verb is wanted
     * @param pClauseManager clause manager to reuse; a new one is built when
     *                       it is {@code null} or belongs to another sentence
     * @return the lemmatised governing verb, or {@code null} if none is found
     * @throws Exception if any of the underlying lookups fails
     */
    public static String getGovernorVerb(Phrase pPhrase, SentenceClauseManager pClauseManager)
            throws Exception {
        Artifact stored_gov_verb = pPhrase.getGovVerb();
        if (stored_gov_verb != null) {
            return StringUtil.getWordLemma(stored_gov_verb.getContent());
        }

        String gov_verb = null;
        Artifact head = pPhrase.getHeadArtifact();
        SentenceClauseManager clauseManager = resolveClauseManager(pPhrase, pClauseManager);

        Clause related_clause = findClause(clauseManager, head);
        if (related_clause != null) {
            gov_verb = related_clause.clauseVerb.verbMainPart;
            if (gov_verb != null && !gov_verb.isEmpty()) {
                Artifact gov_verb_artifact = Artifact.findInstance(
                        clauseManager.getRelatedSentence(), related_clause.clauseVerb.offset - 1);
                String gov_verb_pos = (gov_verb_artifact == null) ? null : gov_verb_artifact.getPOS();
                if (gov_verb_pos == null || !gov_verb_pos.startsWith("VB")) {
                    // Missing artifact/tag or a non-verb tag: discard and use the fallback below.
                    gov_verb = null;
                } else {
                    pPhrase.setGovVerb(gov_verb_artifact);
                    HibernateUtil.save(pPhrase);
                }
            }
        }

        if (gov_verb == null || gov_verb.equals("")) {
            Artifact ga = pPhrase.calclateGovVerb();
            if (ga != null) {
                gov_verb = ga.getContent();
                pPhrase.setGovVerb(ga);
                HibernateUtil.save(pPhrase);
            }
        }

        if (gov_verb != null) {
            gov_verb = StringUtil.getWordLemma(gov_verb);
        }
        return gov_verb;
    }

    /**
     * Returns the POS tag of the verb governing the given phrase.
     *
     * <p>Uses the governing verb stored on the phrase when present; otherwise
     * asks the clause manager for the POS tag at the offset of the main verb
     * of the clause mapped to the phrase head.
     *
     * @param pPhrase        phrase whose governing verb is inspected
     * @param pClauseManager clause manager to reuse; a new one is built when
     *                       it is {@code null} or belongs to another sentence
     * @return the POS tag, or {@code null} if no clause is mapped to the head
     * @throws Exception if any of the underlying lookups fails
     */
    public static String getGovernorVerbPOS(Phrase pPhrase, SentenceClauseManager pClauseManager)
            throws Exception {
        Artifact gov_verb = pPhrase.getGovVerb();
        if (gov_verb != null) {
            return gov_verb.getPOS();
        }

        String gov_verb_pos = null;
        Artifact head = pPhrase.getHeadArtifact();
        SentenceClauseManager clauseManager = resolveClauseManager(pPhrase, pClauseManager);

        Clause related_clause = findClause(clauseManager, head);
        if (related_clause != null) {
            gov_verb_pos = clauseManager.getPOSTag(related_clause.clauseVerb.offset);
        }
        // TODO: perform the right action when no governing verb is found
        // (the original author considered throwing an exception here).
        return gov_verb_pos;
    }

    /**
     * Returns the tense of the verb of the clause related to the given phrase.
     *
     * <p>The clause is looked up by the stored governing verb of the phrase
     * when there is one, otherwise by the phrase head.
     *
     * @param pPhrase        phrase to inspect
     * @param pClauseManager clause manager to reuse; a new one is built when
     *                       it is {@code null} or belongs to another sentence
     * @return the tense reported by the clause verb, or {@code null} if no
     *         clause is mapped to the lookup artifact
     * @throws Exception if any of the underlying lookups fails
     */
    public static Integer getGovernorVerbTense(Phrase pPhrase, SentenceClauseManager pClauseManager)
            throws Exception {
        Integer gov_verb_tense = null;
        Artifact anchor = clauseAnchor(pPhrase);
        SentenceClauseManager clauseManager = resolveClauseManager(pPhrase, pClauseManager);

        Clause related_clause = findClause(clauseManager, anchor);
        if (related_clause != null) {
            gov_verb_tense = related_clause.clauseVerb.getTense(clauseManager);
        }
        return gov_verb_tense;
    }

    /**
     * Returns the auxiliaries of the verb of the clause related to the phrase.
     *
     * @param pPhrase        phrase to inspect
     * @param pClauseManager clause manager to reuse; a new one is built when
     *                       it is {@code null} or belongs to another sentence
     * @return the clause verb's auxiliary list (the same list instance held
     *         by the clause verb), or an empty list when there is no related
     *         clause or the clause verb has no auxiliary list; never
     *         {@code null}
     * @throws Exception if any of the underlying lookups fails
     */
    public static List<String> getVerbAuxilaries(Phrase pPhrase, SentenceClauseManager pClauseManager)
            throws Exception {
        List<String> aux = new ArrayList<String>();
        Clause related_clause = getPhraseRelatedClause(pPhrase, pClauseManager);
        if (related_clause != null && related_clause.clauseVerb.auxs != null) {
            aux = related_clause.clauseVerb.auxs;
        }
        return aux;
    }

    /**
     * Returns the clause related to the given phrase, looked up by the
     * phrase's stored governing verb when there is one, otherwise by its head.
     *
     * @param pPhrase        phrase to inspect
     * @param pClauseManager clause manager to reuse; a new one is built when
     *                       it is {@code null} or belongs to another sentence
     * @return the mapped clause, or {@code null} if there is none
     * @throws Exception if any of the underlying lookups fails
     */
    public static Clause getPhraseRelatedClause(Phrase pPhrase, SentenceClauseManager pClauseManager)
            throws Exception {
        Artifact anchor = clauseAnchor(pPhrase);
        SentenceClauseManager clauseManager = resolveClauseManager(pPhrase, pClauseManager);
        return findClause(clauseManager, anchor);
    }

    /**
     * Tells whether the governing verbs of two phrases are the same verb
     * occurrence or are directly related in the sentence's dependency lines.
     *
     * <p>For each phrase the stored governing verb is used when present;
     * otherwise the main verb of the clause mapped to the phrase head is
     * used. Both lookups use the clause manager of the first phrase's
     * sentence.
     *
     * @param pPhrase1       first phrase
     * @param pPhrase2       second phrase
     * @param pClauseManager clause manager to reuse; a new one is built when
     *                       it is {@code null} or does not belong to the
     *                       sentence of {@code pPhrase1}
     * @return {@code true} if both verbs were found and are either identical
     *         (same text and offset) or directly related; {@code false}
     *         otherwise
     * @throws Exception if any of the underlying lookups fails
     */
    public static boolean areGovVerbsDirectlyConnected(Phrase pPhrase1, Phrase pPhrase2,
            SentenceClauseManager pClauseManager) throws Exception {
        Artifact gov_verb1_artifact = pPhrase1.getGovVerb();
        Artifact gov_verb2_artifact = pPhrase2.getGovVerb();

        String p1_gov_verb = null;
        Integer p1_gov_verb_offset = null;
        String p2_gov_verb = null;
        Integer p2_gov_verb_offset = null;

        if (gov_verb1_artifact != null) {
            p1_gov_verb = gov_verb1_artifact.getContent();
            p1_gov_verb_offset = gov_verb1_artifact.getWordIndex() + 1;
        }
        if (gov_verb2_artifact != null) {
            p2_gov_verb = gov_verb2_artifact.getContent();
            p2_gov_verb_offset = gov_verb2_artifact.getWordIndex() + 1;
        }

        Artifact head2 = pPhrase2.getHeadArtifact();
        SentenceClauseManager clauseManager = resolveClauseManager(pPhrase1, pClauseManager);

        if (p1_gov_verb == null) {
            Artifact head1 = pPhrase1.getHeadArtifact();
            Clause related_clause1 = findClause(clauseManager, head1);
            if (related_clause1 != null) {
                p1_gov_verb = related_clause1.clauseVerb.verbMainPart;
                p1_gov_verb_offset = related_clause1.clauseVerb.offset;
            }
        }
        // Fixed: this fallback used to be guarded by the FIRST verb being
        // null, so the second verb was never resolved in any case that could
        // still yield a positive answer.
        if (p2_gov_verb == null) {
            Clause related_clause2 = findClause(clauseManager, head2);
            if (related_clause2 != null) {
                p2_gov_verb = related_clause2.clauseVerb.verbMainPart;
                p2_gov_verb_offset = related_clause2.clauseVerb.offset;
            }
        }

        if (p1_gov_verb == null || p2_gov_verb == null) {
            return false;
        }
        // Same verb occurrence: equal text and equal offset (compared by value).
        if (p1_gov_verb.equals(p2_gov_verb) && sameOffset(p1_gov_verb_offset, p2_gov_verb_offset)) {
            return true;
        }
        return StanfordDependencyUtil.haveDirectRelation(p1_gov_verb, p1_gov_verb_offset,
                p2_gov_verb, p2_gov_verb_offset, clauseManager.sentDepLines);
    }

    /**
     * Returns the given clause manager if it is non-null and bound to the
     * sentence of the phrase (same object), otherwise a new manager for that
     * sentence.
     */
    private static SentenceClauseManager resolveClauseManager(Phrase pPhrase,
            SentenceClauseManager pClauseManager) throws Exception {
        Artifact phrase_sentence = pPhrase.getStartArtifact().getParentArtifact();
        if (pClauseManager == null || pClauseManager.getRelatedSentence() != phrase_sentence) {
            return new SentenceClauseManager(phrase_sentence);
        }
        return pClauseManager;
    }

    /** Returns the artifact used for clause lookup: the stored governing verb, else the phrase head. */
    private static Artifact clauseAnchor(Phrase pPhrase) {
        Artifact gov_verb = pPhrase.getGovVerb();
        if (gov_verb != null) {
            return gov_verb;
        }
        return pPhrase.getHeadArtifact();
    }

    /** Returns the clause keyed by the artifact's word index plus one, or {@code null}. */
    private static Clause findClause(SentenceClauseManager clauseManager, Artifact anchor) {
        return clauseManager.clauseMap.get(anchor.getWordIndex() + 1);
    }

    /** Null-safe value comparison of two boxed offsets. */
    private static boolean sameOffset(Integer first, Integer second) {
        if (first == null) {
            return second == null;
        }
        return first.equals(second);
    }
}