package edu.jhu.agiga;

import static edu.jhu.agiga.AgigaSentenceReader.require;

import java.io.IOException;
import java.io.StringReader;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;

import edu.jhu.agiga.AgigaConstants.DependencyForm;
import edu.stanford.nlp.ling.Label;
import edu.stanford.nlp.ling.WordLemmaTag;
import edu.stanford.nlp.trees.GrammaticalRelation;
import edu.stanford.nlp.trees.LabeledScoredTreeFactory;
import edu.stanford.nlp.trees.PennTreeReader;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.TreeFactory;
import edu.stanford.nlp.trees.TreeGraphNode;
import edu.stanford.nlp.trees.TreeReader;
import edu.stanford.nlp.trees.TypedDependency;

/**
 * Extends BasicAgigaSentence to implement methods for constructing the Stanford
 * parser API objects.
 *
 * @author mgormley
 *
 */
public class StanfordAgigaSentence extends BasicAgigaSentence implements AgigaSentence, Serializable {

    public static final long serialVersionUID = 1;

    /** Label given to the artificial root node stored at index 0 of the node list. */
    private static final Label ROOT_LABEL = new WordLemmaTag("ROOT");

    /** Cached tree-graph nodes (root first, then one per token); null until first built. */
    List<TreeGraphNode> nodes = null;

    /** The dependency form whose edges are wired into {@link #nodes}; null while nothing is cached. */
    private DependencyForm nodesForm = null;

    public StanfordAgigaSentence(AgigaPrefs prefs) {
        super(prefs);
    }

    // The Stanford TreeGraphNode throws away all but the word from
    // the WordLemmaTag label in converting it to a CoreLabel. Accordingly
    // we allow access to the labels here as well.
    /**
     * Builds one {@link WordLemmaTag} per token of this sentence, in token order.
     * Depending on which of {@code prefs.readWord}, {@code prefs.readLemma} and
     * {@code prefs.readPos} are set, each label carries word+lemma+tag, word+tag,
     * or the word only.
     *
     * @return a newly allocated list with one label per token
     */
    public List<WordLemmaTag> getStanfordWordLemmaTags() {
        List<AgigaToken> tokens = getTokens();
        List<WordLemmaTag> labels = new ArrayList<WordLemmaTag>();
        for (int i = 0; i < tokens.size(); i++) {
            AgigaToken at = tokens.get(i);
            WordLemmaTag curToken;
            // Deliberately checked per token rather than once up front, so a
            // sentence without tokens never evaluates this precondition.
            require(prefs.readWord, "AgigaPrefs.readWord must be true for getStanfordWordLemmaTags()");
            if (prefs.readWord && prefs.readLemma && prefs.readPos) {
                curToken = new WordLemmaTag(at.getWord(), at.getLemma(), at.getPosTag());
            } else if (prefs.readWord && prefs.readPos) {
                curToken = new WordLemmaTag(at.getWord(), at.getPosTag());
            } else {
                // Fallback: only the word is used.
                curToken = new WordLemmaTag(at.getWord());
            }
            labels.add(curToken);
        }
        return labels;
    }

    /**
     * Converts the Agiga dependencies of the given form into Stanford
     * {@link TypedDependency} objects whose governor and dependent are the
     * nodes returned by {@link #getStanfordTreeGraphNodes(DependencyForm)} for
     * the same form.
     *
     * @param form which dependency annotation to convert
     * @return a newly allocated list with one typed dependency per Agiga dependency
     */
    public List<TypedDependency> getStanfordTypedDependencies(DependencyForm form) {
        List<TypedDependency> dependencies = new ArrayList<TypedDependency>();
        // Always go through the accessor so the nodes match the requested form.
        List<TreeGraphNode> graphNodes = getStanfordTreeGraphNodes(form);

        List<AgigaTypedDependency> agigaDeps = getAgigaDeps(form);
        for (AgigaTypedDependency agigaDep : agigaDeps) {
            // Add one, since the tokens are zero-indexed but the TreeGraphNodes are one-indexed
            TreeGraphNode gov = graphNodes.get(agigaDep.getGovIdx() + 1);
            TreeGraphNode dep = graphNodes.get(agigaDep.getDepIdx() + 1);
            // Create the typed dependency
            TypedDependency typedDep = new TypedDependency(
                    GrammaticalRelation.valueOf(agigaDep.getType()), gov, dep);
            dependencies.add(typedDep);
        }
        return dependencies;
    }

    /**
     * Returns the tree-graph nodes of this sentence: an explicit root node at
     * index 0 followed by one node per token, with parent/child links set
     * according to the dependencies of the given form.
     *
     * <p>The result is cached. The cache is reused only when it was built for
     * the same {@code form}; asking for a different form rebuilds the nodes so
     * that the links never reflect a previously requested form. The list is
     * published to the cache only after it has been built completely, so a
     * failure part-way through does not leave a partial list behind.
     *
     * @param form which dependency annotation supplies the parent/child links
     * @return the (cached) node list, root first
     */
    public List<TreeGraphNode> getStanfordTreeGraphNodes(DependencyForm form) {
        if (this.nodes != null && this.nodesForm == form) {
            return this.nodes;
        }

        List<TreeGraphNode> built = new ArrayList<TreeGraphNode>();

        // Add an explicit root node
        built.add(new TreeGraphNode(ROOT_LABEL));

        List<WordLemmaTag> labels = getStanfordWordLemmaTags();
        for (WordLemmaTag curToken : labels) {
            // Create the tree node
            TreeGraphNode treeNode = new TreeGraphNode(curToken);
            treeNode.label().setTag(curToken.tag());
            /*
             * Caution: the order of the calls matters -- first setWord(), then
             * setLemma(). From the Stanford source code:
             *
             *   public void setWord(String word) {
             *     set(WordAnnotation.class, word);
             *     // pado feb 09: if you change the word, delete the lemma.
             *     remove(LemmaAnnotation.class);
             *   }
             *
             *   public void setLemma(String lemma) {
             *     set(LemmaAnnotation.class, lemma);
             *   }
             */
            treeNode.label().setWord(curToken.word());
            treeNode.label().setLemma(curToken.lemma());
            built.add(treeNode);
        }

        List<AgigaTypedDependency> agigaDeps = getAgigaDeps(form);
        for (AgigaTypedDependency agigaDep : agigaDeps) {
            // Add one, since the tokens are zero-indexed but the TreeGraphNodes are one-indexed
            TreeGraphNode gov = built.get(agigaDep.getGovIdx() + 1);
            TreeGraphNode dep = built.get(agigaDep.getDepIdx() + 1);

            // Add gov/dep to TreeGraph
            gov.addChild(dep);
            dep.setParent(gov);
            require(dep.parent() == gov);
        }

        // Publish only a fully constructed list, together with the form it belongs to.
        this.nodes = built;
        this.nodesForm = form;
        return built;
    }

    /**
     * Parses the text returned by {@code getParseText()} as a Penn-Treebank
     * style bracketing and returns the first tree read from it.
     *
     * @return the tree produced by {@link PennTreeReader#readTree()}
     * @throws RuntimeException wrapping the underlying {@link IOException} if reading fails
     */
    public Tree getStanfordContituencyTree() {
        TreeFactory tf = new LabeledScoredTreeFactory();
        StringReader r = new StringReader(getParseText());
        TreeReader tr = new PennTreeReader(r, tf);
        try {
            return tr.readTree();
        } catch (IOException e) {
            // Keep the original exception as the cause so the failure stays diagnosable.
            throw new RuntimeException("Error: IOException should not be thrown by StringReader", e);
        }
    }

}