package edu.berkeley.nlp.syntax; import edu.berkeley.nlp.ling.HeadFinder; import edu.berkeley.nlp.ling.CollinsHeadFinder; import edu.berkeley.nlp.util.CollectionUtils; import edu.berkeley.nlp.util.Pair; import java.util.ArrayList; import java.util.HashSet; import java.util.List; import java.util.Set; /** * User: aria42 * Date: Apr 15, 2009 */ public class GrammaticalRelation { public static Pair<Tree<String>,Tree<String>> getSubjectObject(TreePathFinder<String> tpf, Tree<String> vpNode) { if (!vpNode.getLabel().startsWith("VP")) { throw new IllegalArgumentException(vpNode + " is not a VP"); } List<Tree<String>> childs = vpNode.getChildren(); // Subject Tree<String> subj = null; for (Tree<String> node: tpf.getRoot().getPostOrderTraversal()) { if (node.getLabel().startsWith("NP")) { Tree<String> lcaNode = tpf.findLowestCommonAncestor(node,vpNode); if (lcaNode.getLabel().startsWith("S") && tpf.findParent(node) == lcaNode) { subj = node; break; } } } // Object Tree<String> obj = null; for (int c = 0; c < childs.size(); c++) { Tree<String> child = childs.get(c); if (child.isPhrasal() && child.getLabel().startsWith("NP")) { obj = child; break; } } if (subj == null || obj == null) return null; return Pair.newPair(subj,obj); } private final static Set<String> isVerbs = new HashSet<String>(CollectionUtils.makeList("is","was")); public static List<Pair<Tree<String>,Tree<String>>> getPredicateNominativePairs(TreePathFinder<String> tpf, Tree<String> root, HeadFinder hf) { List<Tree<String>> vpNodes = new ArrayList(); for (Tree<String> node : tpf.getRoot().getPostOrderTraversal()) { if (node.getLabel().startsWith("VP")) { Pair<String,String> p = HeadFinder.Utils.getHeadWordAndPartOfSpeechPair(hf, node); String headWord = p.getFirst(); if (isVerbs.contains(headWord.toLowerCase())) { vpNodes.add(node); } } } List<Pair<Tree<String>,Tree<String>>> result = new ArrayList<Pair<Tree<String>, Tree<String>>>(); for (Tree<String> vpNode : vpNodes) { Pair<Tree<String>, Tree<String>> treeTreePair = getSubjectObject(tpf, vpNode); if (treeTreePair != null) result.add(treeTreePair); } return result; } public static void main(String[] args) { Tree<String> t = Trees.PennTreeReader.parseEasy("(ROOT (S (NP (NNP John)) (VP (VBD was) (NP (DT a) (NN man)))))"); TreePathFinder<String> tpf = new TreePathFinder<String>(t); System.out.println(getPredicateNominativePairs(tpf, t, new CollinsHeadFinder())); } }