package edu.stanford.nlp.trees;

import edu.stanford.nlp.ling.LabelFactory;
import edu.stanford.nlp.util.StringUtils;

import java.util.List;
import java.util.Properties;
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.io.FileInputStream;
import java.io.IOException;

/**
 * Transforms an English phrase-structure parse tree so that dependencies
 * extracted from it come out right. It adds an extra level of structure
 * inside QP phrases:
 * <pre>
 *   (QP (RB well) (IN over) (CD 9))
 * </pre>
 * becomes
 * <pre>
 *   (QP (XS (RB well) (IN over)) (CD 9))
 * </pre>
 *
 * @author mcdm
 */
public class QPTreeTransformer implements TreeTransformer {

  /**
   * Transforms the given tree by delegating to {@link #QPtransform(Tree)}.
   * <p>
   * Right now (July 2007) only the following QP structures are targeted:
   * <pre>
   *   QP (RB IN CD|DT ...)    well over, more than
   *   QP (JJR IN CD|DT ...)   fewer than
   *   QP (IN JJS CD|DT ...)   at least
   * </pre>
   *
   * @param t tree to be transformed; it is modified in place
   * @return the same tree {@code t}, with an extra XS layer inside each QP
   *         that matched
   */
  public Tree transformTree(Tree t) {
    return QPtransform(t);
  }


  /**
   * Transforms {@code t} in place if it contains one of the following QP
   * structures:
   * <pre>
   *   QP (RB IN CD|DT ...)    well over, more than
   *   QP (JJR IN CD|DT ...)   fewer than
   *   QP (IN JJS CD|DT ...)   at least
   * </pre>
   *
   * @param t a tree to be transformed
   * @return the same tree object that was passed in, after transformation
   */
  public static Tree QPtransform(Tree t) {
    doTransform(t);
    return t;
  }


  /**
   * Walks the tree top-down looking for nodes whose label starts with "QP".
   * <p>
   * A QP node is rewritten by {@link #transformQP(Tree)} when it has at least
   * three children, its first child is a preterminal, and the labels of the
   * first three children satisfy:
   * <ul>
   *   <li>child 1 starts with RB, JJ or IN,</li>
   *   <li>child 2 starts with IN or JJ,</li>
   *   <li>child 3 starts with CD or DT.</li>
   * </ul>
   * Note that this prefix test is looser than the three documented patterns
   * (RB IN, JJR IN, IN JJS): for example an "IN IN CD" sequence also matches.
   * <p>
   * The children of a QP node are never visited, whether or not the node was
   * rewritten; recursion only continues through non-QP phrasal nodes.
   *
   * @param t the (sub)tree to examine; modified in place
   */
  private static void doTransform(Tree t) {
    if (valueStartsWith(t, "QP")) {
      List<Tree> children = t.getChildrenAsList();
      if (children.size() >= 3
          && children.get(0).isPreTerminal()
          && valueStartsWith(children.get(2), "CD", "DT")
          && valueStartsWith(children.get(0), "RB", "JJ", "IN")
          && valueStartsWith(children.get(1), "IN", "JJ")) {
        transformQP(t);
      }
    } else if (t.isPhrasal()) {
      for (Tree child : t.getChildrenAsList()) {
        doTransform(child);
      }
    }
  }


  /**
   * Null-safe prefix test on a node's label value.
   * <p>
   * NOTE(review): a node may presumably carry no label value (for instance an
   * unlabeled root bracket) -- confirm against the tree reader in use. Such a
   * node is treated as matching nothing, so the traversal carries on instead
   * of failing with a NullPointerException.
   *
   * @param node     the node whose value is tested
   * @param prefixes candidate prefixes
   * @return true if the node's value is non-null and starts with at least one
   *         of the prefixes
   */
  private static boolean valueStartsWith(Tree node, String... prefixes) {
    String value = node.value();
    if (value == null) {
      return false;
    }
    for (String prefix : prefixes) {
      if (value.startsWith(prefix)) {
        return true;
      }
    }
    return false;
  }


  /**
   * Groups the first two children of {@code t} under a new node labelled
   * "XS", which then becomes the first child of {@code t}. The new node and
   * its label are created with the tree factory and label factory obtained
   * from {@code t} itself.
   *
   * @param t a node with at least two children (the caller guarantees three);
   *          modified in place
   */
  private static void transformQP(Tree t) {
    List<Tree> children = t.getChildrenAsList();
    TreeFactory tf = t.treeFactory();
    LabelFactory lf = t.label().labelFactory();

    // Create the new XS node and hang the first two children of the QP on it.
    Tree left = tf.newTreeNode(lf.newLabel("XS"), null);
    for (int i = 0; i < 2; i++) {
      left.addChild(children.get(i));
    }

    // Detach those two children from t; removing index 0 twice drops both.
    for (int i = 0; i < 2; i++) {
      t.removeChild(0);
    }

    // Put XS where the two removed children used to be.
    t.addChild(0, left);
  }


  /**
   * Command-line driver: reads Penn-style trees from the file named by the
   * {@code -treeFile} argument and prints each tree before and after the
   * transformation.
   *
   * @param args command-line arguments; {@code -treeFile <file>} is expected
   * @throws RuntimeException if the tree file cannot be opened or read
   */
  public static void main(String[] args) {
    QPTreeTransformer transformer = new QPTreeTransformer();
    Treebank tb = new MemoryTreebank();
    Properties props = StringUtils.argsToProperties(args);
    String treeFileName = props.getProperty("treeFile");

    if (treeFileName == null) {
      // Nothing to read: say so rather than exiting silently.
      System.err.println("Usage: java QPTreeTransformer -treeFile <file>");
      return;
    }

    try {
      BufferedReader reader =
          new BufferedReader(new InputStreamReader(new FileInputStream(treeFileName)));
      try {
        TreeReader tr = new PennTreeReader(reader, new LabeledScoredTreeFactory());
        Tree t;
        while ((t = tr.readTree()) != null) {
          tb.add(t);
        }
      } finally {
        // Always release the file handle, even if reading fails part-way.
        reader.close();
      }
    } catch (IOException e) {
      // Keep the original exception as the cause so its stack trace survives.
      throw new RuntimeException("File problem: " + e, e);
    }

    for (Tree t : tb) {
      System.out.println("Original tree");
      t.pennPrint();
      System.out.println();
      System.out.println("Tree transformed");
      Tree tree = transformer.transformTree(t);
      tree.pennPrint();
      System.out.println();
      System.out.println("----------------------------");
    }
  }

}