package edu.stanford.nlp.trees;
import edu.stanford.nlp.ling.LabelFactory;
import edu.stanford.nlp.util.StringUtils;
import java.util.ArrayList;
import java.util.List;
import java.util.Properties;
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.io.FileInputStream;
import java.io.IOException;
/**
* Transforms an English phrase-structure parse tree so that the dependencies
* extracted from it come out right. It adds an extra layer of structure inside QP phrases:
* <br>
* (QP (RB well) (IN over) (CD 9)) becomes
* <br>
* (QP (XS (RB well) (IN over)) (CD 9))
* <br>
* (QP (...) (CC ...) (...)) becomes
* <br>
* (QP (NP ...) (CC ...) (NP ...))
*
*
* @author mcdm
*/
public class QPTreeTransformer implements TreeTransformer {

  /**
   * Right now (Jan 2013) we only deal with the following QP structures:
   * <ul>
   * <li> QP (RB IN CD|DT ...)   well over, more than
   * <li> QP (JJR IN CD|DT ...)  fewer than
   * <li> QP (IN JJS CD|DT ...)  at least
   * <li> QP (... CC ...)        between 5 and 10
   * <li> QP ($ CD CD ...)       the CD children are grouped under a nested QP
   * </ul>
   * The tree is modified in place; the returned tree is the same object as {@code t}.
   *
   * @param t tree to be transformed
   * @return The tree t with an extra layer if there was a QP structure matching the ones mentioned above
   */
  @Override
  public Tree transformTree(Tree t) {
    return QPtransform(t);
  }

  /**
   * Transforms t if it contains one of the following QP structures:
   * <ul>
   * <li> QP (RB IN CD|DT ...)   well over, more than
   * <li> QP (JJR IN CD|DT ...)  fewer than
   * <li> QP (IN JJS CD|DT ...)  at least
   * <li> QP (... CC ...)        between 5 and 10
   * <li> QP ($ CD CD ...)       the CD children are grouped under a nested QP
   * </ul>
   * The transformation is applied destructively to {@code t}.
   *
   * @param t a tree to be transformed
   * @return t transformed (the same object that was passed in)
   */
  public static Tree QPtransform(Tree t) {
    doTransform(t);
    return t;
  }

  /**
   * Walks the tree top-down looking for nodes whose label starts with "QP"
   * and restructures them in place.
   * <p>
   * For a QP node, at most two rewrites are attempted, in this order:
   * <ol>
   * <li> if the first three children match the modifier pattern
   *      (see {@link #startsWithModifierPattern}), the first two children are
   *      wrapped in an XS node;
   * <li> if the (possibly updated) QP has at least three children which are all
   *      preterminals, it is either split around its first non-peripheral CC,
   *      or, when there is no such CC and it looks like "$ CD CD ...", the
   *      CDs are grouped under a nested QP.
   * </ol>
   * The children of a QP node are not visited recursively. Any other phrasal
   * node is just recursed into.
   *
   * @param t the (sub)tree to transform in place
   */
  private static void doTransform(Tree t) {
    if (t.value().startsWith("QP")) {
      List<Tree> children = t.getChildrenAsList();

      if (startsWithModifierPattern(children)) {
        transformQP(t);
        // transformQP changed the children of t, so take a fresh snapshot
        children = t.getChildrenAsList();
      }

      // If the children include a CC, we split that into left and
      // right subtrees with the CC in the middle so the headfinders
      // have an easier time interpreting the tree later on
      if (children.size() >= 3 && isFlat(children)) {
        int ccIndex = findConjunction(children);
        if (ccIndex >= 0) {
          transformCC(t, children.subList(0, ccIndex), children.get(ccIndex),
                      children.subList(ccIndex + 1, children.size()));
        } else if (isMoneyAmount(children)) {
          transformMoney(t, children);
        }
      }
      // NOTE: nodes labeled NP get no special treatment here; they are
      // handled like any other phrasal node by the branch below.
    } else if (t.isPhrasal()) {
      for (Tree child : t.children()) {
        doTransform(child);
      }
    }
  }

  /**
   * Checks whether the children of a QP begin with one of the supported
   * "modifier modifier number" patterns: the first child is a preterminal
   * labeled RB*, JJ* or IN*, the second child is labeled IN* or JJ*, and the
   * third child is labeled CD* or DT*.
   *
   * @param children the children of a QP node
   * @return true if there are at least three children and the first three match
   */
  private static boolean startsWithModifierPattern(List<Tree> children) {
    if (children.size() < 3 || !children.get(0).isPreTerminal()) {
      return false;
    }
    String first = children.get(0).value();
    String second = children.get(1).value();
    String third = children.get(2).value();
    return (third.startsWith("CD") || third.startsWith("DT")) &&
           (first.startsWith("RB") || first.startsWith("JJ") || first.startsWith("IN")) &&
           (second.startsWith("IN") || second.startsWith("JJ"));
  }

  /**
   * Finds the first child labeled CC* that is neither the first nor the last
   * child, so that both sides of the split are non-empty.
   *
   * @param children the children of a QP node
   * @return the index of the conjunction, or -1 if there is none
   */
  private static int findConjunction(List<Tree> children) {
    for (int i = 1; i < children.size() - 1; ++i) {
      if (children.get(i).value().startsWith("CC")) {
        return i;
      }
    }
    return -1;
  }

  /**
   * Checks whether the children look like a currency symbol followed only by
   * numbers: the first label starts with "$" and every other label starts
   * with "CD".
   *
   * @param children the children of a QP node; must not be empty
   * @return true if the children have the shape "$ CD CD ..."
   */
  private static boolean isMoneyAmount(List<Tree> children) {
    if (!children.get(0).value().startsWith("$")) {
      return false;
    }
    for (int i = 1; i < children.size(); ++i) {
      if (!children.get(i).value().startsWith("CD")) {
        return false;
      }
    }
    return true;
  }

  /**
   * Tests whether every tree in the list is a preterminal.
   *
   * @param children the trees to check
   * @return true if all of them are preterminals (vacuously true for an empty list)
   */
  private static boolean isFlat(List<Tree> children) {
    for (Tree child : children) {
      if (!child.isPreTerminal()) {
        return false;
      }
    }
    return true;
  }

  /**
   * Replaces the children of t with three children: an NP over
   * {@code left}, the conjunction, and an NP over {@code right}.
   *
   * @param t the QP node to restructure in place
   * @param left the children preceding the conjunction
   * @param conj the conjunction node
   * @param right the children following the conjunction
   */
  private static void transformCC(Tree t, List<Tree> left, Tree conj, List<Tree> right) {
    TreeFactory tf = t.treeFactory();
    LabelFactory lf = t.label().labelFactory();
    Tree leftQP = tf.newTreeNode(lf.newLabel("NP"), left);
    Tree rightQP = tf.newTreeNode(lf.newLabel("NP"), right);
    List<Tree> newChildren = new ArrayList<Tree>();
    newChildren.add(leftQP);
    newChildren.add(conj);
    newChildren.add(rightQP);
    t.setChildren(newChildren);
  }

  /**
   * Replaces the children of t with two children: the original first child
   * (the currency symbol) and a new QP node over all remaining children.
   *
   * @param t the QP node to restructure in place
   * @param children the current children of t
   */
  private static void transformMoney(Tree t, List<Tree> children) {
    TreeFactory tf = t.treeFactory();
    LabelFactory lf = t.label().labelFactory();
    Tree rightQP = tf.newTreeNode(lf.newLabel("QP"), children.subList(1, children.size()));
    List<Tree> newChildren = new ArrayList<Tree>();
    newChildren.add(children.get(0));
    newChildren.add(rightQP);
    t.setChildren(newChildren);
  }

  /**
   * Wraps the first two children of t in a new XS node, which becomes the
   * first child of t. The caller must ensure t has at least two children.
   *
   * @param t the QP node to restructure in place
   */
  private static void transformQP(Tree t) {
    List<Tree> children = t.getChildrenAsList();
    TreeFactory tf = t.treeFactory();
    LabelFactory lf = t.label().labelFactory();

    // create the new XS having the first two children of the QP; the node is
    // built from a child list, the same way transformCC and transformMoney do it
    List<Tree> xsChildren = new ArrayList<Tree>(children.subList(0, 2));
    Tree left = tf.newTreeNode(lf.newLabel("XS"), xsChildren);

    // remove the first two children of t
    for (int i = 0; i < 2; i++) {
      t.removeChild(0);
    }

    // add XS as the first child
    t.addChild(0, left);
  }

  /**
   * Command-line driver: reads the Penn-format trees in the file given by
   * the {@code -treeFile} option, and prints each tree before and after
   * transformation. If no file is given, nothing is printed.
   *
   * @param args command-line arguments, parsed with {@link StringUtils#argsToProperties}
   * @throws RuntimeException if the tree file cannot be opened or read; the
   *         underlying {@link IOException} is attached as the cause
   */
  public static void main(String[] args) {
    QPTreeTransformer transformer = new QPTreeTransformer();
    Treebank tb = new MemoryTreebank();
    Properties props = StringUtils.argsToProperties(args);
    String treeFileName = props.getProperty("treeFile");

    if (treeFileName != null) {
      try {
        BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(treeFileName)));
        try {
          TreeReader tr = new PennTreeReader(reader, new LabeledScoredTreeFactory());
          Tree t;
          while ((t = tr.readTree()) != null) {
            tb.add(t);
          }
        } finally {
          // always release the file handle, even if reading fails half-way
          reader.close();
        }
      } catch (IOException e) {
        // keep the original exception as the cause so the stack trace is not lost
        throw new RuntimeException("File problem: " + e, e);
      }
    }

    for (Tree t : tb) {
      System.out.println("Original tree");
      t.pennPrint();
      System.out.println();
      System.out.println("Tree transformed");
      Tree tree = transformer.transformTree(t);
      tree.pennPrint();
      System.out.println();
      System.out.println("----------------------------");
    }
  }

}