/**
*
*/
package edu.berkeley.nlp.conll;
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.LinkedList;
import java.util.List;
import edu.berkeley.nlp.PCFGLA.Option;
import edu.berkeley.nlp.PCFGLA.OptionParser;
import edu.berkeley.nlp.PCFGLA.GrammarTrainer.Options;
import edu.berkeley.nlp.syntax.Tree;
import edu.berkeley.nlp.syntax.Trees.PennTreeReader;
/**
 * Command-line tool that reads trees from a file (via {@link PennTreeReader})
 * and prints, for every word, one line of the form
 * <code>head\t_\t_\t_</code>, where <code>head</code> is the 1-based index of
 * the word's head within the sentence and <code>0</code> marks the head word
 * of the whole tree.
 * <p>
 * Heads are recovered from the node labels: the head of a node is the
 * pre-terminal child whose label equals the node's own label with its last
 * character removed (the code expects that character to be a trailing
 * <code>*</code>). See {@link #findHead(Tree)}.
 *
 * @author petrov
 */
public class PTBtoDep {

    /** Command-line options for this tool. */
    public static class Options {
        @Option(name = "-in", required = true, usage = "Input File for Trees (Required)")
        public String inFileName;
    }

    /**
     * Reads every tree from the file given by <code>-in</code> and prints its
     * dependencies to standard output.
     *
     * @param args command-line arguments, parsed into {@link Options}
     */
    public static void main(String[] args) {
        OptionParser optParser = new OptionParser(Options.class);
        Options opts = (Options) optParser.parse(args, true);
        String fileName = opts.inFileName;
        try {
            // The input is decoded as UTF-8 (the original source mentioned
            // GB18030 as an alternative encoding).
            Charset utf8 = Charset.forName("UTF-8");
            InputStreamReader reader = new InputStreamReader(new FileInputStream(fileName), utf8);
            try {
                PennTreeReader treeReader = new PennTreeReader(reader);
                while (treeReader.hasNext()) {
                    Tree<String> rootedTree = treeReader.next();
                    // Strip the artificial ROOT node; only its first child is converted.
                    if (rootedTree.getLabel().equals("ROOT")) {
                        rootedTree = rootedTree.getChildren().get(0);
                    }
                    printDependencies(rootedTree);
                }
            } finally {
                // Always release the file handle, even if reading a tree fails.
                reader.close();
            }
        } catch (Exception ex) {
            // NOTE(review): the failure is only reported on stderr; the process
            // still exits with status 0. Confirm whether callers rely on that.
            ex.printStackTrace();
        }
    }

    /**
     * Prints the head index of every word of <code>tree</code>, one line per
     * word, followed by an empty line.
     * <p>
     * A tree whose yield has at most one element produces the single line
     * <code>0\t_\t_\t_</code>.
     *
     * @param tree the tree to convert (without an enclosing ROOT node)
     */
    private static void printDependencies(Tree<String> tree) {
        int nWords = tree.getYield().size();
        if (nWords <= 1) {
            // NOTE(review): unlike the multi-word case, no empty separator line
            // is printed here; kept as in the original -- confirm this is intended.
            System.out.println(0 + "\t_\t_\t_");
            return;
        }
        int thisHead = findHead(tree);
        int nWordsFound = printDependencies(tree, thisHead, 0, 0);
        // Pad with root attachments if fewer lines than words were accounted for,
        // so that the number of lines counted matches the length of the yield.
        while (nWordsFound < nWords) {
            nWordsFound++;
            System.out.println(0 + "\t_\t_\t_");
            System.err.println("too short");
        }
        System.out.println("");
    }

    /**
     * Recursively prints the head index for every word below <code>tree</code>.
     * <p>
     * For each child, in order:
     * <ul>
     * <li>if the child sits at the head position (<code>parent</code>), one line
     * with <code>parentOfParent</code> is printed; if that child spans more than
     * one word it is additionally dumped to stderr;</li>
     * <li>otherwise, if the child is a pre-terminal, one line with
     * <code>parent</code> is printed;</li>
     * <li>otherwise the child's own head is located with
     * {@link #findHead(Tree)} and the child is processed recursively, its head
     * word attaching to <code>parent</code>.</li>
     * </ul>
     *
     * @param tree the subtree whose children are processed
     * @param parent 1-based sentence index of the head word of <code>tree</code>
     * @param previousWords number of words that precede <code>tree</code> in the sentence
     * @param parentOfParent index printed for the head word of <code>tree</code>
     *        (<code>0</code> for the top-level call)
     * @return <code>previousWords</code> advanced by one for every child handled
     *         as a single word and by the yield size for every other child
     */
    private static int printDependencies(Tree<String> tree, int parent, int previousWords, int parentOfParent) {
        for (Tree<String> child : tree.getChildren()) {
            if (previousWords == parent - 1) { // we are at the head word of this (sub)tree
                System.out.println(parentOfParent + "\t_\t_\t_");
                if (child.getYield().size() > 1) {
                    // The head position is expected to hold a single word; report the subtree.
                    System.err.println(child);
                }
                previousWords++;
            } else if (child.isPreTerminal()) {
                System.out.println(parent + "\t_\t_\t_");
                previousWords++;
            } else {
                // findHead is relative to the child; shift it to a sentence-level index.
                int thisHead = previousWords + findHead(child);
                printDependencies(child, thisHead, previousWords, parent);
                previousWords += child.getYield().size();
            }
        }
        return previousWords;
    }

    /**
     * Finds the head word of <code>tree</code> among its direct children.
     * <p>
     * The head is the pre-terminal child whose label equals the label of
     * <code>tree</code> with its last character removed. If several children
     * match, the last one wins.
     *
     * @param tree the node whose head is looked up
     * @return the 1-based position of the head word within the yield of
     *         <code>tree</code>, or <code>-1</code> if no child matches or the
     *         label of <code>tree</code> is null or empty
     */
    private static int findHead(Tree<String> tree) {
        String headLabel = tree.getLabel();
        if (headLabel == null || headLabel.length() == 0) {
            // Nothing to strip; treat as "no head found" instead of throwing.
            return -1;
        }
        headLabel = headLabel.substring(0, headLabel.length() - 1); // cut off the *
        int headIndex = -2; // yields -1 after the +1 below when no head is found
        int previousWords = 0;
        for (Tree<String> child : tree.getChildren()) {
            if (child.isPreTerminal() && child.getLabel().equals(headLabel)) { // found a potential head
                headIndex = previousWords;
                previousWords++;
            } else {
                previousWords += child.getYield().size();
            }
        }
        return headIndex + 1; // +1 since indices start with 1
    }
}