package edu.stanford.nlp.trees.international.tuebadz;

import java.util.ArrayList;
import java.util.List;

import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.TreeFactory;
import edu.stanford.nlp.trees.TreeNormalizer;
import edu.stanford.nlp.trees.TreebankLanguagePack;

/**
 * Tree normalizer for the TueBaDZ treebank.
 *
 * (An adaptation of Roger Levy's NegraPennTreeNormalizer.)
 *
 * @author Wolfgang Maier (wmaier@sfs.uni-tuebingen.de)
 */
public class TueBaDZPennTreeNormalizer extends TreeNormalizer {

  /**
   * How to clean up node labels: 0 = do nothing, 1 = keep category and
   * function, 2 = just category.
   */
  private final int nodeCleanup;

  /** Root symbol, taken from the language pack's start symbol at construction. */
  private final String root;

  /** Language pack used to reduce labels in {@link #cleanUpLabel(String)}. */
  protected final TreebankLanguagePack tlp;

  /**
   * Additional normalizers supplied at construction time. They are only
   * stored here; nothing in this class reads the list.
   */
  private final List<TreeNormalizer> tns = new ArrayList<>();

  /**
   * Returns the root symbol used by this normalizer.
   *
   * @return the start symbol obtained from the language pack
   */
  public String rootSymbol() {
    return root;
  }

  // public TueBaDZPennTreeNormalizer() {
  //   this(new TueBaDZLanguagePack(), 0);
  // }

  /**
   * Creates a normalizer without any additional normalizers.
   *
   * @param tlp language pack providing the start symbol and the label
   *            reduction functions
   * @param nodeCleanup label clean-up mode: 0 = do nothing, 1 = keep category
   *                    and function, 2 = just category
   */
  public TueBaDZPennTreeNormalizer(TreebankLanguagePack tlp, int nodeCleanup) {
    this.tlp = tlp;
    this.nodeCleanup = nodeCleanup;
    this.root = tlp.startSymbol();
  }

  /**
   * Creates a normalizer that additionally remembers a list of other
   * normalizers.
   *
   * @param tlp language pack providing the start symbol and the label
   *            reduction functions
   * @param nodeCleanup label clean-up mode: 0 = do nothing, 1 = keep category
   *                    and function, 2 = just category
   * @param tns normalizers to copy into this instance; {@code null} is treated
   *            as an empty list
   */
  public TueBaDZPennTreeNormalizer(TreebankLanguagePack tlp, int nodeCleanup, List<TreeNormalizer> tns) {
    this(tlp, nodeCleanup);
    if (tns != null) {
      this.tns.addAll(tns);
    }
  }

  /**
   * Normalizes a leaf contents.
   * This implementation interns the leaf.
   *
   * @param leaf the terminal string
   * @return the interned terminal string
   */
  @Override
  public String normalizeTerminal(String leaf) {
    // We could unquote * and / with backslash \ in front of them
    return leaf.intern();
  }

  /**
   * Normalizes a nonterminal contents.
   * This implementation cleans the label with {@link #cleanUpLabel(String)}
   * and interns the result.
   *
   * @param category the nonterminal label
   * @return the cleaned, interned label
   */
  @Override
  public String normalizeNonterminal(String category) {
    return cleanUpLabel(category).intern();
  }

  /**
   * Remove things like hyphened functional tags and equals from the
   * end of a node label, according to the configured clean-up mode.
   *
   * @param label the raw node label, possibly {@code null}
   * @return the root symbol if {@code label} is {@code null}; otherwise the
   *         label reduced by the language pack for modes 1 and 2, or the
   *         label unchanged for any other mode
   */
  protected String cleanUpLabel(String label) {
    if (label == null) {
      return root;
    }
    switch (nodeCleanup) {
      case 1:
        return tlp.categoryAndFunction(label);
      case 2:
        return tlp.basicCategory(label);
      default:
        return label;
    }
  }

  /**
   * Normalize a whole tree.
   * TueBa-D/Z adaptation. Fixes trees with non-unary roots, does nothing else.
   * <p>
   * If the tree's label value equals the root symbol and the tree has more
   * than one child, all children are moved under a new node (also labelled
   * with the root symbol) which becomes the only child. The tree is modified
   * in place.
   *
   * @param tree the tree to normalize
   * @param tf not used; the new node is created with the tree's own factory
   * @return the same {@code tree} instance
   */
  @Override
  public Tree normalizeWholeTree(Tree tree, TreeFactory tf) {
    if (tree.label().value().equals(root) && tree.children().length > 1) {
      Tree underRoot = tree.treeFactory().newTreeNode(root, tree.getChildrenAsList());
      // Install a fully populated array in one step: the tree never holds a
      // null child, and this does not rely on children() exposing the
      // backing array for a later setChild call.
      tree.setChildren(new Tree[] { underRoot });
    }
    // we just want the non-unary root fixed.
    return tree;
  }

  private static final long serialVersionUID = 8009544230321390490L;

}