package edu.stanford.nlp.trees.international.hebrew;
import java.io.Serializable;
import java.util.Collections;
import edu.stanford.nlp.trees.BobChrisTreeNormalizer;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.TreeFactory;
import java.util.function.Predicate;
/**
*
* @author Spence Green
*
*/
public class HebrewTreeNormalizer extends BobChrisTreeNormalizer {
private static final long serialVersionUID = -3129547164200725933L;
private final Predicate<Tree> hebrewEmptyFilter;
public HebrewTreeNormalizer() {
super(new HebrewTreebankLanguagePack());
hebrewEmptyFilter = new HebrewEmptyFilter();
}
/**
* Remove traces and pronoun deletion markers.
*/
public static class HebrewEmptyFilter implements Predicate<Tree>, Serializable {
private static final long serialVersionUID = -7256461296718287280L;
public boolean test(Tree t) {
return ! (t.isPreTerminal() && t.value().equals("-NONE-"));
}
}
@Override
public Tree normalizeWholeTree(Tree tree, TreeFactory tf) {
tree = tree.prune(hebrewEmptyFilter, tf).spliceOut(aOverAFilter, tf);
//Add start symbol so that the root has only one sub-state. Escape any enclosing brackets.
//If the "tree" consists entirely of enclosing brackets e.g. ((())) then this method
//will return null. In this case, readers e.g. PennTreeReader will try to read the next tree.
while(tree != null && (tree.value() == null || tree.value().equals("")) && tree.numChildren() <= 1)
tree = tree.firstChild();
if(tree != null && !tree.value().equals(tlp.startSymbol()))
tree = tf.newTreeNode(tlp.startSymbol(), Collections.singletonList(tree));
return tree;
}
}