SpanishSplitTreeNormalizer.java example

Explorer
CoreNLP-master
package edu.stanford.nlp.trees.international.spanish;

import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.TreeFactory;
import edu.stanford.nlp.trees.tregex.TregexPattern;
import edu.stanford.nlp.trees.tregex.tsurgeon.Tsurgeon;
import edu.stanford.nlp.trees.tregex.tsurgeon.TsurgeonPattern;

/**
 * A tree normalizer made to be used immediately on trees which have
 * been split apart.
 *
 * This is used in AnCora processing in order to fix some common
 * problems with splitting multi-sentence trees.
 *
 * @author Jon Gauthier
 */
public class SpanishSplitTreeNormalizer extends SpanishTreeNormalizer {

  private static final TregexPattern nonsensicalClauseRewrite =
    TregexPattern.compile("sentence=sentence < (S=S !$ /^[^f]/)");
  private static final TsurgeonPattern eraseClause = Tsurgeon.parseOperation("excise S S");

  @Override
  public Tree normalizeWholeTree(Tree tree, TreeFactory tf) {
    tree = super.normalizeWholeTree(tree, tf);

    tree = Tsurgeon.processPattern(nonsensicalClauseRewrite, eraseClause, tree);

    return tree;
  }
}