package edu.stanford.nlp.trees.international.spanish; import edu.stanford.nlp.trees.Tree; import edu.stanford.nlp.trees.TreeFactory; import edu.stanford.nlp.trees.tregex.TregexPattern; import edu.stanford.nlp.trees.tregex.tsurgeon.Tsurgeon; import edu.stanford.nlp.trees.tregex.tsurgeon.TsurgeonPattern; /** * A tree normalizer made to be used immediately on trees which have * been split apart. * * This is used in AnCora processing in order to fix some common * problems with splitting multi-sentence trees. * * @author Jon Gauthier */ public class SpanishSplitTreeNormalizer extends SpanishTreeNormalizer { private static final TregexPattern nonsensicalClauseRewrite = TregexPattern.compile("sentence=sentence < (S=S !$ /^[^f]/)"); private static final TsurgeonPattern eraseClause = Tsurgeon.parseOperation("excise S S"); @Override public Tree normalizeWholeTree(Tree tree, TreeFactory tf) { tree = super.normalizeWholeTree(tree, tf); tree = Tsurgeon.processPattern(nonsensicalClauseRewrite, eraseClause, tree); return tree; } }