package edu.stanford.nlp.parser.metrics;
import java.util.Set;
import edu.stanford.nlp.trees.Constituent;
import edu.stanford.nlp.trees.ConstituentFactory;
import edu.stanford.nlp.trees.LabeledScoredConstituentFactory;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.TreebankLanguagePack;
import edu.stanford.nlp.trees.TreeFilters;
import java.util.function.Predicate;
import edu.stanford.nlp.util.Generics;
/**
* An AbstractEval which doesn't just evaluate all constituents, but
* lets you provide a filter to only pay attention to constituents
* formed from certain subtrees. For example, one provided filter
* lets you limit the evaluation to subtrees which contain a
* particular kind of node.
*
* @author John Bauer
*/
public class FilteredEval extends AbstractEval {
  /**
   * Only constituents rooted at subtrees accepted by this filter are
   * collected for evaluation.  Assigned once in the constructor.
   */
  private final Predicate<Tree> subtreeFilter;

  /** Factory used to turn accepted subtrees into scorable constituents. */
  private final ConstituentFactory cf = new LabeledScoredConstituentFactory();

  /**
   * Creates a filtered evaluation.
   *
   * @param str name of this metric, passed through to AbstractEval
   * @param runningAverages whether to report running averages, passed
   *        through to AbstractEval
   * @param subtreeFilter predicate selecting which subtrees contribute
   *        constituents to the evaluation
   */
  public FilteredEval(String str, boolean runningAverages, Predicate<Tree> subtreeFilter) {
    super(str, runningAverages);
    this.subtreeFilter = subtreeFilter;
  }

  /**
   * Collects the constituents of {@code tree} whose subtrees pass
   * {@link #subtreeFilter}.  A {@code null} tree yields an empty set.
   *
   * @param tree the parse tree to extract constituents from; may be null
   * @return the (possibly empty) set of matching constituents
   */
  protected Set<?> makeObjects(Tree tree) {
    Set<Constituent> set = Generics.newHashSet();
    if (tree != null) {
      // false: do not include the leaves/preterminal level as constituents
      set.addAll(tree.constituents(cf, false, subtreeFilter));
    }
    return set;
  }

  /**
   * Returns an eval which is good for counting the attachment of
   * specific node types. For example, suppose you want to count the
   * attachment of PP in an English parsing. You could create one
   * with PP as the child pattern, and then it would give you p/r/f1
   * for just nodes which have a PP as a child.
   *
   * @param str name of this metric
   * @param runningAverages whether to report running averages
   * @param tlp language pack used to interpret node labels
   * @param childPattern label pattern a node's child must match for the
   *        node to be evaluated (e.g. {@code "PP"})
   * @return a FilteredEval restricted to nodes with a matching child
   */
  public static FilteredEval childFilteredEval(String str, boolean runningAverages, TreebankLanguagePack tlp, String childPattern) {
    return new FilteredEval(str, runningAverages, new TreeFilters.HasMatchingChild(tlp, childPattern));
  }
}