package edu.stanford.nlp.semparse.open.model; import java.util.*; import edu.stanford.nlp.semparse.open.dataset.Example; import edu.stanford.nlp.semparse.open.model.candidate.Candidate; import edu.stanford.nlp.semparse.open.util.Multiset; import fig.basic.LogInfo; public class FeatureCountPruner implements FeatureMatcher { public Multiset<String> counts = new Multiset<>(); public boolean beVeryQuiet; public FeatureCountPruner(boolean beVeryQuiet) { this.beVeryQuiet = beVeryQuiet; } /** * Add features from the example to the count. * * The same feature within the same example counts as 1 feature. */ public void add(Example example) { if (!beVeryQuiet) LogInfo.begin_track("Collecting features from %s ...", example); Set<String> uniqued = new HashSet<>(); for (Candidate candidate : example.candidates) { for (String name : candidate.getCombinedFeatures().keySet()) { uniqued.add(name); } } for (String name : uniqued) counts.add(name); if (!beVeryQuiet) LogInfo.end_track(); } /** * Prune the features with count < minimumCount */ public void applyThreshold(int minimumCount) { if (!beVeryQuiet) LogInfo.begin_track("Pruning features with count < %d ...", minimumCount); if (!beVeryQuiet) LogInfo.logs("Original #Features: %d", counts.elementSet().size()); counts = counts.getPrunedByCount(minimumCount); if (!beVeryQuiet) LogInfo.logs("Pruned #Features: %d", counts.elementSet().size()); if (!beVeryQuiet) LogInfo.end_track(); } @Override public boolean matches(String feature) { return counts.contains(feature); } }