package edu.stanford.nlp.patterns; import java.util.Properties; import java.util.Map.Entry; import java.util.Set; import edu.stanford.nlp.patterns.GetPatternsFromDataMultiClass.PatternScoring; import edu.stanford.nlp.stats.ClassicCounter; import edu.stanford.nlp.stats.Counter; import edu.stanford.nlp.stats.Counters; import edu.stanford.nlp.stats.TwoDimensionalCounter; import edu.stanford.nlp.util.logging.Redwood; public class ScorePatternsFreqBased<E> extends ScorePatterns<E> { public ScorePatternsFreqBased( ConstantsAndVariables constVars, PatternScoring patternScoring, String label, Set<CandidatePhrase> allCandidatePhrases, TwoDimensionalCounter<E, CandidatePhrase> patternsandWords4Label, TwoDimensionalCounter<E, CandidatePhrase> negPatternsandWords4Label, TwoDimensionalCounter<E, CandidatePhrase> unLabeledPatternsandWords4Label, Properties props) { super(constVars, patternScoring, label, allCandidatePhrases, patternsandWords4Label, negPatternsandWords4Label, unLabeledPatternsandWords4Label, props); } @Override public void setUp(Properties props){} @Override public Counter<E> score() { Counter<E> currentPatternWeights4Label = new ClassicCounter<>(); Counter<E> pos_i = new ClassicCounter<>(); Counter<E> neg_i = new ClassicCounter<>(); Counter<E> unlab_i = new ClassicCounter<>(); for (Entry<E, ClassicCounter<CandidatePhrase>> en : negPatternsandWords4Label .entrySet()) { neg_i.setCount(en.getKey(), en.getValue().size()); } for (Entry<E, ClassicCounter<CandidatePhrase>> en : unLabeledPatternsandWords4Label .entrySet()) { unlab_i.setCount(en.getKey(), en.getValue().size()); } for (Entry<E, ClassicCounter<CandidatePhrase>> en : patternsandWords4Label .entrySet()) { pos_i.setCount(en.getKey(), en.getValue().size()); } Counter<E> all_i = Counters.add(pos_i, neg_i); all_i.addAll(unlab_i); // for (Entry<Integer, ClassicCounter<String>> en : allPatternsandWords4Label // .entrySet()) { // all_i.setCount(en.getKey(), en.getValue().size()); // } Counter<E> posneg_i = Counters.add(pos_i, neg_i); Counter<E> logFi = new ClassicCounter<>(pos_i); Counters.logInPlace(logFi); if (patternScoring.equals(PatternScoring.RlogF)) { currentPatternWeights4Label = Counters.product( Counters.division(pos_i, all_i), logFi); } else if (patternScoring.equals(PatternScoring.RlogFPosNeg)) { Redwood.log("extremePatDebug", "computing rlogfposneg"); currentPatternWeights4Label = Counters.product( Counters.division(pos_i, posneg_i), logFi); } else if (patternScoring.equals(PatternScoring.RlogFUnlabNeg)) { Redwood.log("extremePatDebug", "computing rlogfunlabeg"); currentPatternWeights4Label = Counters.product( Counters.division(pos_i, Counters.add(neg_i, unlab_i)), logFi); } else if (patternScoring.equals(PatternScoring.RlogFNeg)) { Redwood.log("extremePatDebug", "computing rlogfneg"); currentPatternWeights4Label = Counters.product( Counters.division(pos_i, neg_i), logFi); } else if (patternScoring.equals(PatternScoring.YanGarber02)) { Counter<E> acc = Counters.division(pos_i, Counters.add(pos_i, neg_i)); double thetaPrecision = 0.8; Counters.retainAbove(acc, thetaPrecision); Counter<E> conf = Counters.product( Counters.division(pos_i, all_i), logFi); for (E p : acc.keySet()) { currentPatternWeights4Label.setCount(p, conf.getCount(p)); } } else if (patternScoring.equals(PatternScoring.LinICML03)) { Counter<E> acc = Counters.division(pos_i, Counters.add(pos_i, neg_i)); double thetaPrecision = 0.8; Counters.retainAbove(acc, thetaPrecision); Counter<E> conf = Counters.product(Counters.division( Counters.add(pos_i, Counters.scale(neg_i, -1)), all_i), logFi); for (E p : acc.keySet()) { currentPatternWeights4Label.setCount(p, conf.getCount(p)); } } else { throw new RuntimeException("not implemented " + patternScoring + " . check spelling!"); } return currentPatternWeights4Label; } }