package quickml.supervised.tree.decisionTree.attributeValueIgnoringStrategies;

import quickml.supervised.tree.attributeValueIgnoringStrategies.AttributeValueIgnoringStrategy;
import quickml.supervised.tree.decisionTree.valueCounters.ClassificationCounter;

import java.io.Serializable;
import java.util.Map;

/**
 * Created by alexanderhawk on 3/18/15.
 *
 * <p>Strategy that tells the caller whether the per-class counts recorded for one attribute
 * value are too small to be used.
 *
 * <p>At construction time the most and least popular classes of a reference
 * {@link ClassificationCounter} are captured, together with the ratio of their counts. That
 * ratio scales the occurrence threshold applied to the majority class, so the majority class
 * must be seen proportionally more often than the minority class before a value is kept.
 */
public class BinaryClassAttributeValueIgnoringStrategy implements AttributeValueIgnoringStrategy<ClassificationCounter> {

    /** Fraction of the full thresholds that suffices when both classes are present. */
    private static final double BOTH_CLASSES_THRESHOLD_FACTOR = 0.6;

    private final int minOccurancesOfAttributeValue;
    private final Serializable minorityClassification;
    private final Serializable majorityClassification;
    private final double majorityToMinorityRatio;

    /**
     * Captures the majority/minority classes of {@code cc} and the ratio of their counts.
     *
     * @param cc                            counter from which the most and least popular classes
     *                                      (and their counts) are read
     * @param minOccurancesOfAttributeValue base occurrence threshold; used as-is for the minority
     *                                      class and multiplied by the majority-to-minority ratio
     *                                      for the majority class
     */
    public BinaryClassAttributeValueIgnoringStrategy(ClassificationCounter cc, final int minOccurancesOfAttributeValue) {
        final Serializable mostPopular = ClassificationCounter.getMostPopularClass(cc);
        final Serializable leastPopular = ClassificationCounter.getLeastPopularClass(cc);

        this.minOccurancesOfAttributeValue = minOccurancesOfAttributeValue;
        this.majorityClassification = mostPopular;
        this.minorityClassification = leastPopular;
        this.majorityToMinorityRatio = cc.getCount(mostPopular) / cc.getCount(leastPopular);
    }

    /**
     * Decides whether the attribute value described by {@code termStatistics} should be ignored.
     *
     * <p>The value is kept (the method returns {@code false}) when any of these holds:
     * <ol>
     *   <li>the minority class count exceeds {@code minOccurancesOfAttributeValue};</li>
     *   <li>the majority class count exceeds
     *       {@code majorityToMinorityRatio * minOccurancesOfAttributeValue};</li>
     *   <li>both classes are present and each exceeds 0.6 times its respective threshold.</li>
     * </ol>
     *
     * @param termStatistics per-class counts observed for the attribute value
     * @return {@code true} if none of the conditions above holds, {@code false} otherwise
     */
    public boolean shouldWeIgnoreThisValue(final ClassificationCounter termStatistics) {
        final Map<Serializable, Double> observed = termStatistics.getCounts();

        // Rules are evaluated left to right and short-circuit, in the order listed in the Javadoc.
        final boolean keepValue =
                countExceeds(observed, minorityClassification, minOccurancesOfAttributeValue)
                        || countExceeds(observed, majorityClassification,
                                majorityToMinorityRatio * minOccurancesOfAttributeValue)
                        || bothClassesModeratelySupported(observed);

        return !keepValue;
    }

    /**
     * Returns {@code true} when {@code observed} has an entry for {@code classification} whose
     * count is strictly greater than {@code threshold}.
     */
    private boolean countExceeds(Map<Serializable, Double> observed, Serializable classification, double threshold) {
        return observed.containsKey(classification) && observed.get(classification) > threshold;
    }

    /**
     * Returns {@code true} when both the majority and the minority class have entries in
     * {@code observed} and each count is strictly greater than 0.6 times its full threshold.
     */
    private boolean bothClassesModeratelySupported(Map<Serializable, Double> observed) {
        // Presence of both keys is established first; only then are the counts read.
        if (!observed.containsKey(majorityClassification) || !observed.containsKey(minorityClassification)) {
            return false;
        }
        final double majorityFloor =
                BOTH_CLASSES_THRESHOLD_FACTOR * majorityToMinorityRatio * minOccurancesOfAttributeValue;
        final double minorityFloor = BOTH_CLASSES_THRESHOLD_FACTOR * minOccurancesOfAttributeValue;
        return observed.get(majorityClassification) > majorityFloor
                && observed.get(minorityClassification) > minorityFloor;
    }
}