package quickml.supervised.tree.branchFinders; import com.google.common.base.Optional; import quickml.supervised.tree.attributeValueIgnoringStrategies.AttributeValueIgnoringStrategy; import quickml.supervised.tree.scorers.ScorerFactory; import quickml.supervised.tree.summaryStatistics.ValueCounter; import quickml.supervised.tree.scorers.GRImbalancedScorer; import quickml.supervised.tree.attributeIgnoringStrategies.AttributeIgnoringStrategy; import quickml.supervised.tree.reducers.AttributeStats; import quickml.supervised.tree.nodes.Branch; import quickml.supervised.tree.branchingConditions.BranchingConditions; import java.util.Collection; /** * Created by alexanderhawk on 4/5/15. */ public abstract class NumericBranchFinder<VC extends ValueCounter<VC>> extends BranchFinder<VC> { public NumericBranchFinder(Collection<String> candidateAttributes, BranchingConditions<VC> branchingConditions, ScorerFactory<VC> scorerFactory, AttributeValueIgnoringStrategy<VC> attributeValueIgnoringStrategy, AttributeIgnoringStrategy attributeIgnoringStrategy) { super(candidateAttributes, branchingConditions, scorerFactory, attributeValueIgnoringStrategy, attributeIgnoringStrategy); } @Override public Optional<? extends Branch<VC>> getBranch(Branch<VC> parent, AttributeStats<VC> attributeStats) { if (attributeStats.getStatsOnEachValue().size()<=1) { return Optional.absent(); } Optional<SplittingUtils.SplitScore> splitScoreOptional = SplittingUtils.splitSortedAttributeStats(attributeStats, scorerFactory, branchingConditions, attributeValueIgnoringStrategy, false); if (!splitScoreOptional.isPresent()) { SplittingUtils.splitSortedAttributeStats(attributeStats, scorerFactory, branchingConditions, attributeValueIgnoringStrategy, false); return Optional.absent(); } SplittingUtils.SplitScore splitScore = splitScoreOptional.get(); //TODO: make a hyper-parameter for alpha on the following line as it is leads to better performance in cases tested, but may not generalize //splitScore.score=splitScore.score*alpha; //value around 2 often works well. double bestThreshold = (Double)attributeStats.getStatsOnEachValue().get(splitScore.indexOfLastValueCounterInTrueSet).getAttrVal(); return createBranch(parent, attributeStats, splitScore, bestThreshold); } protected abstract Optional<? extends Branch<VC>> createBranch(Branch<VC> parent, AttributeStats<VC> attributeStats, SplittingUtils.SplitScore splitScore, double bestThreshold); }