package quickml.supervised.tree.scorers;
import quickml.supervised.tree.summaryStatistics.ValueCounter;
import quickml.supervised.tree.reducers.AttributeStats;
import java.io.Serializable;
/**
 * Base class for gain-ratio scorers that assess the quality of a "split" of data and
 * additionally expose a penalty for imbalanced splits.
 *
 * <p>On top of {@link GRScorer} this class stores an {@code imbalancePenaltyPower} exponent and
 * offers {@link #getPenaltyForImabalance} for subclasses to apply to their scores.
 */
public abstract class GRImbalancedScorer<VC extends ValueCounter<VC>> extends GRScorer<VC> implements Serializable {

    /** Exponent applied to the larger branch total when computing the imbalance penalty. */
    protected final double imbalancePenaltyPower;

    /**
     * @param degreeOfGainRatioPenalty weight of the gain-ratio correction; forwarded to {@link GRScorer}
     * @param imbalancePenaltyPower    exponent used by {@link #getPenaltyForImabalance}
     * @param attributeStats           statistics of the attribute being scored; forwarded to {@link GRScorer}
     */
    public GRImbalancedScorer(double degreeOfGainRatioPenalty, double imbalancePenaltyPower, AttributeStats<VC> attributeStats) {
        super(degreeOfGainRatioPenalty, attributeStats);
        this.imbalancePenaltyPower = imbalancePenaltyPower;
    }

    /**
     * Computes the base-2 entropy of the distribution of attribute values: for every non-empty
     * value counter, {@code p = counter total / aggregate total} contributes {@code -p * log2(p)}.
     *
     * <p>NOTE(review): this private helper is not called anywhere inside this class.
     *
     * @param attributeStats per-value and aggregate statistics of one attribute
     * @return the summed entropy; {@code 0} when there are no non-empty value counters
     */
    private double getIntrinsicValue(AttributeStats<VC> attributeStats) {
        // Loop invariants; holding the total in a double also guarantees floating-point division.
        final double aggregateTotal = attributeStats.getAggregateStats().getTotal();
        final double ln2 = Math.log(2);

        double entropy = 0;
        for (VC valueCounter : attributeStats.getStatsOnEachValue()) {
            if (valueCounter.isEmpty()) {
                continue; // an empty counter must not be considered (it would yield log(0))
            }
            final double share = valueCounter.getTotal() / aggregateTotal;
            entropy -= share * Math.log(share) / ln2;
        }
        return entropy;
    }

    /**
     * Returns {@code 1 / max(a.getTotal(), b.getTotal()) ^ imbalancePenaltyPower}.
     *
     * <p>The factor shrinks as the larger of the two totals grows (for a positive exponent).
     *
     * @param a value counter of one side of the split
     * @param b value counter of the other side of the split
     * @return the multiplicative penalty factor
     */
    protected double getPenaltyForImabalance(VC a, VC b) {
        final double largerTotal = Math.max(a.getTotal(), b.getTotal());
        return 1 / Math.pow(largerTotal, imbalancePenaltyPower);
    }

    /**
     * Scores the given value counter; implemented by subclasses. Judging by the name this is
     * the baseline score of the data before a split is applied (confirm against implementations).
     *
     * @return A score, where a higher value indicates a better split. A value
     * of 0 being the lowest, and indicating no value.
     */
    protected abstract double getUnSplitScore(VC a);

    /**
     * Blends the raw score with its gain-ratio-corrected form:
     * {@code score * (1 - penalty) + penalty * (score / intrinsicValue)}.
     *
     * <p>The correction is only meaningful when {@code degreeOfGainRatioPenalty} is non-zero.
     * With a zero penalty the score is returned unchanged, which also avoids the
     * {@code 0 * (score / 0) = NaN} result the blend formula produces when
     * {@code intrinsicValue} is zero.
     *
     * <p>NOTE(review): with a non-zero penalty and a zero {@code intrinsicValue} the result is
     * still infinite or NaN; callers are presumably expected to avoid that case.
     *
     * @param uncorrectedScore the score before gain-ratio correction
     * @return the corrected score
     */
    protected double correctForGainRatio(double uncorrectedScore) {
        if (degreeOfGainRatioPenalty == 0) {
            return uncorrectedScore;
        }
        return uncorrectedScore * (1 - degreeOfGainRatioPenalty)
                + degreeOfGainRatioPenalty * (uncorrectedScore / intrinsicValue);
    }
}