/**
*
*/
package org.streaminer.stream.classifier.tree;
import org.streaminer.util.math.MathHelper;
import java.io.Serializable;
/**
 * Quality (impurity) criterion used while constructing the tree.
 * <p>
 * Implementations available as nested classes:
 * </p>
 * <ul>
 *   <li>{@link EntropyCriterion} (information gain) as used by ID3</li>
 *   <li>{@link GiniCriterion} (Gini-Index) as used by CART</li>
 * </ul>
 *
 * @author Tobias Beckers
 */
public interface QualityCriterion extends Serializable {

    /** Shared instance of the entropy based criterion ({@link EntropyCriterion}). */
    QualityCriterion INFO_GAIN = new EntropyCriterion();

    /** Shared instance of the Gini based criterion ({@link GiniCriterion}). */
    QualityCriterion GINI_INDEX = new GiniCriterion();

    /**
     * Computes the quality value of a probability distribution.
     *
     * @param probabilities the probability values; they are expected to sum up to 1
     * @return the quality for the given probabilities
     */
    double getQuality(double... probabilities);

    /**
     * Provides the lower bound of gain for this quality criterion.
     *
     * @return the lower bound of gain for this quality criterion
     */
    double getLowestGain();

    /**
     * Provides the upper bound of gain for this quality criterion, which depends on the
     * number of possible classes.
     *
     * @param numberOfClasses the number of possible classes
     * @return the upper bound of gain for the given number of classes
     */
    double getHighestGain(int numberOfClasses);

    /**
     * Provides the name of this quality criterion.
     *
     * @return the name of this quality criterion
     */
    @Override
    String toString();

    /**
     * The {@link QualityCriterion} 'Gini-Index' as used by CART.
     * <p>
     * The quality is one minus the sum of the squared probabilities.
     * </p>
     */
    class GiniCriterion implements QualityCriterion {

        private static final long serialVersionUID = 1L;

        /**
         * {@inheritDoc}
         * <p>
         * Returns {@code 1 - sum(p_i * p_i)} over all given probabilities.
         * </p>
         */
        @Override
        public double getQuality(double... probabilities) {
            double sumOfSquares = 0d;
            for (final double p : probabilities) {
                sumOfSquares += p * p;
            }
            return 1 - sumOfSquares;
        }

        /** {@inheritDoc} */
        @Override
        public double getLowestGain() {
            return 0d;
        }

        /**
         * {@inheritDoc}
         * <p>
         * Returns {@code 1 - 1 / numberOfClasses}, i.e. the index of a uniform distribution
         * over {@code numberOfClasses} classes.
         * </p>
         */
        @Override
        public double getHighestGain(int numberOfClasses) {
            final double uniformShare = 1d / numberOfClasses;
            return 1d - uniformShare;
        }

        /** {@inheritDoc} */
        @Override
        public String toString() {
            return "Gini-Index";
        }
    }

    /**
     * The {@link QualityCriterion} 'entropy' or 'information (gain)' as used by ID3.
     */
    class EntropyCriterion implements QualityCriterion {

        private static final long serialVersionUID = 1L;

        /**
         * {@inheritDoc}
         * <p>
         * Accumulates {@code -p * log2(p)} for every probability that is strictly greater
         * than zero; all other entries contribute nothing.
         * </p>
         */
        @Override
        public double getQuality(double... probabilities) {
            double entropy = 0d;
            for (final double p : probabilities) {
                // Only strictly positive values take part; anything else is skipped
                // so that the logarithm is never evaluated for it.
                if (!(p > 0d)) {
                    continue;
                }
                entropy -= p * MathHelper.log2(p);
            }
            return entropy;
        }

        /** {@inheritDoc} */
        @Override
        public double getLowestGain() {
            return 0d;
        }

        /**
         * {@inheritDoc}
         * <p>
         * Returns {@code MathHelper.log2(numberOfClasses)}.
         * </p>
         */
        @Override
        public double getHighestGain(int numberOfClasses) {
            return MathHelper.log2(numberOfClasses);
        }

        /** {@inheritDoc} */
        @Override
        public String toString() {
            return "information";
        }
    }
}