/**
* Copyright (C) 2001-2017 by RapidMiner and the contributors
*
* Complete list of developers available at our web site:
*
* http://rapidminer.com
*
* This program is free software: you can redistribute it and/or modify it under the terms of the
* GNU Affero General Public License as published by the Free Software Foundation, either version 3
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
* even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License along with this program.
* If not, see http://www.gnu.org/licenses/.
*/
package com.rapidminer.operator.learner.tree.criterions;
import com.rapidminer.operator.learner.tree.ColumnExampleTable;
/**
 * A criterion for rating a split of the selected examples, working on a
 * {@link ColumnExampleTable} together with a selection. Accuracy and information gain are examples
 * of possible implementations. The calculation can be done in parallel.
 *
 * @author Sebastian Land, Ingo Mierswa, Gisa Schaefer
 * @since 6.2.000
 */
public interface ColumnCriterion {

    /**
     * Computes the benefit of splitting the current selection of the {@code columnTable} at the
     * nominal attribute identified by {@code attributeNumber}.
     *
     * @param columnTable
     *            the column table the selection refers to
     * @param selection
     *            the current selection of the table (presumably the indices of the selected
     *            examples; confirm against the implementations)
     * @param attributeNumber
     *            the number representing the nominal attribute to split at
     * @return the benefit of the split
     */
    double getNominalBenefit(ColumnExampleTable columnTable, int[] selection, int attributeNumber);

    /**
     * Computes the benefit of splitting the current selection of the {@code columnTable} at the
     * given split value of the numerical attribute identified by {@code attributeNumber}.
     *
     * @param columnTable
     *            the column table the selection refers to
     * @param selection
     *            the current selection of the table (presumably the indices of the selected
     *            examples; confirm against the implementations)
     * @param attributeNumber
     *            the number representing the numerical attribute to split at
     * @param splitValue
     *            the value of the numerical attribute at which the split is made
     * @return the benefit of the split
     */
    double getNumericalBenefit(ColumnExampleTable columnTable, int[] selection, int attributeNumber,
            double splitValue);

    /**
     * Tells whether this criterion can derive the benefit from a {@link WeightDistribution}.
     *
     * @return <code>true</code> if the benefit can be calculated out of a
     *         {@link WeightDistribution}
     */
    boolean supportsIncrementalCalculation();

    /**
     * Computes the {@link WeightDistribution} used at the beginning of the incremental
     * calculation.
     *
     * @param columnTable
     *            the column table the selection refers to
     * @param selection
     *            the current selection of the table (presumably the indices of the selected
     *            examples; confirm against the implementations)
     * @param attributeNumber
     *            the number representing the attribute under consideration
     * @return the weight distribution to start the incremental calculation with
     */
    WeightDistribution startIncrementalCalculation(ColumnExampleTable columnTable, int[] selection,
            int attributeNumber);

    /**
     * Updates the given weight distribution when moving on to the next example.
     *
     * @param columnTable
     *            the column table containing the example
     * @param row
     *            the row of the example (presumably the one being moved to; confirm against the
     *            implementations)
     * @param distribution
     *            the weight distribution to update
     */
    void updateWeightDistribution(ColumnExampleTable columnTable, int row, WeightDistribution distribution);

    /**
     * Computes the benefit out of the given distribution.
     *
     * @param distribution
     *            the weight distribution to calculate the benefit from
     * @return the benefit calculated out of the distribution
     */
    double getIncrementalBenefit(WeightDistribution distribution);

    /**
     * Returns the benefit that would be calculated if the weights were distributed over the
     * labels as given. The first index specifies the split fraction, the second index the label.
     * For splits on nominal attributes, the number of split sides is determined by the number of
     * possible values. For splits on numerical attributes, there are 2 split sides if there are
     * no missing values, 3 otherwise.
     *
     * @param weightCounts
     *            the weights, indexed first by split fraction and second by label
     * @return the benefit calculated for the given weight counts
     */
    double getBenefit(double[][] weightCounts);
}