/**
* Copyright (C) 2001-2017 by RapidMiner and the contributors
*
* Complete list of developers available at our web site:
*
* http://rapidminer.com
*
* This program is free software: you can redistribute it and/or modify it under the terms of the
* GNU Affero General Public License as published by the Free Software Foundation, either version 3
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
* even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License along with this program.
* If not, see http://www.gnu.org/licenses/.
*/
package com.rapidminer.operator.learner.tree;
import com.rapidminer.example.Attribute;
import com.rapidminer.example.Example;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.example.set.SortedExampleSet;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.learner.tree.criterions.Criterion;
import com.rapidminer.tools.Tools;
import java.util.Iterator;
/**
* Calculates the best split point for numerical attributes according to a given criterion.
*
* @author Ingo Mierswa
*/
public class NumericalSplitter {

    /** Criterion used to score every split candidate. */
    private final Criterion criterion;

    /**
     * Creates a splitter that rates split candidates with the given criterion.
     *
     * @param criterion
     *            the criterion queried for the benefit of each candidate split
     */
    public NumericalSplitter(Criterion criterion) {
        this.criterion = criterion;
    }

    /**
     * Determines the split value of the given attribute that yields the highest benefit.
     *
     * The examples are wrapped in a {@link SortedExampleSet} ordered increasingly by the attribute
     * and visited one by one. Whenever the value of the current example differs (according to
     * {@link Tools#isEqual(double, double)}) from the value of the preceding example, the midpoint
     * of both values is rated as a split candidate. A candidate replaces the current best only if
     * its benefit is strictly greater.
     *
     * If the criterion reports support for incremental calculation, the preceding example is
     * handed to {@code swapExample} before the benefit is read via {@code getIncrementalBenefit};
     * otherwise the benefit is requested for the sorted set, the attribute and the candidate value
     * via {@code getNumericalBenefit}.
     *
     * Note: for the first example the preceding value is still {@code NaN}, so a candidate formed
     * at that position is {@code NaN} as well.
     *
     * @param inputSet
     *            the examples to search for a split
     * @param attribute
     *            the attribute whose values are examined
     * @return the best candidate found, or {@code Double.NaN} if no candidate was accepted
     * @throws OperatorException
     *             declared by this method; presumably propagated from the example set or the
     *             criterion calls
     */
    public double getBestSplit(ExampleSet inputSet, Attribute attribute) throws OperatorException {
        SortedExampleSet sorted = new SortedExampleSet(inputSet, attribute, SortedExampleSet.INCREASING);

        if (criterion.supportsIncrementalCalculation()) {
            criterion.startIncrementalCalculation(sorted);
        }

        double winningSplit = Double.NaN;
        double winningBenefit = Double.NEGATIVE_INFINITY;
        double previousValue = Double.NaN;
        Example previousExample = null;

        for (Iterator<Example> cursor = sorted.iterator(); cursor.hasNext();) {
            Example current = cursor.next();
            double value = current.getValue(attribute);
            // Queried per example, exactly as often as before; not assumed to be constant.
            boolean incremental = criterion.supportsIncrementalCalculation();

            if (incremental) {
                // Hand over the example seen in the previous iteration before reading the benefit.
                if (previousExample != null) {
                    criterion.swapExample(previousExample);
                }
                previousExample = current;
            }

            // Candidates are rated at every change of the attribute value. An earlier variant that
            // additionally looked at label changes was disabled (see bug report 952).
            if (!Tools.isEqual(value, previousValue)) {
                double candidate = (previousValue + value) / 2.0d;
                double benefit = incremental ? criterion.getIncrementalBenefit()
                        : criterion.getNumericalBenefit(sorted, attribute, candidate);
                if (benefit > winningBenefit) {
                    winningBenefit = benefit;
                    winningSplit = candidate;
                }
            }

            previousValue = value;
        }

        return winningSplit;
    }
}