/*
* RapidMiner
*
* Copyright (C) 2001-2008 by Rapid-I and the contributors
*
* Complete list of developers available at our web site:
*
* http://rapid-i.com
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see http://www.gnu.org/licenses/.
*/
package com.rapidminer.operator.learner.tree;
import com.rapidminer.example.Attribute;
import com.rapidminer.example.Example;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.example.set.SortedExampleSet;
import com.rapidminer.example.set.SplittedExampleSet;
/**
* Calculates the best split point for numerical attributes according to
* a given criterion.
*
* @author Ingo Mierswa
* @version $Id: NumericalSplitter.java,v 1.10 2008/05/09 19:22:53 ingomierswa Exp $
*/
public class NumericalSplitter {
private Criterion criterion;
public NumericalSplitter(Criterion criterion) {
this.criterion = criterion;
}
public double getBestSplit(ExampleSet inputSet, Attribute attribute) {
SortedExampleSet exampleSet = new SortedExampleSet((ExampleSet)inputSet.clone(), attribute, SortedExampleSet.INCREASING);
Attribute labelAttribute = exampleSet.getAttributes().getLabel();
double oldLabel = Double.NaN;
double bestSplit = Double.NaN;
double lastValue = Double.NaN;
double bestSplitBenefit = Double.NEGATIVE_INFINITY;
int counter = -1;
Example lastExample = null;
if (this.criterion.supportsIncrementalCalculation()) {
this.criterion.startIncrementalCalculation(exampleSet);
}
for (Example e : exampleSet) {
counter++;
double currentValue = e.getValue(attribute);
double label = e.getValue(labelAttribute);
if (this.criterion.supportsIncrementalCalculation()) {
if (lastExample != null) this.criterion.swapExample(lastExample);
lastExample = e;
if ((Double.isNaN(oldLabel)) || (oldLabel != label)) {
double benefit = this.criterion.getIncrementalBenefit();
if (benefit > bestSplitBenefit) {
bestSplitBenefit = benefit;
bestSplit = (lastValue + currentValue) / 2.0d;
}
}
}
else if ((Double.isNaN(oldLabel)) || (oldLabel != label)) {
double splitValue = (lastValue + currentValue) / 2.0d;
SplittedExampleSet splitted = SplittedExampleSet.splitByAttribute(exampleSet, attribute, splitValue);
double benefit = this.criterion.getBenefit(splitted);
if (benefit > bestSplitBenefit) {
bestSplitBenefit = benefit;
bestSplit = splitValue;
}
oldLabel = label;
}
lastValue = currentValue;
}
return bestSplit;
}
}