/*
* RapidMiner
*
* Copyright (C) 2001-2011 by Rapid-I and the contributors
*
* Complete list of developers available at our web site:
*
* http://rapid-i.com
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see http://www.gnu.org/licenses/.
*/
package com.rapidminer.operator.learner.local;
import java.io.Serializable;
import java.util.Collection;
import Jama.Matrix;
import com.rapidminer.example.Attribute;
import com.rapidminer.example.Attributes;
import com.rapidminer.example.Example;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.learner.PredictionModel;
import com.rapidminer.tools.Tools;
import com.rapidminer.tools.container.Tupel;
import com.rapidminer.tools.math.LinearRegression;
import com.rapidminer.tools.math.VectorMath;
import com.rapidminer.tools.math.container.GeometricDataCollection;
import com.rapidminer.tools.math.smoothing.SmoothingKernel;
/**
* @author Sebastian Land
*
*/
public class LocalPolynomialRegressionModel extends PredictionModel {
public static class RegressionData implements Serializable{
private static final long serialVersionUID = 8540161261369474329L;
private double[] exampleValues;
private double exampleLabel;
private double exampleWeight;
public RegressionData(double[] exampleValues, double exampleLabel, double exampleWeight) {
this.exampleValues = exampleValues;
this.exampleLabel = exampleLabel;
this.exampleWeight = exampleWeight;
}
public double[] getExampleValues() {
return exampleValues;
}
public double getExampleLabel() {
return exampleLabel;
}
public double getExampleWeight() {
return exampleWeight;
}
}
private GeometricDataCollection<RegressionData> samples;
private Neighborhood neighborhood;
private SmoothingKernel kernelSmoother;
private int degree;
private double ridge;
protected LocalPolynomialRegressionModel(ExampleSet trainingExampleSet, GeometricDataCollection<RegressionData> data, Neighborhood neighborhood, SmoothingKernel kernelSmoother, int degree, double ridge) {
super(trainingExampleSet);
this.samples = data;
this.neighborhood = neighborhood;
this.kernelSmoother = kernelSmoother;
this.degree = degree;
this.ridge = ridge;
}
private static final long serialVersionUID = -4874020185611138104L;
@Override
public ExampleSet performPrediction(ExampleSet exampleSet, Attribute predictedLabel) throws OperatorException {
Attributes attributes = exampleSet.getAttributes();
double[] probe = new double[attributes.size()];
for (Example example: exampleSet) {
// copying example values
int i = 0;
for (Attribute attribute: attributes) {
probe[i] = example.getValue(attribute);
i++;
}
// determining neighborhood
Collection<Tupel<Double, RegressionData>> localExamples = neighborhood.getNeighbourhood(samples, probe);
if (localExamples.size() > 1) {
// building matrixes
double[][] x = new double[localExamples.size()][];
double[][] y = new double[localExamples.size()][1];
double[] distance = new double[localExamples.size()];
double[] weight = new double[localExamples.size()];
int j = 0;
for (Tupel<Double, RegressionData> tupel: localExamples) {
distance[j] = tupel.getFirst(); // distance
x[j] = VectorMath.polynomialExpansion(tupel.getSecond().getExampleValues(), degree); // data itself
y[j][0] = tupel.getSecond().getExampleLabel(); // the label
weight[j] = tupel.getSecond().getExampleWeight();
j++;
}
// finding greatest distance
double maxDistance = Double.NEGATIVE_INFINITY;
for (j = 0; j < distance.length; j++) {
maxDistance = (maxDistance < distance[j])? distance[j] : maxDistance;
}
// using kernel smoother for locality weight calculation and multiply by example weight
for (j = 0; j < distance.length; j++) {
weight[j] = weight[j] * kernelSmoother.getWeight(distance[j], maxDistance);
}
double[] coefficients = LinearRegression.performRegression(new Matrix(x), new Matrix(y), weight, ridge);
double[] probeExpaneded = VectorMath.polynomialExpansion(probe, degree);
example.setPredictedLabel(VectorMath.vectorMultiplication(probeExpaneded, coefficients));
} else {
if (localExamples.size() == 1) {
example.setPredictedLabel(localExamples.iterator().next().getSecond().getExampleLabel());
} else {
example.setPredictedLabel(Double.NaN);
}
}
}
return exampleSet;
}
@Override
public String toString() {
StringBuffer buffer = new StringBuffer();
buffer.append("This model contains " + samples.size() + " examples for determining the neighborhood." + Tools.getLineSeparator());
buffer.append("The fitted polynomial is of degree " + degree + " and is fitted with a ridge factor of " + ridge + Tools.getLineSeparator());
buffer.append("It uses the " + neighborhood.toString() + " for neighborhood determination." + Tools.getLineSeparator());
buffer.append("Weighting is performed using the " + kernelSmoother.toString());
return buffer.toString();
}
public GeometricDataCollection<RegressionData> getSamples() {
return samples;
}
public Neighborhood getNeighborhood() {
return neighborhood;
}
public SmoothingKernel getKernelSmoother() {
return kernelSmoother;
}
public int getDegree() {
return degree;
}
public double getRidge() {
return ridge;
}
public String[] getAttributeNames() {
ExampleSet trainSet = getTrainingHeader();
Attributes attributes = trainSet.getAttributes();
String[] attributeNames = new String[attributes.size()];
int i = 0;
for (Attribute attribute: attributes) {
attributeNames[i] = attribute.getName();
i++;
}
return attributeNames;
}
}