/*
* RapidMiner
*
* Copyright (C) 2001-2008 by Rapid-I and the contributors
*
* Complete list of developers available at our web site:
*
* http://rapid-i.com
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see http://www.gnu.org/licenses/.
*/
package com.rapidminer.operator.learner.functions.kernel.jmysvm.examples;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.Serializable;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.Map;
import com.rapidminer.example.Attribute;
import com.rapidminer.tools.Tools;
/**
* Implementation of a sparse example set which can be used for learning. This
* data structure is also used as SVM model.
*
* @author Stefan Rueping, Ingo Mierswa
* @version $Id: SVMExamples.java,v 1.3 2008/05/09 19:23:19 ingomierswa Exp $
*/
public class SVMExamples implements Serializable {

    private static final long serialVersionUID = 7204578592570791663L;

    /**
     * This class holds information about the mean and variance of an
     * attribute. This is needed to use the same values for the test set if
     * scaling is performed by an SVM operator instead of using a preprocessing
     * step.
     */
    public static class MeanVariance implements Serializable {

        private static final long serialVersionUID = 2700347887530126670L;

        private double mean = 0.0d;

        private double variance = 0.0d;

        public MeanVariance(double mean, double variance) {
            this.mean = mean;
            this.variance = variance;
        }

        /** Returns the mean of the attribute. */
        public double getMean() {
            return mean;
        }

        /** Returns the (biased, population) variance of the attribute. */
        public double getVariance() {
            return variance;
        }
    }

    /** The dimension of the example set (number of regular attributes). */
    private int dim;

    /** The number of examples. */
    private int train_size;

    // sparse representation of examples. public for avoiding invocation of a
    // method (slower)

    /** The known (non-default) attribute values for each example. */
    public double[][] atts;

    /**
     * The corresponding indices for the known attribute values for each
     * example.
     */
    public int[][] index;

    /** The ids of all examples (null entries if no id attribute was present). */
    public String[] ids;

    /** The SVM alpha values. Will be filled by learning. */
    private double[] alphas;

    /**
     * The labels of the examples if known. -1 and +1 for classification or the
     * real value for regression tasks. Will be filled by prediction.
     */
    private double[] ys;

    /** The hyperplane offset. */
    private double b;

    /**
     * This example will be once constructed and delivered with the asked
     * values. NOTE: {@link #get_example(int)} always returns this single
     * shared instance, so callers must not hold on to it across calls.
     */
    private SVMExample x;

    /**
     * This map stores the mean-variance information about all attributes (att
     * index --&gt; mean-variance). This information is used to scale the data
     * from the test set.
     */
    private Map<Integer, MeanVariance> meanVarianceMap = new HashMap<Integer, MeanVariance>();

    /** Creates an empty example set of the given size with hyperplane offset b. */
    public SVMExamples(int size, double b) {
        this.train_size = size;
        this.b = b;
        atts = new double[train_size][];
        index = new int[train_size][];
        ys = new double[train_size];
        alphas = new double[train_size];
        // use train_size like the sibling arrays (size == train_size)
        ids = new String[train_size];
        x = new SVMExample();
    }

    /**
     * Computes mean and (population) variance for every regular attribute of
     * the given example set, keyed by the attribute's position.
     * NOTE(review): an empty example set yields NaN means/variances because of
     * the division by size() — presumably callers never pass an empty set;
     * verify before relying on this with empty data.
     */
    private static Map<Integer, MeanVariance> createMeanVariances(com.rapidminer.example.ExampleSet exampleSet) {
        double[] sum = new double[exampleSet.getAttributes().size()];
        double[] squaredSum = new double[sum.length];
        Iterator<com.rapidminer.example.Example> reader = exampleSet.iterator();
        while (reader.hasNext()) {
            com.rapidminer.example.Example example = reader.next();
            int a = 0;
            for (Attribute attribute : exampleSet.getAttributes()) {
                double value = example.getValue(attribute);
                sum[a] += value;
                squaredSum[a] += value * value;
                a++;
            }
        }
        Map<Integer, MeanVariance> meanVariances = new HashMap<Integer, MeanVariance>();
        for (int a = 0; a < sum.length; a++) {
            sum[a] /= exampleSet.size();
            squaredSum[a] /= exampleSet.size();
            // Var(X) = E[X^2] - E[X]^2
            meanVariances.put(a, new MeanVariance(sum[a], squaredSum[a] - (sum[a] * sum[a])));
        }
        return meanVariances;
    }

    /**
     * Creates an example set from the RapidMiner example set. If scale is
     * true, mean-variance scaling information is computed from the data first.
     */
    public SVMExamples(com.rapidminer.example.ExampleSet exampleSet, Attribute labelAttribute, boolean scale) {
        this(exampleSet, labelAttribute, scale ? createMeanVariances(exampleSet) : new HashMap<Integer, MeanVariance>());
    }

    /**
     * Creates a fresh example set of the given size from the RapidMiner example
     * reader. The alpha values and b are zero, the label will be set if it is
     * known.
     */
    public SVMExamples(com.rapidminer.example.ExampleSet exampleSet, Attribute labelAttribute, Map<Integer, MeanVariance> meanVariances) {
        this(exampleSet.size(), 0.0d);
        this.meanVarianceMap = meanVariances;
        Iterator<com.rapidminer.example.Example> reader = exampleSet.iterator();
        Attribute idAttribute = exampleSet.getAttributes().getId();
        int exampleCounter = 0;
        while (reader.hasNext()) {
            com.rapidminer.example.Example current = reader.next();
            // LinkedHashMap keeps the attribute order for the sparse arrays
            Map<Integer, Double> attributeMap = new LinkedHashMap<Integer, Double>();
            int a = 0;
            for (Attribute attribute : exampleSet.getAttributes()) {
                double value = current.getValue(attribute);
                // only non-default values are stored (sparse representation)
                if (!com.rapidminer.example.Tools.isDefault(attribute.getDefault(), value)) {
                    attributeMap.put(a, value);
                }
                if ((a + 1) > dim)
                    dim = (a + 1);
                a++;
            }
            atts[exampleCounter] = new double[attributeMap.size()];
            index[exampleCounter] = new int[attributeMap.size()];
            int attributeCounter = 0;
            for (Map.Entry<Integer, Double> entry : attributeMap.entrySet()) {
                Integer indexValue = entry.getKey();
                index[exampleCounter][attributeCounter] = indexValue.intValue();
                double value = entry.getValue().doubleValue();
                MeanVariance meanVariance = meanVarianceMap.get(indexValue);
                if (meanVariance != null) {
                    // z-score scaling; constant attributes are mapped to 0
                    if (meanVariance.getVariance() == 0.0d)
                        value = 0.0d;
                    else
                        value = (value - meanVariance.getMean()) / Math.sqrt(meanVariance.getVariance());
                }
                atts[exampleCounter][attributeCounter] = value;
                attributeCounter++;
            }
            if (labelAttribute != null) {
                double label = current.getValue(labelAttribute);
                if (labelAttribute.isNominal()) {
                    // nominal labels are mapped to the usual SVM targets -1 / +1
                    ys[exampleCounter] = (label == labelAttribute.getMapping().getPositiveIndex() ? 1 : -1);
                } else {
                    ys[exampleCounter] = label;
                }
            }
            if (idAttribute != null) {
                ids[exampleCounter] = current.getValueAsString(idAttribute);
            }
            exampleCounter++;
        }
    }

    /**
     * Reads an example set from the given input stream. The format must match
     * the one produced by {@link #writeSupportVectors(ObjectOutputStream)}:
     * size, b, dim, a "scale"/"noscale" marker (followed by the mean-variance
     * entries for "scale"), then per example the sparse values, alpha and y.
     */
    public SVMExamples(ObjectInputStream in) throws IOException {
        this(in.readInt(), in.readDouble());
        this.dim = in.readInt();
        String scaleString = in.readUTF();
        if (scaleString.equals("scale")) {
            int numberOfAttributes = in.readInt();
            this.meanVarianceMap = new HashMap<Integer, MeanVariance>();
            for (int i = 0; i < numberOfAttributes; i++) {
                int index = in.readInt();
                double mean = in.readDouble();
                double variance = in.readDouble();
                meanVarianceMap.put(Integer.valueOf(index), new MeanVariance(mean, variance));
            }
        }
        for (int e = 0; e < this.train_size; e++) {
            index[e] = new int[in.readInt()];
            atts[e] = new double[index[e].length];
            for (int a = 0; a < index[e].length; a++) {
                index[e][a] = in.readInt();
                atts[e][a] = in.readDouble();
            }
            alphas[e] = in.readDouble();
            ys[e] = in.readDouble();
        }
    }

    /** Returns the mean-variance scaling information (attribute index --&gt; mean/variance). */
    public Map<Integer, MeanVariance> getMeanVariances() {
        return meanVarianceMap;
    }

    /** Returns the number of examples with a non-zero alpha value. */
    public int getNumberOfSupportVectors() {
        int result = 0;
        for (int i = 0; i < alphas.length; i++)
            if (alphas[i] != 0.0d)
                result++;
        return result;
    }

    /**
     * Writes the support vectors (examples with non-zero alpha) into the given
     * output stream, in the format expected by
     * {@link #SVMExamples(ObjectInputStream)}.
     */
    public void writeSupportVectors(ObjectOutputStream out) throws IOException {
        out.writeInt(getNumberOfSupportVectors());
        out.writeDouble(b);
        out.writeInt(dim);
        if ((meanVarianceMap == null) || (meanVarianceMap.size() == 0)) {
            out.writeUTF("noscale");
        } else {
            out.writeUTF("scale");
            out.writeInt(meanVarianceMap.size());
            // typed entrySet loop instead of a raw-typed keySet iterator plus get()
            for (Map.Entry<Integer, MeanVariance> entry : meanVarianceMap.entrySet()) {
                MeanVariance meanVariance = entry.getValue();
                out.writeInt(entry.getKey().intValue());
                out.writeDouble(meanVariance.getMean());
                out.writeDouble(meanVariance.getVariance());
            }
        }
        for (int e = 0; e < train_size; e++) {
            if (alphas[e] != 0.0d) {
                out.writeInt(atts[e].length);
                for (int a = 0; a < atts[e].length; a++) {
                    out.writeInt(index[e][a]);
                    out.writeDouble(atts[e][a]);
                }
                out.writeDouble(alphas[e]);
                out.writeDouble(ys[e]);
            }
        }
    }

    /**
     * Counts the training examples.
     *
     * @return Number of examples
     */
    public int count_examples() {
        return train_size;
    }

    /**
     * Counts the positive training examples
     *
     * @return Number of positive examples
     */
    public int count_pos_examples() {
        int result = 0;
        for (int i = 0; i < train_size; i++) {
            if (ys[i] > 0) {
                result++;
            }
        }
        return result;
    }

    /**
     * Gets the dimension of the examples
     *
     * @return dim
     */
    public int get_dim() {
        return dim;
    }

    /**
     * Gets an example. NOTE: the returned {@link SVMExample} is a single
     * shared instance whose fields are overwritten on every call.
     *
     * @param pos
     *            Number of example
     * @return Array of example attributes in their default order
     */
    public SVMExample get_example(int pos) {
        x.att = atts[pos];
        x.index = index[pos];
        return x;
    }

    /**
     * Gets an y-value.
     *
     * @param pos
     *            Number of example
     * @return y
     */
    public double get_y(int pos) {
        return ys[pos];
    }

    /** Sets the label value for the specified example. */
    public void set_y(int pos, double y) {
        ys[pos] = y;
    }

    /**
     * Gets the y array
     *
     * @return y
     */
    public double[] get_ys() {
        return ys;
    }

    /**
     * Gets an alpha-value. Please note that the alpha values are already
     * multiplied by the corresponding y-value.
     *
     * @param pos
     *            Number of example
     * @return alpha
     */
    public double get_alpha(int pos) {
        return alphas[pos];
    }

    /**
     * Gets the alpha array. Please note that the alpha values are already
     * multiplied by the corresponding y-value.
     *
     * @return alpha
     */
    public double[] get_alphas() {
        return alphas;
    }

    /**
     * swap two training examples
     *
     * @param pos1
     * @param pos2
     */
    public void swap(int pos1, int pos2) {
        double[] dummyA = atts[pos1];
        atts[pos1] = atts[pos2];
        atts[pos2] = dummyA;
        int[] dummyI = index[pos1];
        index[pos1] = index[pos2];
        index[pos2] = dummyI;
        double dummyd = alphas[pos1];
        alphas[pos1] = alphas[pos2];
        alphas[pos2] = dummyd;
        dummyd = ys[pos1];
        ys[pos1] = ys[pos2];
        ys[pos2] = dummyd;
    }

    /**
     * get b
     *
     * @return b
     */
    public double get_b() {
        return b;
    }

    /**
     * set b
     *
     * @param new_b
     */
    public void set_b(double new_b) {
        b = new_b;
    }

    /**
     * sets an alpha value.
     *
     * @param pos
     *            Number of example
     * @param alpha
     *            New value
     */
    public void set_alpha(int pos, double alpha) {
        alphas[pos] = alpha;
    }

    /** Resets all alpha values to zero. */
    public void clearAlphas() {
        for (int i = 0; i < alphas.length; i++)
            alphas[i] = 0.0d;
    }

    // ================================================================================

    /** Returns the id of the example at the given position (may be null). */
    public String getId(int index) {
        return ids[index];
    }

    @Override
    public String toString() {
        return toString(atts.length, false);
    }

    /** Returns a string representation, optionally restricted to support vectors. */
    public String toString(boolean onlySV) {
        return toString(atts.length, onlySV);
    }

    /**
     * Returns a string representation of the first numberOfExamples examples;
     * if onlySV is true only examples with non-zero alpha are listed.
     */
    public String toString(int numberOfExamples, boolean onlySV) {
        // StringBuilder: no need for StringBuffer's synchronization here
        StringBuilder result = new StringBuilder("SVM Example Set (" + (onlySV ? (getNumberOfSupportVectors() + " support vectors") : (train_size + " examples")) + "):" + Tools.getLineSeparator() + "b: " + b + Tools.getLineSeparator());
        for (int e = 0; e < numberOfExamples; e++) {
            if (!onlySV || (alphas[e] != 0.0d)) {
                for (int a = 0; a < atts[e].length; a++) {
                    result.append(index[e][a] + ":");
                    result.append(atts[e][a] + " ");
                }
                result.append(", alpha: " + alphas[e]);
                result.append(", y: " + ys[e] + Tools.getLineSeparator());
            }
        }
        return result.toString();
    }
}