/*
* RapidMiner
*
* Copyright (C) 2001-2011 by Rapid-I and the contributors
*
* Complete list of developers available at our web site:
*
* http://rapid-i.com
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see http://www.gnu.org/licenses/.
*/
package com.rapidminer.tools.math.similarity;
import java.io.Serializable;
import java.lang.ref.SoftReference;
import com.rapidminer.example.Attribute;
import com.rapidminer.example.Attributes;
import com.rapidminer.example.Example;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.operator.IOObject;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.ports.InputPorts;
import com.rapidminer.parameter.ParameterHandler;
/**
 * Abstract base class defining the methods of all distance and similarity measures.
 * Subclasses are expected to perform their setup in the {@code init} methods rather
 * than in a constructor.
 *
 * <p>
 * The example based convenience methods ({@link #calculateDistance(Example, Example)} and
 * {@link #calculateSimilarity(Example, Example)}) rely on an attribute mapping that is built by
 * {@link #init(Attributes, Attributes)} and cached behind a {@link SoftReference}. The mapping is
 * rebuilt lazily whenever it is not available.
 * </p>
 *
 * @author Sebastian Land
 */
public abstract class DistanceMeasure implements Serializable {

    private static final long serialVersionUID = 1290079829430640414L;

    /**
     * Attribute mapping between the two attribute sets whose examples are compared.
     * Declared static because it never needs access to the enclosing measure.
     */
    private static class DistanceMeasureConfig {

        /** Attributes of the first set, in iteration order. */
        Attribute[] firstSetAttributes;

        /** Attributes of the second set, aligned by name with {@link #firstSetAttributes}. */
        Attribute[] secondSetAttributes;

        /** Indicates if a distance can be calculated at all. */
        boolean isMatching = true;
    }

    /**
     * Cached attribute mapping. The field is transient, hence it is {@code null} (not merely
     * an empty reference) on instances that have been restored by deserialization.
     */
    private transient SoftReference<DistanceMeasureConfig> initConfig = new SoftReference<DistanceMeasureConfig>(null);

    /**
     * If you intend to use the method {@link #calculateDistance(Example, Example)} or
     * {@link #calculateSimilarity(Example, Example)} on examples of two different {@link ExampleSet}s,
     * you need to call this init method instead of {@link #init(ExampleSet)}.
     *
     * <p>
     * The attributes of the second set are looked up by the names of the attributes of the first set.
     * If both sets differ in size, or if one name of the first set cannot be found in the second set,
     * the resulting configuration is marked as not matching and the example based calculation methods
     * return {@link Double#NaN}.
     * </p>
     *
     * @param firstSetAttributes
     *            the attributes of the first example given to the
     *            {@link #calculateDistance(Example, Example)} method.
     * @param secondSetAttributes
     *            the attributes of the second example given to the
     *            {@link #calculateDistance(Example, Example)} method.
     * @return the newly built configuration, which is also cached in this measure
     */
    public DistanceMeasureConfig init(Attributes firstSetAttributes, Attributes secondSetAttributes) {
        DistanceMeasureConfig config = new DistanceMeasureConfig();
        config.firstSetAttributes = new Attribute[firstSetAttributes.size()];
        if (config.firstSetAttributes.length == secondSetAttributes.size()) {
            int i = 0;
            for (Attribute attribute : firstSetAttributes) {
                config.firstSetAttributes[i] = attribute;
                i++;
            }
            if (firstSetAttributes == secondSetAttributes) {
                // identical attribute sets: both sides can share the same array
                config.secondSetAttributes = config.firstSetAttributes;
            } else {
                config.secondSetAttributes = new Attribute[secondSetAttributes.size()];
                i = 0;
                for (Attribute attribute : firstSetAttributes) {
                    // align the second set with the order of the first set by attribute name
                    Attribute secondSetAttribute = secondSetAttributes.get(attribute.getName());
                    if (secondSetAttribute != null) {
                        config.secondSetAttributes[i] = secondSetAttribute;
                        i++;
                    } else {
                        config.isMatching = false;
                        break;
                    }
                }
            }
        } else {
            config.isMatching = false;
        }
        this.initConfig = new SoftReference<DistanceMeasureConfig>(config);
        return config;
    }

    /**
     * Before using a similarity measure, it is needed to initialize. Subclasses might use initializing
     * for remembering the exampleset properties like attribute type or test if applicable to exampleSet at all.
     * Please note that it might be necessary to also override the other init methods if this measure should make
     * use of parameters or other IOObjects.
     *
     * Attention! Subclasses must call this super method to ensure correct initialization!
     *
     * @param exampleSet
     *            the exampleset
     * @throws OperatorException
     *             declared for overriding subclasses; this implementation does not throw it itself
     */
    public void init(ExampleSet exampleSet) throws OperatorException {
        init(exampleSet.getAttributes(), exampleSet.getAttributes());
    }

    /**
     * If using this measure only on examples of the same example set, you can use this method. Otherwise please
     * refer to {@link #init(Attributes, Attributes)}.
     *
     * Before using a similarity measure, it is needed to initialize. Subclasses might use initializing
     * for remembering the exampleset properties like attribute type or test if applicable to exampleSet at all.
     * This init method calls {@link #init(ExampleSet)} per default and ignores the parameterHandler. Subclasses
     * might use the parameterHandler to evaluate parameter settings.
     *
     * @param exampleSet
     *            the exampleset
     * @param parameterHandler
     *            the handler to ask for parameter values
     * @throws OperatorException
     *             if {@link #init(ExampleSet)} throws it
     */
    public void init(ExampleSet exampleSet, ParameterHandler parameterHandler) throws OperatorException {
        init(exampleSet);
    }

    /**
     * This method does the calculation of the distance between two double arrays. The meanings of
     * the double values might be remembered from the init method.
     *
     * @param value1
     *            the values of the first data point
     * @param value2
     *            the values of the second data point
     * @return the distance
     */
    public abstract double calculateDistance(double[] value1, double[] value2);

    /**
     * This method does the calculation of the similarity between two double arrays. The meanings of
     * the double values might be remembered from the init method.
     *
     * @param value1
     *            the values of the first data point
     * @param value2
     *            the values of the second data point
     * @return the similarity
     */
    public abstract double calculateSimilarity(double[] value1, double[] value2);

    /**
     * This method returns a boolean whether this measure is a distance measure.
     * The default implementation returns {@code true}.
     *
     * @return true if is distance
     */
    public boolean isDistance() {
        return true;
    }

    /**
     * This method returns a boolean whether this measure is a similarity measure,
     * which is defined as the negation of {@link #isDistance()}.
     *
     * @return true if is similarity
     */
    public final boolean isSimilarity() {
        return !isDistance();
    }

    /**
     * This is a convenient method for calculating the distance between examples.
     * The attributes of the cached configuration are used to form the double arrays that are passed
     * to {@link #calculateDistance(double[], double[])}.
     *
     * It will call {@link #init(Attributes, Attributes)} if no configuration is available yet.
     *
     * @param firstExample
     *            the first example
     * @param secondExample
     *            the second example
     * @return the distance, or {@link Double#NaN} if the attribute sets do not match
     */
    public double calculateDistance(Example firstExample, Example secondExample) {
        DistanceMeasureConfig config = getOrCreateConfig(firstExample, secondExample);
        if (!config.isMatching) {
            // attribute set not matching.
            return Double.NaN;
        }
        return calculateDistance(
                extractValues(firstExample, config.firstSetAttributes),
                extractValues(secondExample, config.secondSetAttributes));
    }

    /**
     * This is a convenient method for calculating the distance between examples and double arrays.
     * All attributes delivered by iterating over the example's attributes will be used to form a
     * double array, used for the {@link #calculateDistance(double[], double[])} method.
     *
     * @param firstExample
     *            the example
     * @param second
     *            the values of the second data point
     * @return the distance
     */
    public final double calculateDistance(Example firstExample, double[] second) {
        return calculateDistance(extractAllValues(firstExample), second);
    }

    /**
     * This is a convenient method for calculating the similarity between examples.
     * The attributes of the cached configuration are used to form the double arrays that are passed
     * to {@link #calculateSimilarity(double[], double[])}.
     *
     * It will call {@link #init(Attributes, Attributes)} if no configuration is available yet.
     *
     * @param firstExample
     *            the first example
     * @param secondExample
     *            the second example
     * @return the similarity, or {@link Double#NaN} if the attribute sets do not match
     */
    public double calculateSimilarity(Example firstExample, Example secondExample) {
        DistanceMeasureConfig config = getOrCreateConfig(firstExample, secondExample);
        if (!config.isMatching) {
            // attribute set not matching.
            return Double.NaN;
        }
        return calculateSimilarity(
                extractValues(firstExample, config.firstSetAttributes),
                extractValues(secondExample, config.secondSetAttributes));
    }

    /**
     * This is a convenient method for calculating the similarity between examples and a double array.
     * All attributes delivered by iterating over the example's attributes will be used to form a
     * double array, used for the {@link #calculateSimilarity(double[], double[])} method.
     *
     * @param firstExample
     *            the example
     * @param second
     *            the values of the second data point
     * @return the similarity
     */
    public final double calculateSimilarity(Example firstExample, double[] second) {
        return calculateSimilarity(extractAllValues(firstExample), second);
    }

    /**
     * If the computation of this distance measure depends on additional {@link IOObject}s,
     * this method can be overridden to install additional ports at the operator which uses
     * this distance measure. If this method is overridden, subclasses can make use of the data
     * received at the created ports in their {@link #init(ExampleSet, ParameterHandler)} method. <br/>
     * The default implementation does nothing.
     */
    public void installAdditionalPorts(InputPorts inputPorts, ParameterHandler parameterHandler) {
    }

    /**
     * Undoes what {@link #installAdditionalPorts(InputPorts, ParameterHandler)} did.
     * The default implementation does nothing.
     *
     * @see #installAdditionalPorts(InputPorts, ParameterHandler)
     */
    public void uninstallAdditionalPorts(InputPorts inputPorts) {
    }

    /**
     * Returns the cached configuration or builds it from the attributes of the given examples
     * if none is available.
     */
    private DistanceMeasureConfig getOrCreateConfig(Example firstExample, Example secondExample) {
        DistanceMeasureConfig config = null;
        // the transient reference holder itself is null after deserialization
        if (initConfig != null) {
            config = initConfig.get();
        }
        if (config == null) {
            // this will build the config and assign it to the softreference initConfig
            config = init(firstExample.getAttributes(), secondExample.getAttributes());
        }
        return config;
    }

    /** Reads the values of the given attributes from the example, preserving the array order. */
    private static double[] extractValues(Example example, Attribute[] attributes) {
        double[] values = new double[attributes.length];
        for (int i = 0; i < attributes.length; i++) {
            values[i] = example.getValue(attributes[i]);
        }
        return values;
    }

    /** Reads the values of all attributes delivered by iterating over the example's attributes. */
    private static double[] extractAllValues(Example example) {
        Attributes attributes = example.getAttributes();
        double[] values = new double[attributes.size()];
        int i = 0;
        for (Attribute attribute : attributes) {
            values[i] = example.getValue(attribute);
            i++;
        }
        return values;
    }
}