/** * Copyright (C) 2001-2017 by RapidMiner and the contributors * * Complete list of developers available at our web site: * * http://rapidminer.com * * This program is free software: you can redistribute it and/or modify it under the terms of the * GNU Affero General Public License as published by the Free Software Foundation, either version 3 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without * even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License along with this program. * If not, see http://www.gnu.org/licenses/. */ package com.rapidminer.tools.math.similarity; import java.io.Serializable; import com.rapidminer.example.Attribute; import com.rapidminer.example.Attributes; import com.rapidminer.example.Example; import com.rapidminer.example.ExampleSet; import com.rapidminer.operator.IOObject; import com.rapidminer.operator.OperatorException; import com.rapidminer.operator.ports.InputPorts; import com.rapidminer.parameter.ParameterHandler; import com.rapidminer.tools.ReferenceCache; /** * This interfaces defines the methods for all similarity measures. Classes implementing this * interface are not allowed to have a constructor, instead should use the init method. * * @author Sebastian Land */ public abstract class DistanceMeasure implements Serializable { private static final long serialVersionUID = 1290079829430640414L; protected class DistanceMeasureConfig { Attribute[] firstSetAttributes; Attribute[] secondSetAttributes; // this indicates if a distance can be calculated at all boolean isMatching = true; public boolean isMatching() { return isMatching; } public Attribute[] getFirstSetAttributes() { return firstSetAttributes; } public Attribute[] getSecondSetAttributes() { return secondSetAttributes; } } /** * Configurations for large attribute sets might be expensive to calculate reference memory * intensive data structures (e.g., nominal mappings). */ private final static ReferenceCache<DistanceMeasureConfig> CONFIG_CACHE = new ReferenceCache<>(10); private transient ReferenceCache<DistanceMeasureConfig>.Reference initConfig = CONFIG_CACHE.newReference(null); /** * If you intend to use the method {@link #calculateDistance(Example, Example)} or * {@link #calculateSimilarity(Example, Example)} on examples of two different * {@link ExampleSet}s, you need to call this init method instead of {@link #init(ExampleSet)}. * * @param firstSet * : The exampleset of the first example given to the * {@link #calculateDistance(Example, Example)} method. * @param secondSet * : The exampleset of the second example given to the * {@link #calculateDistance(Example, Example)} method. */ public DistanceMeasureConfig init(Attributes firstSetAttributes, Attributes secondSetAttributes) { DistanceMeasureConfig config = new DistanceMeasureConfig(); config.firstSetAttributes = new Attribute[firstSetAttributes.size()]; if (config.firstSetAttributes.length == secondSetAttributes.size()) { int i = 0; for (Attribute attribute : firstSetAttributes) { config.firstSetAttributes[i] = attribute; i++; } if (firstSetAttributes == secondSetAttributes) { config.secondSetAttributes = config.firstSetAttributes; } else { config.secondSetAttributes = new Attribute[secondSetAttributes.size()]; i = 0; for (Attribute attribute : firstSetAttributes) { Attribute secondSetAttribute = secondSetAttributes.get(attribute.getName()); if (secondSetAttribute != null) { config.secondSetAttributes[i] = secondSetAttribute; i++; } else { config.isMatching = false; break; } } } } else { config.isMatching = false; } this.initConfig = CONFIG_CACHE.newReference(config); return config; } /** * Before using a similarity measure, it is needed to initialize. Subclasses might use * initializing for remembering the exampleset properties like attribute type or test if * applicable to exampleSet at all. Please note that it might be necessary to also override the * other init methods if this measure should make use of parameters or other IOObjects. * * Attention! Subclasses must call this super method to ensure correct initialization! * * @param exampleSet * the exampleset */ public void init(ExampleSet exampleSet) throws OperatorException { init(exampleSet.getAttributes(), exampleSet.getAttributes()); } /** * If using this measure only on examples of the same example set, you can use this method. * Otherwise please refer to {@link #init(ExampleSet, ExampleSet)}. * * Before using a similarity measure, it is needed to initialize. Subclasses might use * initializing for remembering the exampleset properties like attribute type or test if * applicable to exampleSet at all. This init method calls init(exampleSet) per default and * ignores the parameterHandler and the ioContainer. Subclasses might use the parameterHandler * to evaluate parameter settings and the IOContainer to access other objects. * * @param exampleSet * the exampleset * @param parameterHandler * the handler to ask for parameter values */ public void init(ExampleSet exampleSet, ParameterHandler parameterHandler) throws OperatorException { init(exampleSet); } /** * This method does the calculation of the distance between two double arrays. The meanings of * the double values might be remembered from the init method. * * @param value1 * @param value2 * @return the distance */ public abstract double calculateDistance(double[] value1, double[] value2); /** * This method does the similarity of the distance between two double arrays. The meanings of * the double values might be remembered from the init method. * * @param value1 * @param value2 * @return the distance */ public abstract double calculateSimilarity(double[] value1, double[] value2); /** * This method returns a boolean whether this measure is a distance measure * * @return true if is distance */ public boolean isDistance() { return true; } /** * This method returns a boolean whether this measure is a similarity measure * * @return true if is similarity */ public final boolean isSimilarity() { return !isDistance(); } /** * This is a convenient method for calculating the distance between examples. All attributes * will be used to form a double array, used for the calculateDistance method. * * It will call the {@link #init(ExampleSet, ExampleSet)} if not initialized yet. * * @return the distance */ public double calculateDistance(Example firstExample, Example secondExample) { DistanceMeasureConfig config = null; if (initConfig != null) { config = initConfig.get(); } if (config == null) { // this will build the config and assign it to the softreference initConfig config = init(firstExample.getAttributes(), secondExample.getAttributes()); } if (config.isMatching()) { double[] firstValues = new double[config.firstSetAttributes.length]; double[] secondValues = new double[config.secondSetAttributes.length]; for (int i = 0; i < firstValues.length; i++) { firstValues[i] = firstExample.getValue(config.firstSetAttributes[i]); secondValues[i] = secondExample.getValue(config.secondSetAttributes[i]); } return calculateDistance(firstValues, secondValues); } else { // attribute set not matching. return Double.NaN; } } /** * This is a convenient method for calculating the distance between examples and double arrays. * All attributes will be used to form a double array, used for the calculateDistance method. * * @return the distance */ public final double calculateDistance(Example firstExample, double[] second) { Attributes attributes = firstExample.getAttributes(); double[] firstValues = new double[attributes.size()]; int i = 0; for (Attribute attribute : attributes) { firstValues[i] = firstExample.getValue(attribute); i++; } return calculateDistance(firstValues, second); } /** * This is a convenient method for calculating the similarity between examples. All attributes * will be used to form a double array, used for the calculateDistance method. * * @return the distance */ public double calculateSimilarity(Example firstExample, Example secondExample) { DistanceMeasureConfig config = null; if (initConfig != null) { config = initConfig.get(); } if (config == null) { // this will build the config and assign it to the softreference initConfig config = init(firstExample.getAttributes(), secondExample.getAttributes()); } if (config.isMatching()) { double[] firstValues = new double[config.firstSetAttributes.length]; double[] secondValues = new double[config.secondSetAttributes.length]; for (int i = 0; i < firstValues.length; i++) { firstValues[i] = firstExample.getValue(config.firstSetAttributes[i]); secondValues[i] = secondExample.getValue(config.secondSetAttributes[i]); } return calculateSimilarity(firstValues, secondValues); } else { // attribute set not matching. return Double.NaN; } } /** * This is a convenient method for calculating the similarity between examples and a double * array. All attributes will be used to form a double array, used for the calculateDistance * method. * * @return the distance */ public final double calculateSimilarity(Example firstExample, double[] second) { Attributes attributes = firstExample.getAttributes(); double[] firstValues = new double[attributes.size()]; int i = 0; for (Attribute attribute : attributes) { firstValues[i] = firstExample.getValue(attribute); i++; } return calculateSimilarity(firstValues, second); } /** * If the computation of this distance measure depends on additional {@link IOObject}s, this * method can be overridden to install additional ports at the operator which uses this distance * measure. If this method is overridden, subclasses can make use of the data received at the * created ports in their {@link #init(ExampleSet, ParameterHandler)} method. <br/> * The default implementation does nothing. */ public void installAdditionalPorts(InputPorts inputPorts, ParameterHandler parameterHandler) {} /** * Undoes what {@link #installAdditionalPorts(InputPorts, ParameterHandler)} did. * * @see #installAdditionalPorts(InputPorts, ParameterHandler) */ public void uninstallAdditionalPorts(InputPorts inputPorts) {} }