/**
* Copyright (C) 2001-2017 by RapidMiner and the contributors
*
* Complete list of developers available at our web site:
*
* http://rapidminer.com
*
* This program is free software: you can redistribute it and/or modify it under the terms of the
* GNU Affero General Public License as published by the Free Software Foundation, either version 3
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
* even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License along with this program.
* If not, see http://www.gnu.org/licenses/.
*/
package com.rapidminer.tools.math.similarity.nominal;
import com.rapidminer.example.Attribute;
import com.rapidminer.example.Attributes;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.example.Tools;
import com.rapidminer.example.table.NominalMapping;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.tools.Ontology;
import com.rapidminer.tools.math.similarity.SimilarityMeasure;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
/**
* This is the abstract superclass for all nominal similarity measures.
*
* @author Sebastian Land
*/
public abstract class AbstractNominalSimilarity extends SimilarityMeasure {
private static final long serialVersionUID = 3932502337712338892L;
private boolean initiated = false;
private boolean[] binominal;
private double[] falseIndexSet1;
private double[] falseIndexSet2;
private Map<Integer, Map<Double, Double>> indexMappingSet1;
private Map<Integer, Map<Double, Double>> indexMappingSet2;
@Override
public double calculateDistance(double[] value1, double[] value2) {
return -calculateSimilarity(value1, value2);
}
@Override
public double calculateSimilarity(double[] value1, double[] value2) {
if (!initiated) {
throw new IllegalStateException("Similarity object is not initialized properly");
}
int equalNonFalseValues = 0;
int unequalValues = 0;
int equalFalseValues = 0;
for (int i = 0; i < value1.length; i++) {
// if one value is a Double.NaN, we will treat this as unequal values
if (Double.isNaN(value1[i]) || Double.isNaN(value2[i])) {
unequalValues++;
} else if (binominal[i]) {
if (value1[i] == falseIndexSet1[i] && value2[i] == falseIndexSet2[i]) {
equalFalseValues++;
} else {
if (value1[i] != falseIndexSet1[i] && value2[i] != falseIndexSet2[i]) {
equalNonFalseValues++;
} else {
unequalValues++;
}
}
} else { // Polynominal mapping
Map<Double, Double> indexMapping1 = indexMappingSet1.get(i);
Map<Double, Double> indexMapping2 = indexMappingSet2.get(i);
// Check if common mapping was created. If not the values have unequal values.
if (!indexMapping1.containsKey(value1[i]) || !indexMapping2.containsKey(value2[i])) {
unequalValues++;
} else {
double commonValue1 = indexMapping1.get(value1[i]);
double commonValue2 = indexMapping2.get(value2[i]);
if (commonValue1 != commonValue2) {
unequalValues++;
} else {
equalNonFalseValues++;
}
}
}
// if (value1[i] == value2[i])
// if (binominal[i]) {
// if (value1[i] == falseIndexSet1[i])
// falseValues++;
// else
// equalNonFalseValues++;
// } else
// equalNonFalseValues++;
// else {
// unequalValues++;
// }
}
return calculateSimilarity(equalNonFalseValues, unequalValues, equalFalseValues);
}
/**
* Calculate a similarity given the number of attributes for which both examples agree/disagree.
*
* @param equalNonFalseValues
* the number of attributes for which both examples are equal and non-zero
* @param unequalValues
* the number of attributes for which both examples have unequal values
* @param equalFalseValues
* the number of attributes for which both examples have zero values
* @return the similarity
*/
protected abstract double calculateSimilarity(double equalNonFalseValues, double unequalValues, double equalFalseValues);
@Override
public void init(ExampleSet exampleSet) throws OperatorException {
Tools.onlyNominalAttributes(exampleSet, "nominal similarities");
init(exampleSet.getAttributes(), exampleSet.getAttributes());
}
/*
* (non-Javadoc)
*
* @see
* com.rapidminer.tools.math.similarity.DistanceMeasure#init(com.rapidminer.example.Attributes,
* com.rapidminer.example.Attributes)
*/
@Override
public DistanceMeasureConfig init(Attributes firstSetAttributes, Attributes secondSetAttributes) {
DistanceMeasureConfig config = super.init(firstSetAttributes, secondSetAttributes);
if (config.isMatching()) {
init(config.getFirstSetAttributes(), config.getSecondSetAttributes());
}
return config;
}
/**
* Initializes the private fields for similarity computation.
*/
private void init(Attribute[] attributes1, Attribute[] attributes2) {
int length = attributes1.length;
indexMappingSet1 = new HashMap<Integer, Map<Double, Double>>();
indexMappingSet2 = new HashMap<Integer, Map<Double, Double>>();
binominal = new boolean[length];
falseIndexSet1 = new double[length];
falseIndexSet2 = new double[length];
for (int i = 0; i < length; i++) {
Attribute attribute1 = attributes1[i];
Attribute attribute2 = attributes2[i];
boolean binominalAttr1 = attribute1.getValueType() == Ontology.BINOMINAL;
boolean binominalAttr2 = attribute2.getValueType() == Ontology.BINOMINAL;
binominal[i] = binominalAttr1 && binominalAttr2;
if (binominal[i]) {
falseIndexSet1[i] = attribute1.getMapping().getNegativeIndex();
falseIndexSet2[i] = attribute2.getMapping().getNegativeIndex();
String negativeStringAttr1 = attribute1.getMapping().getNegativeString();
String negativeStringAttr2 = attribute2.getMapping().getNegativeString();
String positiveStringAttr1 = attribute1.getMapping().getPositiveString();
String positiveStringAttr2 = attribute2.getMapping().getPositiveString();
// Testing if mappings are switched only if positive&negative mapping is complete
if (negativeStringAttr1 != null && negativeStringAttr2 != null && positiveStringAttr1 != null
&& positiveStringAttr2 != null) {
// Do we have unequal strings for the binominal mapping? ...
if (!(negativeStringAttr1.equals(negativeStringAttr2) && positiveStringAttr1.equals(positiveStringAttr2))) {
// ... if not, are the strings switched?
if (negativeStringAttr1.equals(positiveStringAttr2)
&& negativeStringAttr2.equals(positiveStringAttr1)) {
// ... if yes we will remap the false value of the second attribute to
// the false value of the first attribute
falseIndexSet2[i] = attribute2.getMapping().getPositiveIndex();
} else {
// ... otherwise we will threat them as polynominal
binominal[i] = false;
createCommonMapping(attribute1, attribute2, i);
falseIndexSet1[i] = Double.NaN;
falseIndexSet2[i] = Double.NaN;
}
}
}
} else {
createCommonMapping(attribute1, attribute2, i);
falseIndexSet1[i] = Double.NaN;
falseIndexSet2[i] = Double.NaN;
}
}
initiated = true;
}
private void createCommonMapping(Attribute attribute1, Attribute attribute2, int attributeIndex) {
Map<Double, Double> indexMap1 = new HashMap<Double, Double>();
Map<Double, Double> indexMap2 = new HashMap<Double, Double>();
indexMappingSet1.put(attributeIndex, indexMap1);
indexMappingSet2.put(attributeIndex, indexMap2);
NominalMapping mappingAttr1 = attribute1.getMapping();
NominalMapping mappingAttr2 = attribute2.getMapping();
Set<String> values = new HashSet<String>(mappingAttr1.getValues());
values.addAll(mappingAttr2.getValues());
int index = 0;
for (String value : values) {
int valueIndexAttr1 = mappingAttr1.getIndex(value);
int valueIndexAttr2 = mappingAttr2.getIndex(value);
// Both attributes have a mapping for this value
if (valueIndexAttr1 != -1 && valueIndexAttr2 != -1) {
indexMap1.put((double) valueIndexAttr1, (double) index);
indexMap2.put((double) valueIndexAttr2, (double) index);
index++;
}
}
}
}