/*
* RapidMiner
*
* Copyright (C) 2001-2011 by Rapid-I and the contributors
*
* Complete list of developers available at our web site:
*
* http://rapid-i.com
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see http://www.gnu.org/licenses/.
*/
package com.rapidminer.operator.similarity;
import java.util.ArrayList;
import java.util.List;
import com.rapidminer.example.Attribute;
import com.rapidminer.example.Attributes;
import com.rapidminer.example.Example;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.example.Tools;
import com.rapidminer.example.table.AttributeFactory;
import com.rapidminer.example.table.DoubleArrayDataRow;
import com.rapidminer.example.table.MemoryExampleTable;
import com.rapidminer.operator.Operator;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.UserError;
import com.rapidminer.operator.ports.InputPort;
import com.rapidminer.operator.ports.OutputPort;
import com.rapidminer.operator.ports.metadata.AttributeMetaData;
import com.rapidminer.operator.ports.metadata.ExampleSetMetaData;
import com.rapidminer.operator.ports.metadata.ExampleSetPassThroughRule;
import com.rapidminer.operator.ports.metadata.SetRelation;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.parameter.ParameterTypeCategory;
import com.rapidminer.parameter.UndefinedParameterError;
import com.rapidminer.tools.Ontology;
import com.rapidminer.tools.math.container.Range;
import com.rapidminer.tools.math.similarity.DistanceMeasure;
import com.rapidminer.tools.math.similarity.DistanceMeasures;
import com.rapidminer.tools.metadata.MetaDataTools;
/**
* <p>This operator creates an example set from a given similarity measure. It can either produce
* a long table format, i.e. something like<br />
* <br />
* id1 id2 sim<br />
* id1 id3 sim<br />
* id1 id4 sim<br />
* ...<br />
* id2 id1 sim<br />
* ...<br />
* <br />
* or a matrix format like here<br />
* <br />
* id id1 id2 id3 ...<br />
* id1 sim sim sim...<br />
* ...
* <br /></p>
*
* @author Ingo Mierswa
*/
public class Similarity2ExampleSet extends Operator {
private final InputPort similarityInput = getInputPorts().createPort("similarity", SimilarityMeasureObject.class);
private final InputPort exampleSetInput = getInputPorts().createPort("exampleSet", ExampleSet.class);
private final OutputPort exampleSetOutput = getOutputPorts().createPort("exampleSet");
public static final String PARAMETER_TABLE_TYPE = "table_type";
public static final String[] TABLE_TYPES = {
"long_table",
"matrix"
};
public static final int TABLE_TYPE_LONG_TABLE = 0;
public static final int TABLE_TYPE_MATRIX = 1;
public Similarity2ExampleSet(OperatorDescription description) {
super(description);
getTransformer().addRule(new ExampleSetPassThroughRule(exampleSetInput, exampleSetOutput, SetRelation.EQUAL) {
@Override
public ExampleSetMetaData modifyExampleSet(ExampleSetMetaData metaData) {
AttributeMetaData idAttribute = metaData.getSpecial(Attributes.ID_NAME);
try {
if (getParameterAsInt(PARAMETER_TABLE_TYPE) == TABLE_TYPE_LONG_TABLE) {
if (idAttribute == null)
MetaDataTools.checkAndCreateIds(metaData);
idAttribute = metaData.getSpecial(Attributes.ID_NAME);
ExampleSetMetaData newSet = new ExampleSetMetaData();
AttributeMetaData firstId = idAttribute.copy();
AttributeMetaData secondId = idAttribute.copy();
firstId.setName("FIRST_ID");
firstId.setRole(Attributes.ATTRIBUTE_NAME);
secondId.setName("SECOND_ID");
secondId.setRole(Attributes.ATTRIBUTE_NAME);
// determining if its distance or similarity
DistanceMeasure measure;
String name = "SIMILARITY";
try {
measure = DistanceMeasures.createMeasure(Similarity2ExampleSet.this);
if (measure.isDistance()) {
name = "DISTANCE";
}
} catch (UndefinedParameterError e) {
} catch (OperatorException e) {
}
AttributeMetaData distanceAttribute = new AttributeMetaData(name, Ontology.REAL, Attributes.ATTRIBUTE_NAME);
newSet.addAttribute(firstId);
newSet.addAttribute(secondId);
newSet.addAttribute(distanceAttribute);
// calculating size
if (metaData.getNumberOfExamples().isKnown())
newSet.setNumberOfExamples(metaData.getNumberOfExamples().getValue().intValue() * (metaData.getNumberOfExamples().getValue().intValue() - 1));
return newSet;
} else {
ExampleSetMetaData newSet = new ExampleSetMetaData();
if (metaData.getSpecial(Attributes.ID_NAME) == null && metaData.getNumberOfExamples().isKnown()) {
// then exact reproduction is possible
AttributeMetaData firstId = new AttributeMetaData("ID", Ontology.INTEGER, Attributes.ID_NAME);
newSet.addAttribute(firstId);
for (int i = 1; i <= metaData.getNumberOfExamples().getValue().intValue(); i++) {
AttributeMetaData attr = new AttributeMetaData("" + i, Ontology.REAL);
attr.setValueRange(new Range(0, Double.POSITIVE_INFINITY), SetRelation.SUBSET);
attr.setValueSetRelation(SetRelation.SUBSET);
newSet.addAttribute(attr);
}
newSet.setNumberOfExamples(metaData.getNumberOfExamples().getValue().intValue());
} else {
AttributeMetaData firstId = metaData.getSpecial(Attributes.ID_NAME).copy();
firstId.setName("ID");
newSet.addAttribute(firstId);
newSet.attributesAreSubset();
}
return newSet;
}
} catch (UndefinedParameterError e) {
}
return metaData;
}
});
}
@Override
public void doWork() throws OperatorException {
SimilarityMeasureObject measureObject = similarityInput.getData();
ExampleSet exampleSet = exampleSetInput.getData();
Tools.checkAndCreateIds(exampleSet);
DistanceMeasure measure = measureObject.getDistanceMeasure();
Attribute id = exampleSet.getAttributes().getId();
if (id == null) {
throw new UserError(this, 129);
}
ExampleSet result = null;
if (getParameterAsInt(PARAMETER_TABLE_TYPE) == TABLE_TYPE_LONG_TABLE) {
List<Attribute> attributes = new ArrayList<Attribute>(3);
Attribute firstIdAttribute = AttributeFactory.createAttribute("FIRST_ID", id.getValueType());
attributes.add(firstIdAttribute);
Attribute secondIdAttribute = AttributeFactory.createAttribute("SECOND_ID", id.getValueType());
attributes.add(secondIdAttribute);
String name = "SIMILARITY";
if (measure.isDistance()) {
name = "DISTANCE";
}
Attribute similarityAttribute = AttributeFactory.createAttribute(name, Ontology.REAL);
attributes.add(similarityAttribute);
MemoryExampleTable table = new MemoryExampleTable(attributes);
int i = 0;
for (Example example : exampleSet) {
int j = 0;
for (Example compExample : exampleSet) {
if (j != i) {
double[] data = new double[3];
if (id.isNominal()) {
data[0] = firstIdAttribute.getMapping().mapString(id.getMapping().mapIndex((int)example.getValue(id)));
data[1] = secondIdAttribute.getMapping().mapString(id.getMapping().mapIndex((int)compExample.getValue(id)));
} else {
data[0] = example.getValue(id);
data[1] = compExample.getValue(id);
}
if (measure.isDistance())
data[2] = measure.calculateDistance(example, compExample);
else
data[2] = measure.calculateSimilarity(example, compExample);
table.addDataRow(new DoubleArrayDataRow(data));
}
j++;
}
i++;
}
result = table.createExampleSet();
} else {
int numberOfExamples = exampleSet.size();
List<Attribute> attributes = new ArrayList<Attribute>(numberOfExamples + 1);
Attribute newIdAttribute = AttributeFactory.createAttribute("ID", id.getValueType());
attributes.add(newIdAttribute);
for (Example example: exampleSet) {
Attribute attribute;
if (id.getValueType() != Ontology.INTEGER)
attribute = AttributeFactory.createAttribute(example.getValueAsString(id), Ontology.REAL);
else
attribute = AttributeFactory.createAttribute("" + (int)example.getValue(id), Ontology.REAL);
attributes.add(attribute);
}
MemoryExampleTable table = new MemoryExampleTable(attributes);
for (Example example : exampleSet) {
double[] data = new double[numberOfExamples + 1];
if (id.isNominal()) {
data[0] = newIdAttribute.getMapping().mapString(id.getMapping().mapIndex((int)example.getValue(id)));
} else {
data[0] = example.getValue(id);
}
int index = 1;
for (Example compExample : exampleSet) {
if (measure.isDistance())
data[index++] = measure.calculateDistance(example, compExample);
else
data[index++] = measure.calculateSimilarity(example, compExample);
}
table.addDataRow(new DoubleArrayDataRow(data));
}
result = table.createExampleSet(null, null, newIdAttribute);
}
exampleSetOutput.deliver(result);
}
@Override
public List<ParameterType> getParameterTypes() {
List<ParameterType> types = super.getParameterTypes();
ParameterType type = new ParameterTypeCategory(PARAMETER_TABLE_TYPE, "Indicates if the resulting table should have a matrix format or a long table format.", TABLE_TYPES, TABLE_TYPE_LONG_TABLE);
type.setExpert(false);
types.add(type);
return types;
}
}