/* * RapidMiner * * Copyright (C) 2001-2008 by Rapid-I and the contributors * * Complete list of developers available at our web site: * * http://rapid-i.com * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see http://www.gnu.org/licenses/. */ package com.rapidminer.operator.similarity; import java.util.ArrayList; import java.util.Iterator; import java.util.List; import com.rapidminer.example.Attribute; import com.rapidminer.example.ExampleSet; import com.rapidminer.example.table.AttributeFactory; import com.rapidminer.example.table.DoubleArrayDataRow; import com.rapidminer.example.table.MemoryExampleTable; import com.rapidminer.operator.IOObject; import com.rapidminer.operator.Operator; import com.rapidminer.operator.OperatorDescription; import com.rapidminer.operator.OperatorException; import com.rapidminer.parameter.ParameterType; import com.rapidminer.parameter.ParameterTypeCategory; import com.rapidminer.tools.Ontology; /** * <p>This operator creates an example set from a given similarity measure. It can either produce * a long table format, i.e. something like<br /> * <br /> * id1 id2 sim<br /> * id1 id3 sim<br /> * id1 id4 sim<br /> * ...<br /> * id2 id1 sim<br /> * ...<br /> * <br /> * or a matrix format like here<br /> * <br /> * id id1 id2 id3 ...<br /> * id1 sim sim sim...<br /> * ... * <br /></p> * * @author Ingo Mierswa * @version $Id: Similarity2ExampleSet.java,v 1.2 2008/08/18 10:47:29 ingomierswa Exp $ */ public class Similarity2ExampleSet extends Operator { public static final String PARAMETER_TABLE_TYPE = "table_type"; public static final String[] TABLE_TYPES = { "long_table", "matrix" }; public static final int TABLE_TYPE_LONG_TABLE = 0; public static final int TABLE_TYPE_MATRIX = 1; public Similarity2ExampleSet(OperatorDescription description) { super(description); } public IOObject[] apply() throws OperatorException { SimilarityMeasure similarityMeasure = getInput(SimilarityMeasure.class); ExampleSet result = null; if (getParameterAsInt(PARAMETER_TABLE_TYPE) == TABLE_TYPE_LONG_TABLE) { List<Attribute> attributes = new ArrayList<Attribute>(3); Attribute firstIdAttribute = AttributeFactory.createAttribute("FIRST_ID", Ontology.NOMINAL); attributes.add(firstIdAttribute); Attribute secondIdAttribute = AttributeFactory.createAttribute("SECOND_ID", Ontology.NOMINAL); attributes.add(secondIdAttribute); String name = "SIMILARITY"; if (similarityMeasure.isDistance()) { name = "DISTANCE"; } Attribute similarityAttribute = AttributeFactory.createAttribute(name, Ontology.REAL); attributes.add(similarityAttribute); MemoryExampleTable table = new MemoryExampleTable(attributes); Iterator<String> first = similarityMeasure.getIds(); while (first.hasNext()) { String firstId = first.next(); double firstIdMapping = firstIdAttribute.getMapping().mapString(firstId); Iterator<String> second = similarityMeasure.getIds(); while (second.hasNext()) { String secondId = second.next(); if (!firstId.equals(secondId)) { double[] data = new double[3]; data[0] = firstIdMapping; data[1] = secondIdAttribute.getMapping().mapString(secondId); data[2] = similarityMeasure.similarity(firstId, secondId); table.addDataRow(new DoubleArrayDataRow(data)); } } } result = table.createExampleSet(); } else { int numberOfIds = similarityMeasure.getNumberOfIds(); List<Attribute> attributes = new ArrayList<Attribute>(numberOfIds + 1); Attribute idAttribute = AttributeFactory.createAttribute("ID", Ontology.NOMINAL); attributes.add(idAttribute); Iterator<String> ids = similarityMeasure.getIds(); while (ids.hasNext()) { String id = ids.next(); Attribute attribute = AttributeFactory.createAttribute(id, Ontology.REAL); attributes.add(attribute); } MemoryExampleTable table = new MemoryExampleTable(attributes); Iterator<String> first = similarityMeasure.getIds(); while (first.hasNext()) { String firstId = first.next(); double[] data = new double[numberOfIds + 1]; data[0] = idAttribute.getMapping().mapString(firstId); int index = 1; Iterator<String> second = similarityMeasure.getIds(); while (second.hasNext()) { String secondId = second.next(); data[index++] = similarityMeasure.similarity(firstId, secondId); } table.addDataRow(new DoubleArrayDataRow(data)); } result = table.createExampleSet(null, null, idAttribute); } return new IOObject[] { result }; } public Class<?>[] getInputClasses() { return new Class[] { SimilarityMeasure.class }; } public Class<?>[] getOutputClasses() { return new Class[] { ExampleSet.class }; } public List<ParameterType> getParameterTypes() { List<ParameterType> types = super.getParameterTypes(); ParameterType type = new ParameterTypeCategory(PARAMETER_TABLE_TYPE, "Indicates if the resulting table should have a matrix format or a long table format.", TABLE_TYPES, TABLE_TYPE_LONG_TABLE); type.setExpert(false); types.add(type); return types; } }