/*
 *  RapidMiner
 *
 *  Copyright (C) 2001-2008 by Rapid-I and the contributors
 *
 *  Complete list of developers available at our web site:
 *
 *       http://rapid-i.com
 *
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU Affero General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU Affero General Public License for more details.
 *
 *  You should have received a copy of the GNU Affero General Public License
 *  along with this program.  If not, see http://www.gnu.org/licenses/.
 */
package com.rapidminer.operator.features;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;

import com.rapidminer.example.Attribute;
import com.rapidminer.example.AttributeParser;
import com.rapidminer.example.Example;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.example.Tools;
import com.rapidminer.example.set.AttributeWeightedExampleSet;
import com.rapidminer.example.set.SimpleExampleSet;
import com.rapidminer.example.table.AbstractExampleTable;
import com.rapidminer.example.table.DataRowFactory;
import com.rapidminer.example.table.MemoryExampleTable;
import com.rapidminer.generator.GenerationException;
import com.rapidminer.tools.RandomGenerator;

/**
 * If the example set contains two equivalent attributes, the longer
 * representation is removed. The length is taken from the depth reported by
 * the attribute's construction. The equivalency probe is not done by
 * structural comparison. Instead, the attribute values of the equations in
 * question are randomly sampled and the equation results are compared. If the
 * difference is at most <i>epsilon</i> for all <i>k</i> sampled examples
 * (<i>k</i> is the configured number of samples), the equations are probably
 * equivalent. At least they produce similar values. <br/>
 *
 * For the probe a {@link MemoryExampleTable} is constructed over the
 * non-generated attributes and filled with random values. Then an
 * {@link AttributeParser} is used to construct the values of the two
 * attributes that are compared.
 *
 * @author Ingo Mierswa
 * @version $Id: EquivalentAttributeRemoval.java,v 2.17 2006/03/27 13:21:58
 *          ingomierswa Exp $
 */
public class EquivalentAttributeRemoval extends IndividualOperator {

    /**
     * Indicates the number of examples which should be randomly generated to
     * check equivalency.
     */
    private final int numberOfSamples;

    /**
     * If the difference is not larger than epsilon for every sampled example,
     * the attributes are considered as equivalent.
     */
    private final double epsilon;

    /** Recalculates attribute statistics before sampling. */
    private final boolean recalculateAttributeStatistics;

    /** The random generator for the example values. */
    private final RandomGenerator random;

    /**
     * Creates a new equivalent attribute removal population operator.
     *
     * @param numberOfSamples number of random examples generated per equivalence check
     * @param epsilon maximal absolute difference between two sampled values that still counts as equal
     * @param recalculateAttributeStatistics whether all attribute statistics are recalculated before sampling
     * @param random random generator handed to the random table filling
     */
    public EquivalentAttributeRemoval(int numberOfSamples, double epsilon, boolean recalculateAttributeStatistics, RandomGenerator random) {
        this.numberOfSamples = numberOfSamples;
        this.epsilon = epsilon;
        this.recalculateAttributeStatistics = recalculateAttributeStatistics;
        this.random = random;
    }

    /**
     * Removes one attribute of every pair of regular attributes that is found
     * to be equivalent and returns the (modified) individual.
     *
     * Two attributes with equal constructions are equivalent without sampling;
     * in that case the later one is removed. Otherwise both are evaluated on
     * random data and, if the values agree, the one with the larger
     * construction depth is removed (the later one on ties). If the test
     * attributes cannot be generated, a warning is logged and both attributes
     * are kept.
     *
     * @param individual the individual whose example set is cleaned up in place
     * @return a list containing exactly one new individual wrapping the same example set
     */
    public List<Individual> operate(Individual individual) {
        AttributeWeightedExampleSet exampleSet = individual.getExampleSet();
        if (recalculateAttributeStatistics) {
            exampleSet.recalculateAllAttributeStatistics();
        }

        List<Attribute> simpleAttributes = collectSimpleAttributes(exampleSet);
        Map<String, Attribute> removeMap = new HashMap<String, Attribute>();
        Attribute[] candidates = exampleSet.getAttributes().createRegularAttributeArray();

        for (int i = 0; i < candidates.length; i++) {
            Attribute att1 = candidates[i];
            for (int j = i + 1; j < candidates.length; j++) {
                // An attribute that is already scheduled for removal must not cause
                // further removals: the comparison would be wasted work (a random table
                // and two parsed expressions per pair) and, because the epsilon test is
                // not transitive, could drop an attribute whose only equivalent is gone.
                if (removeMap.containsKey(att1.getName())) {
                    break;
                }
                Attribute att2 = candidates[j];
                if (removeMap.containsKey(att2.getName())) {
                    continue;
                }

                if (att1.getConstruction().equals(att2.getConstruction())) {
                    removeMap.put(att2.getName(), att2);
                    continue;
                }

                try {
                    if (producesEquivalentSamples(exampleSet, simpleAttributes, att1, att2)) {
                        // drop the longer representation, the later attribute on ties
                        int depth1 = att1.getConstruction().getDepth();
                        int depth2 = att2.getConstruction().getDepth();
                        Attribute longer = (depth1 > depth2) ? att1 : att2;
                        removeMap.put(longer.getName(), longer);
                    }
                } catch (GenerationException e) {
                    exampleSet.getLog().logWarning("Cannot generate test attribute: " + e.getMessage() + ". We just keep both attributes for sure...");
                }
            }
        }

        for (Attribute attribute : removeMap.values()) {
            exampleSet.getLog().log("Remove equivalent attribute '" + attribute.getName() + "'.");
            exampleSet.getAttributes().remove(attribute);
        }

        List<Individual> result = new LinkedList<Individual>();
        result.add(new Individual(exampleSet));
        return result;
    }

    /**
     * Collects all non-null attributes of the underlying example table whose
     * construction is not marked as generated. These form the columns of the
     * random table used for sampling.
     */
    private List<Attribute> collectSimpleAttributes(AttributeWeightedExampleSet exampleSet) {
        Attribute[] allAttributes = exampleSet.getExampleTable().getAttributes();
        List<Attribute> simpleAttributes = new ArrayList<Attribute>();
        for (int i = 0; i < allAttributes.length; i++) {
            Attribute attribute = allAttributes[i];
            if ((attribute != null) && (!attribute.getConstruction().isGenerated())) {
                simpleAttributes.add(attribute);
            }
        }
        return simpleAttributes;
    }

    /**
     * Builds a fresh random table with {@link #numberOfSamples} rows, generates
     * both attributes from their construction descriptions and compares the
     * resulting values.
     *
     * @throws GenerationException if one of the test attributes cannot be generated
     */
    private boolean producesEquivalentSamples(AttributeWeightedExampleSet exampleSet, List<Attribute> simpleAttributes, Attribute att1, Attribute att2) throws GenerationException {
        AbstractExampleTable exampleTable = new MemoryExampleTable(simpleAttributes, new DataRowFactory(DataRowFactory.TYPE_DOUBLE_ARRAY, '.'), numberOfSamples);

        // create parser
        AttributeParser parser = new AttributeParser(exampleTable);

        // create data set and attributes to check
        Tools.fillTableWithRandomValues(exampleTable, exampleSet, random);
        ExampleSet randomSet = new SimpleExampleSet(exampleTable, new LinkedList<Attribute>());
        parser.generateAttribute(randomSet.getLog(), att1.getConstruction().getDescription(false));
        parser.generateAttribute(randomSet.getLog(), att2.getConstruction().getDescription(false));

        // NOTE(review): equivalent() compares the first two attributes exposed by
        // randomSet.getAttributes(). Confirm that the attributes generated above are
        // visible there; with fewer than two attributes equivalent() returns true.
        return equivalent(randomSet);
    }

    /**
     * Compares the first two attributes of the given example set on every
     * example.
     *
     * @return <code>true</code> if the set has fewer than two attributes, if
     *         the first two attributes are equal objects, or if all value pairs
     *         lie within epsilon; <code>false</code> as soon as one pair differs
     */
    private boolean equivalent(ExampleSet exampleSet) {
        if (exampleSet.getAttributes().size() < 2) {
            return true;
        }

        Iterator<Attribute> attributes = exampleSet.getAttributes().iterator();
        Attribute first = attributes.next();
        Attribute second = attributes.next();
        if (first.equals(second)) {
            return true;
        }

        Iterator<Example> reader = exampleSet.iterator();
        while (reader.hasNext()) {
            Example example = reader.next();
            if (!withinTolerance(example.getValue(first), example.getValue(second), epsilon)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Checks whether two sampled values are close enough to count as equal.
     *
     * Identical values (including two NaNs or two infinities of the same sign)
     * always match. If exactly one value is NaN the difference is NaN and the
     * values do not match; a plain <code>difference &gt; epsilon</code> test
     * would silently accept that case because every comparison with NaN is
     * false.
     */
    private static boolean withinTolerance(double value1, double value2, double epsilon) {
        if (Double.compare(value1, value2) == 0) {
            return true;
        }
        return Math.abs(value1 - value2) <= epsilon;
    }
}