/*
 *  RapidMiner
 *
 *  Copyright (C) 2001-2008 by Rapid-I and the contributors
 *
 *  Complete list of developers available at our web site:
 *
 *       http://rapid-i.com
 *
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU Affero General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU Affero General Public License for more details.
 *
 *  You should have received a copy of the GNU Affero General Public License
 *  along with this program.  If not, see http://www.gnu.org/licenses/.
 */
package com.rapidminer.operator.features.selection;

import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.Vector;

import com.rapidminer.example.Attribute;
import com.rapidminer.example.AttributeWeights;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.operator.IOObject;
import com.rapidminer.operator.InputDescription;
import com.rapidminer.operator.Operator;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.parameter.ParameterTypeBoolean;
import com.rapidminer.parameter.ParameterTypeCategory;
import com.rapidminer.parameter.ParameterTypeDouble;
import com.rapidminer.parameter.ParameterTypeInt;

/**
 * This operator selects all attributes which have a weight fulfilling a given
 * condition. For example, only attributes with a weight greater than
 * <code>min_weight</code> should be selected. This operator is also able
 * to select the k attributes with the highest weight.
 *
 * <p>Two families of conditions are supported:</p>
 * <ul>
 * <li><em>simple relations</em> ("greater", "greater equals", "equals",
 *     "less equals", "less") which compare each weight against the value of
 *     the parameter <code>weight</code>, and</li>
 * <li><em>rank relations</em> ("top k", "bottom k", "all but top k",
 *     "all but bottom k", "top p%", "bottom p%") which keep a fixed number of
 *     the attributes that have a known weight.</li>
 * </ul>
 *
 * <p>Attributes whose weight is unknown (NaN) are removed if and only if the
 * parameter <code>deselect_unknown</code> is set, independent of the chosen
 * relation.</p>
 *
 * @author Ingo Mierswa, Stefan Rueping
 * @version $Id: AttributeWeightSelection.java,v 1.7 2008/07/07 07:06:45 ingomierswa Exp $
 */
public class AttributeWeightSelection extends Operator {

    /** The parameter name for "Use this weight for the selection relation." */
    public static final String PARAMETER_WEIGHT = "weight";

    /** The parameter name for "Selects only weights which fulfill this relation." */
    public static final String PARAMETER_WEIGHT_RELATION = "weight_relation";

    /** The parameter name for "Number k of attributes to be selected for weight-relations 'top k' or 'bottom k'." */
    public static final String PARAMETER_K = "k";

    /** The parameter name for "Percentage of attributes to be selected for weight-relations 'top p%' or 'bottom p%'." */
    public static final String PARAMETER_P = "p";

    /** The parameter name for "Indicates if attributes which weight is unknown should be deselected." */
    public static final String PARAMETER_DESELECT_UNKNOWN = "deselect_unknown";

    /** The parameter name for "Indicates if the absolute values of the weights should be used for comparison." */
    public static final String PARAMETER_USE_ABSOLUTE_WEIGHTS = "use_absolute_weights";

    /**
     * Display names of the selectable relations. The position of each entry
     * must match the corresponding integer constant below, because the
     * category parameter is read back as an index into this array.
     */
    private static final String[] WEIGHT_RELATIONS = {
        "greater",
        "greater equals",
        "equals",
        "less equals",
        "less",
        "top k",
        "bottom k",
        "all but top k",
        "all but bottom k",
        "top p%",
        "bottom p%"
    };

    private static final int GREATER = 0;

    private static final int GREATER_EQUALS = 1;

    private static final int EQUALS = 2;

    private static final int LESS_EQUALS = 3;

    private static final int LESS = 4;

    private static final int TOPK = 5;

    private static final int BOTTOMK = 6;

    private static final int ALLBUTTOPK = 7;

    private static final int ALLBUTBOTTOMK = 8;

    private static final int TOPPPERCENT = 9;

    private static final int BOTTOMPPERCENT = 10;

    public AttributeWeightSelection(OperatorDescription description) {
        super(description);
    }

    /**
     * Removes from the input example set every attribute whose weight does not
     * fulfill the configured relation and delivers the reduced example set.
     *
     * <p>Attributes with an unknown (NaN) weight are removed only if
     * <code>deselect_unknown</code> is set; otherwise they are kept, whatever
     * relation is selected.</p>
     *
     * @return an array containing only the (modified) input example set
     * @throws OperatorException if an input or a parameter cannot be retrieved
     */
    public IOObject[] apply() throws OperatorException {
        ExampleSet eSet = getInput(ExampleSet.class);
        AttributeWeights weights = getInput(AttributeWeights.class);

        boolean deselectUnknown = getParameterAsBoolean(PARAMETER_DESELECT_UNKNOWN);
        double relationWeight = getParameterAsDouble(PARAMETER_WEIGHT);
        int relation = getParameterAsInt(PARAMETER_WEIGHT_RELATION);
        boolean useAbsoluteWeights = getParameterAsBoolean(PARAMETER_USE_ABSOLUTE_WEIGHTS);

        // determine which attributes have a known weight value; weightKnown is
        // indexed by the iteration position of the attribute in the example set
        boolean[] weightKnown = new boolean[eSet.getAttributes().size()];
        List<String> knownNames = new ArrayList<String>();
        int index = 0;
        for (Attribute attribute : eSet.getAttributes()) {
            boolean known = !Double.isNaN(weights.getWeight(attribute.getName()));
            if (known) {
                knownNames.add(attribute.getName());
            }
            weightKnown[index++] = known;
        }

        // determine number of attributes that should be selected; every rank
        // relation is reduced to either "top k" or "bottom k" with an adapted k
        int nrAtts = knownNames.size();
        int k = getParameterAsInt(PARAMETER_K);
        switch (relation) {
            case ALLBUTTOPK:
                relation = BOTTOMK;
                k = nrAtts - k;
                break;
            case ALLBUTBOTTOMK:
                relation = TOPK;
                k = nrAtts - k;
                break;
            case TOPPPERCENT:
                relation = TOPK;
                k = (int) Math.round(nrAtts * getParameterAsDouble(PARAMETER_P));
                break;
            case BOTTOMPPERCENT:
                relation = BOTTOMK;
                k = (int) Math.round(nrAtts * getParameterAsDouble(PARAMETER_P));
                break;
            default:
                // plain "top k" / "bottom k" and the simple relations need no adaptation
                break;
        }
        // at least one attribute is kept, but never more than have a known
        // weight (which yields k == 0 if no weight is known at all)
        k = Math.min(Math.max(k, 1), nrAtts);

        if ((relation == TOPK) || (relation == BOTTOMK)) {
            selectByRank(eSet, weights, weightKnown, knownNames, relation == BOTTOMK, k,
                    useAbsoluteWeights, deselectUnknown);
        } else {
            selectByRelation(eSet, weights, relation, relationWeight, useAbsoluteWeights, deselectUnknown);
        }

        return new IOObject[] { eSet };
    }

    /**
     * Keeps the k attributes that come first after sorting the names of all
     * attributes with a known weight, and removes the other attributes with a
     * known weight. Attributes with unknown weight are removed only if
     * <code>deselectUnknown</code> is set.
     *
     * @param eSet               the example set whose attributes are filtered in place
     * @param weights            the attribute weights used for sorting
     * @param weightKnown        per attribute (in iteration order of the example set)
     *                           whether its weight is known
     * @param knownNames         names of the attributes with a known weight
     * @param bottom             <code>true</code> for "bottom k", <code>false</code> for "top k"
     * @param k                  number of attributes to keep, at most <code>knownNames.size()</code>
     * @param useAbsoluteWeights whether sorting should use absolute weight values
     * @param deselectUnknown    whether attributes with unknown weight are removed
     */
    private static void selectByRank(ExampleSet eSet, AttributeWeights weights, boolean[] weightKnown,
            List<String> knownNames, boolean bottom, int k, boolean useAbsoluteWeights,
            boolean deselectUnknown) {
        // NOTE(review): "top k" sorts with INCREASING and "bottom k" with DECREASING,
        // exactly as before; whether the first k entries then are the desired ones
        // depends on how AttributeWeights.sortByWeight interprets the direction -- confirm.
        int direction = bottom ? AttributeWeights.DECREASING : AttributeWeights.INCREASING;
        int comparatorType = useAbsoluteWeights
                ? AttributeWeights.ABSOLUTE_WEIGHTS
                : AttributeWeights.ORIGINAL_WEIGHTS;

        String[] attributeNames = knownNames.toArray(new String[knownNames.size()]);
        weights.sortByWeight(attributeNames, direction, comparatorType);

        // the first k names of the sorted array are the ones to keep; a hash set
        // avoids rescanning that prefix for every single attribute
        Set<String> selectedNames = new HashSet<String>();
        for (int i = 0; i < k; i++) {
            selectedNames.add(attributeNames[i]);
        }

        Iterator<Attribute> iterator = eSet.getAttributes().iterator();
        int index = 0;
        while (iterator.hasNext()) {
            Attribute attribute = iterator.next();
            boolean remove;
            if (weightKnown[index]) {
                remove = !selectedNames.contains(attribute.getName());
            } else {
                remove = deselectUnknown;
            }
            if (remove) {
                iterator.remove();
            }
            // index follows the original attribute positions, hence it is
            // incremented whether or not the attribute was removed
            index++;
        }
    }

    /**
     * Removes all attributes whose weight violates a simple relation with
     * respect to the given reference weight.
     *
     * <p>Attributes with an unknown (NaN) weight are never passed to the
     * relation test: they are removed if <code>deselectUnknown</code> is set
     * and kept otherwise. Without this, the "equals" relation would remove
     * them unconditionally, since <code>NaN != x</code> is always true, while
     * all other relations would keep them.</p>
     *
     * @param eSet               the example set whose attributes are filtered in place
     * @param weights            the attribute weights to test
     * @param relation           one of the simple relation constants
     * @param relationWeight     the reference weight to compare against
     * @param useAbsoluteWeights whether the absolute weight value is compared
     * @param deselectUnknown    whether attributes with unknown weight are removed
     */
    private static void selectByRelation(ExampleSet eSet, AttributeWeights weights, int relation,
            double relationWeight, boolean useAbsoluteWeights, boolean deselectUnknown) {
        Iterator<Attribute> iterator = eSet.getAttributes().iterator();
        while (iterator.hasNext()) {
            Attribute attribute = iterator.next();
            double weight = weights.getWeight(attribute.getName());

            if (Double.isNaN(weight)) {
                if (deselectUnknown) {
                    iterator.remove();
                }
                continue;
            }

            if (useAbsoluteWeights) {
                weight = Math.abs(weight);
            }
            if (violatesRelation(weight, relation, relationWeight)) {
                iterator.remove();
            }
        }
    }

    /**
     * Indicates whether a weight fails the given simple relation, i.e. whether
     * the corresponding attribute has to be removed.
     *
     * @param weight    the (known) weight of the attribute
     * @param relation  one of the simple relation constants
     * @param threshold the reference weight of the relation
     * @return <code>true</code> if the weight does not fulfill the relation;
     *         <code>false</code> if it does or if the relation is not a simple one
     */
    private static boolean violatesRelation(double weight, int relation, double threshold) {
        switch (relation) {
            case GREATER:
                return weight <= threshold;
            case GREATER_EQUALS:
                return weight < threshold;
            case EQUALS:
                return weight != threshold;
            case LESS_EQUALS:
                return weight > threshold;
            case LESS:
                return weight >= threshold;
            default:
                return false;
        }
    }

    /**
     * Returns a dedicated input description for the attribute weights input
     * and delegates to the superclass for all other input classes.
     *
     * NOTE(review): the two boolean arguments presumably mean "do not keep the
     * input by default" and "let the user change this via a parameter" --
     * confirm against InputDescription.
     */
    public InputDescription getInputDescription(Class cls) {
        if (AttributeWeights.class.isAssignableFrom(cls)) {
            return new InputDescription(cls, false, true);
        } else {
            return super.getInputDescription(cls);
        }
    }

    /** This operator needs an example set and attribute weights as input. */
    public Class<?>[] getInputClasses() {
        return new Class[] { ExampleSet.class, AttributeWeights.class };
    }

    /** This operator delivers the example set with the selected attributes. */
    public Class<?>[] getOutputClasses() {
        return new Class[] { ExampleSet.class };
    }

    /**
     * Adds the parameters of this operator (reference weight, relation, k, p,
     * handling of unknown weights, use of absolute weights) to the parameters
     * of the superclass.
     */
    public List<ParameterType> getParameterTypes() {
        List<ParameterType> types = super.getParameterTypes();

        ParameterType type = new ParameterTypeDouble(PARAMETER_WEIGHT,
                "Use this weight for the selection relation.",
                Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY, 1.0d);
        type.setExpert(false);
        types.add(type);

        type = new ParameterTypeCategory(PARAMETER_WEIGHT_RELATION,
                "Selects only weights which fulfill this relation.",
                WEIGHT_RELATIONS, GREATER_EQUALS);
        type.setExpert(false);
        types.add(type);

        type = new ParameterTypeInt(PARAMETER_K,
                "Number k of attributes to be selected for weight-relations 'top k' or 'bottom k'.",
                1, Integer.MAX_VALUE, 10);
        type.setExpert(false);
        types.add(type);

        // p is a fraction in [0, 1], not a value between 0 and 100
        type = new ParameterTypeDouble(PARAMETER_P,
                "Percentage of attributes to be selected for weight-relations 'top p%' or 'bottom p%'.",
                0.0d, 1.0d, 0.5d);
        type.setExpert(false);
        types.add(type);

        types.add(new ParameterTypeBoolean(PARAMETER_DESELECT_UNKNOWN,
                "Indicates if attributes which weight is unknown should be deselected.", true));
        types.add(new ParameterTypeBoolean(PARAMETER_USE_ABSOLUTE_WEIGHTS,
                "Indicates if the absolute values of the weights should be used for comparison.", true));
        return types;
    }
}