/* * RapidMiner * * Copyright (C) 2001-2011 by Rapid-I and the contributors * * Complete list of developers available at our web site: * * http://rapid-i.com * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see http://www.gnu.org/licenses/. */ package com.rapidminer.operator.features.selection; import java.util.Iterator; import java.util.List; import java.util.Vector; import com.rapidminer.example.Attribute; import com.rapidminer.example.AttributeWeights; import com.rapidminer.example.ExampleSet; import com.rapidminer.operator.OperatorDescription; import com.rapidminer.operator.OperatorException; import com.rapidminer.operator.ports.InputPort; import com.rapidminer.operator.ports.OutputPort; import com.rapidminer.operator.ports.metadata.ExampleSetMetaData; import com.rapidminer.operator.ports.metadata.MetaData; import com.rapidminer.operator.preprocessing.AbstractDataProcessing; import com.rapidminer.parameter.ParameterType; import com.rapidminer.parameter.ParameterTypeBoolean; import com.rapidminer.parameter.ParameterTypeCategory; import com.rapidminer.parameter.ParameterTypeDouble; import com.rapidminer.parameter.ParameterTypeInt; import com.rapidminer.parameter.conditions.EqualTypeCondition; /** * This operator selects all attributes which have a weight satisfying a given * condition. For example, only attributes with a weight greater than * <code>min_weight</code> should be selected. This operator is also able * to select the k attributes with the highest weight. * * @author Ingo Mierswa, Stefan Rueping */ public class AttributeWeightSelection extends AbstractDataProcessing { /** The parameter name for "Use this weight for the selection relation." */ public static final String PARAMETER_WEIGHT = "weight"; /** The parameter name for "Selects only weights which fulfill this relation." */ public static final String PARAMETER_WEIGHT_RELATION = "weight_relation"; /** The parameter name for "Number k of attributes to be selected for weight-relations 'top k' or 'bottom k'." */ public static final String PARAMETER_K = "k"; /** The parameter name for "Percentage of attributes to be selected for weight-relations 'top p%' or 'bottom p%'." */ public static final String PARAMETER_P = "p"; /** The parameter name for "Indicates if attributes which weight is unknown should be deselected." */ public static final String PARAMETER_DESELECT_UNKNOWN = "deselect_unknown"; /** The parameter name for "Indicates if the absolute values of the weights should be used for comparison." */ public static final String PARAMETER_USE_ABSOLUTE_WEIGHTS = "use_absolute_weights"; private static final String[] WEIGHT_RELATIONS = { "greater", "greater equals", "equals", "less equals", "less", "top k", "bottom k", "all but top k", "all but bottom k", "top p%", "bottom p%" }; private static final int GREATER = 0; private static final int GREATER_EQUALS = 1; private static final int EQUALS = 2; private static final int LESS_EQUALS = 3; private static final int LESS = 4; private static final int TOPK = 5; private static final int BOTTOMK = 6; private static final int ALLBUTTOPK = 7; private static final int ALLBUTBOTTOMK = 8; private static final int TOPPPERCENT = 9; private static final int BOTTOMPPERCENT = 10; private InputPort weightsInput = getInputPorts().createPort("weights", AttributeWeights.class); private OutputPort weightsOutput = getOutputPorts().createPort("weights"); public AttributeWeightSelection(OperatorDescription description) { super(description); getTransformer().addPassThroughRule(weightsInput, weightsOutput); } @Override protected MetaData modifyMetaData(ExampleSetMetaData metaData) { metaData.attributesAreSubset(); return metaData; } @Override public ExampleSet apply(ExampleSet exampleSet) throws OperatorException { AttributeWeights weights = weightsInput.getData(); boolean deselectUnknown = getParameterAsBoolean(PARAMETER_DESELECT_UNKNOWN); double relationWeight = getParameterAsDouble(PARAMETER_WEIGHT); int relation = getParameterAsInt(PARAMETER_WEIGHT_RELATION); boolean useAbsoluteWeights = getParameterAsBoolean(PARAMETER_USE_ABSOLUTE_WEIGHTS); // determine which attributes have a known weight value boolean[] weightKnown = new boolean[exampleSet.getAttributes().size()]; Vector<Attribute> knownAttributes = new Vector<Attribute>(); int index = 0; for (Attribute attribute : exampleSet.getAttributes()) { double weight = weights.getWeight(attribute.getName()); if (!Double.isNaN(weight)) { knownAttributes.add(attribute); weightKnown[index++] = true; } else { weightKnown[index++] = false; } } // determine number of attributes that should be selected int nrAtts = knownAttributes.size(); int k = getParameterAsInt(PARAMETER_K); if (relation == ALLBUTTOPK) { relation = BOTTOMK; k = nrAtts - k; } if (relation == ALLBUTBOTTOMK) { relation = TOPK; k = nrAtts - k; } if (relation == TOPPPERCENT) { relation = TOPK; k = (int) Math.round(nrAtts * getParameterAsDouble(PARAMETER_P)); } if (relation == BOTTOMPPERCENT) { relation = BOTTOMK; k = (int) Math.round(nrAtts * getParameterAsDouble(PARAMETER_P)); } if (k < 1) k = 1; if (k > nrAtts) k = nrAtts; // top k or bottom k if ((relation == TOPK) || (relation == BOTTOMK)) { int direction = AttributeWeights.DECREASING; if (relation == BOTTOMK) direction = AttributeWeights.INCREASING; int comparatorType = AttributeWeights.ORIGINAL_WEIGHTS; if (useAbsoluteWeights) comparatorType = AttributeWeights.ABSOLUTE_WEIGHTS; String[] attributeNames = new String[knownAttributes.size()]; index = 0; for (Attribute attribute : knownAttributes) { attributeNames[index++] = attribute.getName(); } weights.sortByWeight(attributeNames, direction, comparatorType); Iterator<Attribute> iterator = exampleSet.getAttributes().iterator(); index = 0; while (iterator.hasNext()) { Attribute attribute = iterator.next(); if (!weightKnown[index]) { if (deselectUnknown) { iterator.remove(); } } else { boolean remove = true; for (int i = 0; i < k; i++) { if (attribute.getName().equals(attributeNames[i])) { remove = false; break; } } if (remove) iterator.remove(); } index++; } } else { // simple relations Iterator<Attribute> iterator = exampleSet.getAttributes().iterator(); while (iterator.hasNext()) { Attribute attribute = iterator.next(); double weight = weights.getWeight(attribute.getName()); if (useAbsoluteWeights) weight = Math.abs(weight); if (Double.isNaN(weight) && (deselectUnknown)) { iterator.remove(); } else { switch (relation) { case GREATER: if (weight <= relationWeight) iterator.remove(); break; case GREATER_EQUALS: if (weight < relationWeight) iterator.remove(); break; case EQUALS: if (weight != relationWeight) iterator.remove(); break; case LESS_EQUALS: if (weight > relationWeight) iterator.remove(); break; case LESS: if (weight >= relationWeight) iterator.remove(); break; } } } } weightsOutput.deliver(weights); return exampleSet; } @Override public boolean shouldAutoConnect(OutputPort port) { if (port == weightsOutput) { return getParameterAsBoolean("keep_attribute_weights"); } else { return super.shouldAutoConnect(port); } } @Override public List<ParameterType> getParameterTypes() { List<ParameterType> types = super.getParameterTypes(); ParameterType type = new ParameterTypeCategory(PARAMETER_WEIGHT_RELATION, "Selects only weights which fulfill this relation.", WEIGHT_RELATIONS, GREATER_EQUALS); type.setExpert(false); types.add(type); type = new ParameterTypeDouble(PARAMETER_WEIGHT, "The selected relation will be evaluated against this value.", Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY, 1.0d); type.registerDependencyCondition(new EqualTypeCondition(this, PARAMETER_WEIGHT_RELATION, WEIGHT_RELATIONS, true, GREATER, GREATER_EQUALS, LESS, LESS_EQUALS, EQUALS)); type.setExpert(false); types.add(type); type = new ParameterTypeInt(PARAMETER_K, "Number k of attributes to be selected. For example 'top k' with k = 5 will return an exampleset containing only the 5 highest weighted attributes.", 1, Integer.MAX_VALUE, 10); type.registerDependencyCondition(new EqualTypeCondition(this, PARAMETER_WEIGHT_RELATION, WEIGHT_RELATIONS, true, TOPK, BOTTOMK, ALLBUTBOTTOMK, ALLBUTTOPK)); type.setExpert(false); types.add(type); type = new ParameterTypeDouble(PARAMETER_P, "Percentage of attributes to be selected. For example 'top p%' with p = 15 will return an exampleset containing only attributes which are part of the 15% of the highest weighted attributes.", 0.0d, 1.0d, 0.5d); type.registerDependencyCondition(new EqualTypeCondition(this, PARAMETER_WEIGHT_RELATION, WEIGHT_RELATIONS, true, TOPPPERCENT, BOTTOMPPERCENT)); type.setExpert(false); types.add(type); types.add(new ParameterTypeBoolean(PARAMETER_DESELECT_UNKNOWN, "Indicates if attributes which weight is unknown should be removed from example set.", true)); types.add(new ParameterTypeBoolean(PARAMETER_USE_ABSOLUTE_WEIGHTS, "Indicates if the absolute values of the weights should be used for comparison.", true)); return types; } @Override public boolean writesIntoExistingData() { return false; } }