/**
* Copyright (C) 2001-2017 by RapidMiner and the contributors
*
* Complete list of developers available at our web site:
*
* http://rapidminer.com
*
* This program is free software: you can redistribute it and/or modify it under the terms of the
* GNU Affero General Public License as published by the Free Software Foundation, either version 3
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
* even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License along with this program.
* If not, see http://www.gnu.org/licenses/.
*/
package com.rapidminer.operator.preprocessing.filter.attributes;
import java.text.Collator;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.regex.PatternSyntaxException;
import com.rapidminer.example.Attribute;
import com.rapidminer.example.AttributeRole;
import com.rapidminer.example.Attributes;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.ProcessSetupError.Severity;
import com.rapidminer.operator.UserError;
import com.rapidminer.operator.features.selection.AbstractFeatureSelection;
import com.rapidminer.operator.ports.InputPort;
import com.rapidminer.operator.ports.OutputPort;
import com.rapidminer.operator.ports.metadata.AttributeMetaData;
import com.rapidminer.operator.ports.metadata.ExampleSetMetaData;
import com.rapidminer.operator.ports.metadata.MDTransformationRule;
import com.rapidminer.operator.ports.metadata.MetaData;
import com.rapidminer.operator.ports.metadata.SimpleMetaDataError;
import com.rapidminer.operator.ports.metadata.SimplePrecondition;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.parameter.ParameterTypeAttributeOrderingRules;
import com.rapidminer.parameter.ParameterTypeBoolean;
import com.rapidminer.parameter.ParameterTypeCategory;
import com.rapidminer.parameter.UndefinedParameterError;
import com.rapidminer.parameter.conditions.EqualTypeCondition;
import com.rapidminer.tools.Tools;
/**
 * This operator reorders the attributes of an {@link ExampleSet}. The order is determined either
 * alphabetically, by user specified ordering rules (exact attribute names or regular
 * expressions), or by the attribute order of a second {@link ExampleSet} that is delivered to the
 * reference data port.
 *
 * @author Nils Woehler
 *
 */
public class AttributeOrderingOperator extends AbstractFeatureSelection {

    /**
     * Decides whether an attribute name is selected by an ordering rule. Used while computing the
     * meta data of the output port.
     */
    private interface FilterConditon {

        /**
         * @param rule
         *            the ordering rule (attribute name or regular expression)
         * @param value
         *            the attribute name to test
         * @return <code>true</code> if the rule selects the attribute name
         */
        boolean match(String rule, String value);
    }

    private static final String REFERENCE_DATA_PORT_NAME = "reference_data";

    // --------------------- Order method ---------------------------------

    public static final String PARAMETER_ORDER_MODE = "sort_mode";

    public static final String USER_SPECIFIED_RULES_MODE = "user specified";
    public static final String ALPHABETICALLY_MODE = "alphabetically";
    public static final String REFERENCE_DATA = "reference data";

    public static final String[] SORT_MODES = new String[] { USER_SPECIFIED_RULES_MODE, ALPHABETICALLY_MODE,
            REFERENCE_DATA };

    public static final int USER_SPECIFIED_RULES_MODE_INDEX = 0;
    public static final int ALPHABETICALLY_MODE_INDEX = 1;
    public static final int REFERENCE_DATA_INDEX = 2;

    // --------------------- Sort direction -------------------------------

    public static final String PARAMETER_SORT_DIRECTION = "sort_direction";

    public static final String DIRECTION_ASCENDING = "ascending";
    public static final String DIRECTION_DESCENDING = "descending";
    public static final String DIRECTION_NONE = "none";

    public static final String[] SORT_DIRECTIONS = new String[] { DIRECTION_ASCENDING, DIRECTION_DESCENDING,
            DIRECTION_NONE };

    public static final int DIRECTION_ASCENDING_INDEX = 0;
    public static final int DIRECTION_DESCENDING_INDEX = 1;
    public static final int DIRECTION_NONE_INDEX = 2;

    // --------------------- Others ---------------------------------------

    public static final String PARAMETER_ORDER_RULES = "attribute_ordering";
    public static final String PARAMETER_USE_REGEXP = "use_regular_expressions";
    public static final String PARAMETER_HANDLE_UNMATCHED_ATTRIBUTES = "handle_unmatched";

    public static final String REMOVE_UNMATCHED_MODE = "remove";
    public static final String PREPEND_UNMATCHED_MODE = "prepend";
    public static final String APPEND_UNMATCHED_MODE = "append";

    public static final String[] HANDLE_UNMATCHED_MODES = { REMOVE_UNMATCHED_MODE, PREPEND_UNMATCHED_MODE,
            APPEND_UNMATCHED_MODE };

    public static final int REMOVE_UNMATCHED_MODE_INDEX = 0;
    public static final int PREPEND_UNMATCHED_MODE_INDEX = 1;
    public static final int APPEND_UNMATCHED_MODE_INDEX = 2;

    /** Additional input port that delivers the example set whose attribute order is adopted. */
    private final InputPort referenceDataPort = getInputPorts().createPort(REFERENCE_DATA_PORT_NAME);

    /**
     * Creates the operator, registers a non-mandatory precondition on the reference data port and
     * installs the rule that computes the meta data of the first output port.
     *
     * @param description
     *            the operator description handed on to the super class
     */
    public AttributeOrderingOperator(OperatorDescription description) {
        super(description);

        referenceDataPort.addPrecondition(new SimplePrecondition(referenceDataPort, new MetaData(ExampleSet.class), false));

        getTransformer().addRule(new MDTransformationRule() {

            @Override
            public void transformMD() {
                MetaData inputMetaData = getInputPorts().getPortByIndex(0).getMetaData();
                OutputPort outputPort = getOutputPorts().getPortByIndex(0);
                try {
                    String orderMode = getParameterAsString(PARAMETER_ORDER_MODE);

                    if (ALPHABETICALLY_MODE.equals(orderMode)) {
                        // no attributes will be removed, just deliver the old meta data
                        outputPort.deliverMD(inputMetaData);
                        return;
                    }

                    FilterConditon condition = null;
                    List<String> rules = new LinkedList<>();

                    if (REFERENCE_DATA.equals(orderMode)) {
                        MetaData referenceMetaData = referenceDataPort.getMetaData();
                        if (!(referenceMetaData instanceof ExampleSetMetaData)) {
                            outputPort.deliverMD(new ExampleSetMetaData());
                            return;
                        }

                        // every non-special reference attribute name acts as one exact-match rule
                        for (AttributeMetaData referenceAttribute : new LinkedList<>(
                                ((ExampleSetMetaData) referenceMetaData).getAllAttributes())) {
                            if (!referenceAttribute.isSpecial()) {
                                rules.add(referenceAttribute.getName());
                            }
                        }

                        condition = new FilterConditon() {

                            @Override
                            public boolean match(String rule, String value) {
                                return value.equals(rule);
                            }
                        };
                    } else if (USER_SPECIFIED_RULES_MODE.equals(orderMode)) {
                        String combinedMaskedRules = getParameterAsString(PARAMETER_ORDER_RULES);

                        // without any rule apply() fails with an UndefinedParameterError, so
                        // there is no meaningful output meta data: deliver empty meta data
                        if (combinedMaskedRules == null || combinedMaskedRules.length() == 0) {
                            outputPort.deliverMD(new ExampleSetMetaData());
                            return;
                        }

                        rules.addAll(unmaskRules(combinedMaskedRules));

                        // read the flag once instead of once per tested attribute
                        final boolean useRegexp = getParameterAsBoolean(PARAMETER_USE_REGEXP);
                        condition = new FilterConditon() {

                            @Override
                            public boolean match(String rule, String value) {
                                if (!useRegexp) {
                                    return value.equals(rule);
                                }
                                try {
                                    return value.matches(rule);
                                } catch (PatternSyntaxException e) {
                                    // an invalid expression simply matches nothing during the
                                    // meta data transformation; apply() reports it as user error
                                    return false;
                                }
                            }
                        };
                    }

                    // calculate new meta data (empty if no condition could be created)
                    outputPort.deliverMD(applyRulesOnMetaData(rules, inputMetaData, condition));
                } catch (UndefinedParameterError e) {
                    outputPort.deliverMD(new ExampleSetMetaData());
                }
            }
        });
    }

    /**
     * Splits the combined rule parameter value at each '|' and unmasks every resulting rule.
     *
     * @param combinedMaskedRules
     *            the raw, non-null value of {@link #PARAMETER_ORDER_RULES}
     * @return the unmasked rules in the order in which they were specified
     */
    private static List<String> unmaskRules(String combinedMaskedRules) {
        List<String> rules = new LinkedList<>();
        for (String maskedRule : combinedMaskedRules.split("\\|")) {
            rules.add(Tools.unmask('|', maskedRule)); // unmask them to allow regexp
        }
        return rules;
    }

    /**
     * Computes the meta data that results from applying the ordering rules to the provided meta
     * data. The provided meta data is not modified.
     *
     * @param rules
     *            the ordering rules, evaluated in the given order
     * @param metaData
     *            the meta data of the incoming example set; may be <code>null</code>
     * @param condition
     *            decides whether a rule selects an attribute name; may be <code>null</code>
     * @return the ordered meta data, or empty meta data if <code>metaData</code> is not an
     *         {@link ExampleSetMetaData} or <code>condition</code> is <code>null</code>
     * @throws UndefinedParameterError
     *             if the unmatched handling parameter cannot be read
     */
    private ExampleSetMetaData applyRulesOnMetaData(List<String> rules, MetaData metaData, FilterConditon condition)
            throws UndefinedParameterError {
        if (!(metaData instanceof ExampleSetMetaData) || condition == null) {
            return new ExampleSetMetaData();
        }

        ExampleSetMetaData originalMetaData = (ExampleSetMetaData) metaData;
        ExampleSetMetaData sortedMetaData = new ExampleSetMetaData();

        // Work on a copy: matched attributes are removed from this list below, and removing them
        // from the collection returned by getAllAttributes() directly could alter the meta data
        // object that is still attached to the input port.
        List<AttributeMetaData> unmatchedAttributes = new LinkedList<>(originalMetaData.getAllAttributes());

        for (String currentRule : rules) {
            Iterator<AttributeMetaData> iterator = unmatchedAttributes.iterator();
            while (iterator.hasNext()) {
                AttributeMetaData attributeMetaData = iterator.next();

                // special attributes are never matched by a rule
                if (attributeMetaData.isSpecial()) {
                    continue;
                }

                // if the rule applies, move the attribute from the unmatched list to the result
                if (condition.match(currentRule, attributeMetaData.getName())) {
                    iterator.remove();
                    sortedMetaData.addAttribute(attributeMetaData);
                }
            }
        }

        // NOTE(review): in 'remove' mode this also leaves the special attributes out of the meta
        // data, because they are still part of the unmatched list - confirm that this mirrors
        // what apply() does at runtime.
        if (!REMOVE_UNMATCHED_MODE.equals(getParameterAsString(PARAMETER_HANDLE_UNMATCHED_ATTRIBUTES))) {
            sortedMetaData.addAllAttributes(unmatchedAttributes);
        }

        return sortedMetaData;
    }

    /**
     * Adds an error if reference data ordering is selected but the reference data port is not
     * connected, and a warning if the port is connected although another order mode is selected.
     */
    @Override
    protected void performAdditionalChecks() {
        super.performAdditionalChecks();
        try {
            String orderMode = getParameterAsString(PARAMETER_ORDER_MODE);
            boolean referenceMode = REFERENCE_DATA.equals(orderMode);
            boolean connected = referenceDataPort.isConnected();

            if (referenceMode && !connected) {
                addError(new SimpleMetaDataError(Severity.ERROR, referenceDataPort, "input_missing",
                        REFERENCE_DATA_PORT_NAME));
            } else if (!referenceMode && connected) {
                addError(new SimpleMetaDataError(Severity.WARNING, referenceDataPort,
                        "port_connected_but_parameter_not_set", REFERENCE_DATA_PORT_NAME, PARAMETER_ORDER_MODE,
                        orderMode));
            }
        } catch (UndefinedParameterError e) {
            // the order mode cannot be read, so there is nothing that could be checked here
        }
    }

    /**
     * Reorders the attributes of the provided example set according to the selected order mode.
     *
     * @param exampleSet
     *            the example set whose attributes are reordered in place
     * @return the provided example set
     * @throws OperatorException
     *             if the example set or the required reference data is missing, if no ordering
     *             rules are defined in user specified mode, or if a rule is not a valid regular
     *             expression
     * @throws IllegalArgumentException
     *             if the selected order mode is unknown
     */
    @Override
    public ExampleSet apply(ExampleSet exampleSet) throws OperatorException {
        if (exampleSet == null) {
            throw new UserError(this, 149, getInputPorts().getPortByIndex(0).getName());
        }

        Attributes attributes = exampleSet.getAttributes();

        // starts with all iterated attributes; every matched attribute is moved out of this list
        List<Attribute> unmatchedAttributes = getAttributeList(attributes);

        String orderMode = getParameterAsString(PARAMETER_ORDER_MODE);

        if (ALPHABETICALLY_MODE.equals(orderMode)) {
            if (DIRECTION_NONE.equals(getParameterAsString(PARAMETER_SORT_DIRECTION))) {
                return exampleSet;
            }
            sortAttributeListAlphabetically(unmatchedAttributes);
            applySortedAttributes(unmatchedAttributes, null, attributes);
            return exampleSet;
        }

        List<Attribute> sortedAttributes;
        if (REFERENCE_DATA.equals(orderMode)) {
            sortedAttributes = orderByReferenceData(unmatchedAttributes);
        } else if (USER_SPECIFIED_RULES_MODE.equals(orderMode)) {
            sortedAttributes = orderByUserRules(unmatchedAttributes);
        } else {
            throw new IllegalArgumentException("Order mode " + orderMode + " is not implemented!");
        }

        // handling of the attributes that have not been matched
        String unmatchedMode = getParameterAsString(PARAMETER_HANDLE_UNMATCHED_ATTRIBUTES);
        if (REMOVE_UNMATCHED_MODE.equals(unmatchedMode)) {
            applySortedAttributes(sortedAttributes, unmatchedAttributes, attributes);
        } else {
            // sort unmatched attributes according to sort direction
            sortAttributeListAlphabetically(unmatchedAttributes);

            if (PREPEND_UNMATCHED_MODE.equals(unmatchedMode)) {
                sortedAttributes.addAll(0, unmatchedAttributes);
            } else {
                sortedAttributes.addAll(unmatchedAttributes);
            }
            applySortedAttributes(sortedAttributes, null, attributes);
        }

        return exampleSet;
    }

    /**
     * Orders attributes by the attribute order of the example set at the reference data port.
     *
     * @param unmatchedAttributes
     *            candidate attributes; every attribute whose name equals the name of a reference
     *            attribute is removed from this list
     * @return the matched attributes in the order of the reference attributes
     * @throws OperatorException
     *             if no reference example set is available
     */
    private List<Attribute> orderByReferenceData(List<Attribute> unmatchedAttributes) throws OperatorException {
        ExampleSet referenceSet = referenceDataPort.getData(ExampleSet.class);
        if (referenceSet == null) {
            throw new UserError(this, 149, referenceDataPort.getName());
        }

        List<Attribute> sortedAttributes = new LinkedList<>();
        for (Attribute referenceAttribute : referenceSet.getAttributes()) {
            Iterator<Attribute> iterator = unmatchedAttributes.iterator();
            while (iterator.hasNext()) {
                Attribute candidate = iterator.next();
                if (referenceAttribute.getName().equals(candidate.getName())) {
                    // only pairwise matching is possible -> directly move the attribute
                    sortedAttributes.add(candidate);
                    iterator.remove();
                }
            }
        }
        return sortedAttributes;
    }

    /**
     * Orders attributes by the user specified rules. Attributes matched by the same rule are
     * sorted according to the sort direction; the rules themselves keep their specified order.
     *
     * @param unmatchedAttributes
     *            candidate attributes; every matched attribute is removed from this list
     * @return the matched attributes in rule order
     * @throws OperatorException
     *             if no rules are defined or a rule is not a valid regular expression
     */
    private List<Attribute> orderByUserRules(List<Attribute> unmatchedAttributes) throws OperatorException {
        boolean useRegexp = getParameterAsBoolean(PARAMETER_USE_REGEXP);
        String combinedMaskedRules = getParameterAsString(PARAMETER_ORDER_RULES);
        if (combinedMaskedRules == null || combinedMaskedRules.length() == 0) {
            throw new UndefinedParameterError(PARAMETER_ORDER_RULES, this);
        }

        List<Attribute> sortedAttributes = new LinkedList<>();
        for (String rule : unmaskRules(combinedMaskedRules)) {
            List<Attribute> matchedAttributes = new LinkedList<>();

            Iterator<Attribute> iterator = unmatchedAttributes.iterator();
            while (iterator.hasNext()) {
                Attribute attribute = iterator.next();
                if (matchesRule(attribute.getName(), rule, useRegexp)) {
                    iterator.remove();
                    matchedAttributes.add(attribute);
                }
            }

            // sorting is only necessary if more than one match has been found
            if (matchedAttributes.size() > 1) {
                sortAttributeListAlphabetically(matchedAttributes);
            }
            sortedAttributes.addAll(matchedAttributes);
        }
        return sortedAttributes;
    }

    /**
     * Checks whether a rule selects an attribute name.
     *
     * @param attributeName
     *            the attribute name to test
     * @param rule
     *            the unmasked rule
     * @param useRegexp
     *            if <code>true</code> the rule is evaluated as regular expression that has to
     *            match the whole name, otherwise the name has to equal the rule
     * @return <code>true</code> if the rule selects the attribute name
     * @throws UserError
     *             (error 206) if the rule is not a valid regular expression
     */
    private boolean matchesRule(String attributeName, String rule, boolean useRegexp) throws UserError {
        if (!useRegexp) {
            return attributeName.equals(rule);
        }
        try {
            return attributeName.matches(rule);
        } catch (PatternSyntaxException e) {
            throw new UserError(this, 206, rule, e.getMessage());
        }
    }

    /**
     * Copies the attributes delivered by iterating over the provided {@link Attributes} into a
     * new, modifiable list.
     */
    private List<Attribute> getAttributeList(Attributes attributes) {
        List<Attribute> attributeList = new LinkedList<>();
        for (Attribute attribute : attributes) {
            attributeList.add(attribute);
        }
        return attributeList;
    }

    /**
     * Applies the sorted and unmatched attribute list to the provided {@link Attributes}. All
     * unmatched attributes are removed from attributes and all {@link Attribute}s from the sorted
     * list are removed and added again in list order, keeping their role.
     *
     * @param sortedAttributeList
     *            attributes that will be removed first and added in correct order afterwards.
     * @param unmatchedAttributes
     *            attributes that should be removed. May be <code>null</code> if no attributes
     *            should be removed.
     * @param attributes
     *            the attributes of the example set that are modified in place
     */
    private void applySortedAttributes(List<Attribute> sortedAttributeList, List<Attribute> unmatchedAttributes,
            Attributes attributes) {
        if (unmatchedAttributes != null) {
            for (Attribute unmatched : unmatchedAttributes) {
                attributes.remove(unmatched);
            }
        }

        for (Attribute attribute : sortedAttributeList) {
            // the role has to be fetched before the attribute is removed
            AttributeRole role = attributes.getRole(attribute);
            attributes.remove(attribute);
            if (role.isSpecial()) {
                attributes.setSpecialAttribute(attribute, role.getSpecialName());
            } else { // regular
                attributes.addRegular(attribute);
            }
        }
    }

    /**
     * Sorts a list of attributes by name according to the desired sort direction, using the
     * {@link Collator} of the default locale. CAUTION: The provided list is sorted in place. If
     * the sort direction is {@link #DIRECTION_NONE} the list is left untouched.
     *
     * @param unsortedAttributeList
     *            the list to sort
     * @throws UndefinedParameterError
     *             if the sort direction cannot be read
     */
    private void sortAttributeListAlphabetically(List<Attribute> unsortedAttributeList) throws UndefinedParameterError {
        String sortDirection = getParameterAsString(PARAMETER_SORT_DIRECTION);

        // sort direction none -> just return
        if (DIRECTION_NONE.equals(sortDirection)) {
            return;
        }

        // fetch the collator once per sort instead of once per comparison
        final Collator collator = Collator.getInstance();
        Collections.sort(unsortedAttributeList, new Comparator<Attribute>() {

            @Override
            public int compare(Attribute o1, Attribute o2) {
                return collator.compare(o1.getName(), o2.getName());
            }
        });

        // if descending, reverse sort
        if (DIRECTION_DESCENDING.equals(sortDirection)) {
            Collections.reverse(unsortedAttributeList);
        }
    }

    /**
     * Adds the order mode, the ordering rules, the regular expression switch, the unmatched
     * handling and the sort direction to the parameters of the super class.
     */
    @Override
    public List<ParameterType> getParameterTypes() {
        List<ParameterType> parameterTypes = super.getParameterTypes();

        ParameterType type = new ParameterTypeCategory(PARAMETER_ORDER_MODE, "Ordering method that should be applied.",
                SORT_MODES, USER_SPECIFIED_RULES_MODE_INDEX, false);
        parameterTypes.add(type);

        // --------------------------- USER SPECIFIED -------------------------

        type = new ParameterTypeAttributeOrderingRules(PARAMETER_ORDER_RULES, "Rules to order attributes.",
                getInputPorts().getPortByIndex(0), true);
        type.setExpert(false);
        type.registerDependencyCondition(
                new EqualTypeCondition(this, PARAMETER_ORDER_MODE, SORT_MODES, true, USER_SPECIFIED_RULES_MODE_INDEX));
        parameterTypes.add(type);

        type = new ParameterTypeBoolean(PARAMETER_USE_REGEXP,
                "If checked attribute orders will be evaluated as regular expressions.", false, true);
        type.registerDependencyCondition(
                new EqualTypeCondition(this, PARAMETER_ORDER_MODE, SORT_MODES, true, USER_SPECIFIED_RULES_MODE_INDEX));
        parameterTypes.add(type);

        // unmatched handling applies to both the user specified and the reference data mode
        type = new ParameterTypeCategory(PARAMETER_HANDLE_UNMATCHED_ATTRIBUTES,
                "Defines the behavior for unmatched attributes.", HANDLE_UNMATCHED_MODES, APPEND_UNMATCHED_MODE_INDEX,
                false);
        type.setOptional(true);
        type.registerDependencyCondition(new EqualTypeCondition(this, PARAMETER_ORDER_MODE, SORT_MODES, false,
                USER_SPECIFIED_RULES_MODE_INDEX, REFERENCE_DATA_INDEX));
        parameterTypes.add(type);

        type = new ParameterTypeCategory(PARAMETER_SORT_DIRECTION, "Sort direction for attribute names.", SORT_DIRECTIONS,
                DIRECTION_ASCENDING_INDEX, false);
        parameterTypes.add(type);

        return parameterTypes;
    }

    /**
     * The reference data port is never connected automatically; every other port follows the
     * behavior of the super class.
     */
    @Override
    public boolean shouldAutoConnect(InputPort inputPort) {
        if (inputPort == referenceDataPort) {
            return false;
        }
        return super.shouldAutoConnect(inputPort);
    }
}