/** * Copyright (C) 2001-2017 by RapidMiner and the contributors * * Complete list of developers available at our web site: * * http://rapidminer.com * * This program is free software: you can redistribute it and/or modify it under the terms of the * GNU Affero General Public License as published by the Free Software Foundation, either version 3 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without * even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License along with this program. * If not, see http://www.gnu.org/licenses/. */ package com.rapidminer.operator.features.construction; import java.util.ArrayList; import java.util.LinkedList; import java.util.List; import com.rapidminer.example.Attribute; import com.rapidminer.example.ExampleSet; import com.rapidminer.operator.OperatorDescription; import com.rapidminer.operator.OperatorException; import com.rapidminer.operator.OperatorVersion; import com.rapidminer.operator.ProcessSetupError.Severity; import com.rapidminer.operator.ports.metadata.AttributeMetaData; import com.rapidminer.operator.ports.metadata.ExampleSetMetaData; import com.rapidminer.operator.ports.metadata.MetaData; import com.rapidminer.operator.ports.metadata.SimpleMetaDataError; import com.rapidminer.operator.preprocessing.filter.ChangeAttributeName; import com.rapidminer.parameter.ParameterType; import com.rapidminer.parameter.ParameterTypeBoolean; import com.rapidminer.parameter.ParameterTypeExpression; import com.rapidminer.parameter.ParameterTypeList; import com.rapidminer.parameter.ParameterTypeString; import com.rapidminer.parameter.UndefinedParameterError; import com.rapidminer.tools.Ontology; import com.rapidminer.tools.expression.ExampleResolver; import 
com.rapidminer.tools.expression.ExpressionException; import com.rapidminer.tools.expression.ExpressionParser; import com.rapidminer.tools.expression.internal.ExpressionParserUtils; import com.rapidminer.tools.expression.internal.UnknownResolverVariableException; /** * <p> * This operator constructs new attributes from the attributes of the input example set. The names * of the new attributes and their construction description are defined in the parameter list * "functions". * </p> * * <p> * The following <em>operators</em> are supported: * <ul> * <li>Addition: +</li> * <li>Subtraction: -</li> * <li>Multiplication: *</li> * <li>Division: /</li> * <li>Power: ^</li> * <li>Modulus: %</li> * <li>Less Than: &lt;</li> * <li>Greater Than: &gt;</li> * <li>Less or Equal: &lt;=</li> * <li>More or Equal: &gt;=</li> * <li>Equal: ==</li> * <li>Not Equal: !=</li> * <li>Boolean Not: !</li> * <li>Boolean And: &amp;&amp;</li> * <li>Boolean Or: ||</li> * </ul> * </p> * * <p> * The following <em>log and exponential functions</em> are supported: * <ul> * <li>Natural Logarithm: ln(x)</li> * <li>Logarithm Base 10: log(x)</li> * <li>Logarithm Dualis (Base 2): ld(x)</li> * <li>Exponential (e^x): exp(x)</li> * <li>Power: pow(x,y)</li> * </ul> * </p> * * <p> * The following <em>trigonometric functions</em> are supported: * <ul> * <li>Sine: sin(x)</li> * <li>Cosine: cos(x)</li> * <li>Tangent: tan(x)</li> * <li>Arc Sine: asin(x)</li> * <li>Arc Cosine: acos(x)</li> * <li>Arc Tangent: atan(x)</li> * <li>Arc Tangent (with 2 parameters): atan2(x,y)</li> * <li>Hyperbolic Sine: sinh(x)</li> * <li>Hyperbolic Cosine: cosh(x)</li> * <li>Hyperbolic Tangent: tanh(x)</li> * <li>Inverse Hyperbolic Sine: asinh(x)</li> * <li>Inverse Hyperbolic Cosine: acosh(x)</li> * <li>Inverse Hyperbolic Tangent: atanh(x)</li> * </ul> * </p> * * <p> * The following <em>statistical functions</em> are supported: * <ul> * <li>Round: round(x)</li> * <li>Round to p decimals: round(x,p)</li> * <li>Floor: floor(x)</li> * <li>Ceiling: 
ceil(x)</li> * </ul> * </p> * * <p> * The following <em>aggregation functions</em> are supported: * <ul> * <li>Average: avg(x,y,z...)</li> * <li>Minimum: min(x,y,z...)</li> * <li>Maximum: max(x,y,z...)</li> * </ul> * </p> * * <p> * The following <em>text functions</em> are supported: * <ul> * <li>Number to String: str(x)</li> * <li>String to Number: parse(text)</li> * <li>Substring: cut(text, start, length)</li> * <li>Concatenation (also possible by "+"): concat(text1, text2, text3...)</li> * <li>Replace: replace(text, what, by)</li> * <li>Replace All: replaceAll(text, what, by)</li> * <li>To lower case: lower(text)</li> * <li>To upper case: upper(text)</li> * <li>First position of string in text: index(text, string)</li> * <li>Length: length(text)</li> * <li>Character at position pos in text: char(text, pos)</li> * <li>Compare: compare(text1, text2)</li> * <li>Contains string in text: contains(text, string)</li> * <li>Equals: equals(text1, text2)</li> * <li>Starts with string: starts(text, string)</li> * <li>Ends with string: ends(text, string)</li> * <li>Matches with regular expression exp: matches(text, exp)</li> * <li>Suffix of length: suffix(text, length)</li> * <li>Prefix of length: prefix(text, length)</li> * <li>Trim (remove leading and trailing whitespace): trim(text)</li> * </ul> * </p> * * <p> * The following <em>miscellaneous functions</em> are supported: * <ul> * <li>If-Then-Else: if(cond,true-evaluation, false-evaluation)</li> * <li>Absolute: abs(x)</li> * <li>Constant: const(x)</li> * <li>Square Root: sqrt(x)</li> * <li>Signum (delivers the sign of a number): sgn(x)</li> * <li>Random Number (between 0 and 1): rand()</li> * <li>Modulus (x % y): mod(x,y)</li> * <li>Sum of k Numbers: sum(x,y,z...)</li> * <li>Binomial Coefficients: binom(n, i)</li> * <li>Retrieving parameter value: param(operator name, parameter name)</li> * </ul> * </p> * * <p> * The following <em>process related functions</em> are supported: * <ul> * <li>Retrieving a parameter value: 
param("operator", "parameter")</li>
 * </ul>
 * </p>
 *
 *
 * <p>
 * Besides those operators and functions, this operator also supports the constants pi and e if this
 * is indicated by the corresponding parameter (default: true). You can also use strings in formulas
 * (for example in a conditioned if-formula) but the string values have to be enclosed in double
 * quotes.
 * </p>
 *
 * <p>
 * Please note that there are some restrictions for the attribute names in order to let this
 * operator work properly:
 * <ul>
 * <li>If the standard constants are usable, attribute names with names like "e" or
 * "pi" are not allowed.</li>
 * <li>Attribute names with function or operator names are also not allowed.</li>
 * <li>Attribute names containing parentheses are not allowed.</li>
 * </ul>
 * If these conditions are not fulfilled, the names must be changed beforehand, for example with the
 * {@link ChangeAttributeName} operator.
 * </p>
 *
 * <p>
 * <br/>
 * <em>Examples:</em><br/>
 * a1+sin(a2*a3)<br/>
 * if (att1&gt;5, att2*att3, -abs(att1))<br/>
 * </p>
 *
 * @author Ingo Mierswa
 */
public class AttributeConstruction extends AbstractFeatureConstruction {

    /** The parameter name for &quot;List of functions to generate.&quot; */
    public static final String PARAMETER_FUNCTIONS = "function_descriptions";

    /**
     * The parameter name for &quot;If set to true, all the original attributes are kept, otherwise
     * they are removed from the example set.&quot;
     */
    public static final String PARAMETER_KEEP_ALL = "keep_all";

    /**
     * Creates the operator from its description.
     *
     * @param description
     *            the operator description handed to the superclass
     */
    public AttributeConstruction(OperatorDescription description) {
        super(description);
    }

    /**
     * Predicts the output meta data: one attribute is added per entry of the
     * {@link #PARAMETER_FUNCTIONS} list, and, unless {@link #PARAMETER_KEEP_ALL} is set, every
     * attribute that was present on entry and was not re-created by one of the functions is
     * removed again.
     *
     * <p>
     * If an expression cannot be typed because it refers to a variable the resolver does not know,
     * a nominal placeholder attribute is added under the requested name. Any other expression
     * failure registers a meta data error on the example set output port and yields empty meta
     * data.
     * </p>
     *
     * @param metaData
     *            the incoming meta data; it is modified in place
     * @return the modified {@code metaData}, or a fresh empty {@link ExampleSetMetaData} if an
     *         expression could not be parsed
     */
    @Override
    protected MetaData modifyMetaData(ExampleSetMetaData metaData) {
        // snapshot of the attributes present before anything is generated
        List<AttributeMetaData> originalAttributes = new ArrayList<>();
        for (AttributeMetaData attribute : metaData.getAllAttributes()) {
            originalAttributes.add(attribute);
        }

        List<String> newAttributeNames = new ArrayList<>();

        ExampleResolver resolver = new ExampleResolver(metaData);
        ExpressionParser parser = ExpressionParserUtils.createAllModulesParser(this, resolver);

        try {
            List<String[]> parameterList = getParameterList(PARAMETER_FUNCTIONS);
            for (String[] nameFunctionPair : parameterList) {
                String name = nameFunctionPair[0];
                String function = nameFunctionPair[1];

                try {
                    AttributeMetaData amd = ExpressionParserUtils.generateAttributeMetaData(metaData, name,
                            parser.parse(function).getExpressionType());
                    newAttributeNames.add(name);
                    metaData.addAttribute(amd);

                    // update resolver meta data after meta data change
                    // in case more than one attribute is generated
                    if (parameterList.size() > 1) {
                        resolver.addAttributeMetaData(amd);
                    }
                } catch (ExpressionException e) {
                    // instanceof is false for null, so no separate null check on the cause is needed
                    if (e.getCause() instanceof UnknownResolverVariableException) {
                        // in case a resolver variable cannot be resolved, return a new attribute
                        // with nominal type
                        metaData.addAttribute(new AttributeMetaData(name, Ontology.NOMINAL));
                        // Track the placeholder like any other generated attribute, mirroring
                        // apply(): otherwise, with keep_all disabled, a placeholder that reuses
                        // the name of an original attribute would be dropped by the removal
                        // loop below.
                        newAttributeNames.add(name);
                    } else {
                        // in all other cases abort meta data generation, add an error and return
                        // empty meta data
                        getExampleSetOutputPort().addError(new SimpleMetaDataError(Severity.ERROR,
                                this.getExampleSetOutputPort(), "cannot_create_exampleset_metadata",
                                e.getShortMessage()));
                        return new ExampleSetMetaData();
                    }
                }
            }

            if (!getParameterAsBoolean(PARAMETER_KEEP_ALL)) {
                for (AttributeMetaData attribute : originalAttributes) {
                    if (!newAttributeNames.contains(attribute.getName())) {
                        metaData.removeAttribute(attribute);
                    }
                }
            }
        } catch (UndefinedParameterError e) {
            // deliberately ignored: whatever meta data has been assembled so far is returned
        }

        return metaData;
    }

    /**
     * Generates one attribute per entry of the {@link #PARAMETER_FUNCTIONS} list on the given
     * example set and, unless {@link #PARAMETER_KEEP_ALL} is set, removes every attribute that was
     * present on entry and whose name is not among the generated ones.
     *
     * @param exampleSet
     *            the example set to extend; it is modified in place
     * @return the same {@code exampleSet} instance
     * @throws OperatorException
     *             if an expression fails (converted via
     *             {@link ExpressionParserUtils#convertToUserError}) or a parameter lookup or the
     *             stop check fails
     */
    @Override
    public ExampleSet apply(ExampleSet exampleSet) throws OperatorException {
        // snapshot of the attributes present before anything is generated
        List<Attribute> originalAttributes = new ArrayList<>();
        for (Attribute attribute : exampleSet.getAttributes()) {
            originalAttributes.add(attribute);
        }

        // create resolver and parser
        ExampleResolver resolver = new ExampleResolver(exampleSet);
        ExpressionParser expParser = ExpressionParserUtils.createAllModulesParser(this, resolver);

        // iterate over new attributes and generate them
        List<String> newAttributeNames = new ArrayList<>();
        List<String[]> parameterList = getParameterList(PARAMETER_FUNCTIONS);
        for (String[] nameFunctionPair : parameterList) {
            String name = nameFunctionPair[0];
            String function = nameFunctionPair[1];
            try {
                Attribute newAttribute = ExpressionParserUtils.addAttribute(exampleSet, name, function, expParser,
                        resolver, this);
                newAttributeNames.add(newAttribute.getName());
            } catch (ExpressionException e) {
                throw ExpressionParserUtils.convertToUserError(this, function, e);
            }

            checkForStop();
        }

        if (!getParameterAsBoolean(PARAMETER_KEEP_ALL)) {
            for (Attribute attribute : originalAttributes) {
                if (!newAttributeNames.contains(attribute.getName())) {
                    exampleSet.getAttributes().remove(attribute);
                }
            }
        }

        return exampleSet;
    }

    /**
     * Returns the incompatible version changes of the superclass, extended by the expression
     * parser change.
     */
    @Override
    public OperatorVersion[] getIncompatibleVersionChanges() {
        // add expression parser version change to allow usage of old functions
        return ExpressionParserUtils.addIncompatibleExpressionParserChange(super.getIncompatibleVersionChanges());
    }

    /**
     * Returns the parameter types of the superclass followed by the non-expert function list
     * ({@link #PARAMETER_FUNCTIONS}) and the {@link #PARAMETER_KEEP_ALL} flag, which defaults to
     * {@code true}.
     */
    @Override
    public List<ParameterType> getParameterTypes() {
        List<ParameterType> types = super.getParameterTypes();

        ParameterType type = new ParameterTypeList(PARAMETER_FUNCTIONS, "List of functions to generate.",
                new ParameterTypeString("attribute_name", "Specifies the name of the constructed attribute"),
                new ParameterTypeExpression("function_expressions", "Function and arguments to use for generation.",
                        getInputPort()));
        type.setExpert(false);
        types.add(type);

        types.add(new ParameterTypeBoolean(PARAMETER_KEEP_ALL,
                "If set to true, all the original attributes are kept, otherwise they are removed from the example set.",
                true));

        return types;
    }
}