/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/*
* AddExpression.java
* Copyright (C) 2000 Mark Hall
*
*/
package weka.filters.unsupervised.attribute;
import weka.filters.*;
import java.io.*;
import java.util.StringTokenizer;
import java.util.Stack;
import java.util.Vector;
import java.util.Enumeration;
import weka.core.Utils;
import weka.core.OptionHandler;
import weka.core.Option;
import weka.core.Instance;
import weka.core.SparseInstance;
import weka.core.Instances;
import weka.core.Attribute;
/**
* Applies a mathematical expression involving attributes and numeric
* constants to a dataset. A new attribute, containing the result of
* applying the expression, is appended after the last attribute.
* Supported operators are: +, -, *, /, ^, log, abs, cos, exp, sqrt,
* floor, ceil, rint, tan, sin, (, ). Attributes are specified
* by prefixing with 'a', eg. a7 is attribute number 7 (starting from 1). <p>
*
* Valid filter-specific options are:<p>
*
* -E expression <br>
* Specify the expression to apply. Eg. a1^2*a5/log(a7*4.0). <p>
*
* -N name <br>
* Specify a name for the new attribute. Default is to name it with the
* expression provided with the -E option. <p>
*
* -D <br>
* Debug. Names the attribute with the postfix parse of the expression. <p>
*
* @author Mark Hall (mhall@cs.waikato.ac.nz)
* @version $Revision: 1.1.1.1 $
*/
public class AddExpression extends Filter
implements UnsupervisedFilter, StreamableFilter, OptionHandler {
/**
 * Operand of the postfix expression that refers to an attribute by its
 * index.
 */
private class AttributeOperand implements Serializable {

  /** zero-based index of the referenced attribute */
  protected int m_attributeIndex;

  /** true if the attribute's value is to be multiplied by -1 */
  protected boolean m_negative;

  /**
   * Builds the operand from a token consisting of one leading character
   * (the 'a' prefix) followed by a one-based attribute number.
   *
   * @param operand the token, eg. "a7"
   * @param sign true if the attribute's value is to be negated
   * @exception Exception if the text after the first character cannot be
   * parsed as an integer
   */
  public AttributeOperand(String operand, boolean sign) throws Exception {
    // everything after the prefix character is the one-based number
    int oneBased = Integer.parseInt(operand.substring(1));
    m_attributeIndex = oneBased - 1;
    m_negative = sign;
  }

  /**
   * Describes this operand in the same notation used by the expression.
   *
   * @return "a" followed by the one-based attribute number, preceded by
   * '-' if the operand is negated
   */
  public String toString() {
    String prefix = m_negative ? "-a" : "a";
    return prefix + (m_attributeIndex + 1);
  }
}
/**
 * Operand of the postfix expression that holds a numeric constant.
 */
private class NumericOperand implements Serializable {

  /** the value of the constant, with any sign already applied */
  protected double m_numericConst;

  /**
   * Parses the supplied token as a double and stores it.
   *
   * @param operand the text of the constant
   * @param sign true if the constant is to be multiplied by -1
   * @exception Exception if the token cannot be parsed as a double
   */
  public NumericOperand(String operand, boolean sign) throws Exception {
    double parsed = Double.valueOf(operand).doubleValue();
    m_numericConst = sign ? parsed * -1.0 : parsed;
  }

  /**
   * Describes this operand.
   *
   * @return the string form of the stored constant
   */
  public String toString() {
    return String.valueOf(m_numericConst);
  }
}
/**
 * Element of the postfix expression that holds a single-character operator
 * or function code.
 */
private class Operator implements Serializable {

  /** the character code of the operator or function */
  protected char m_operator;

  /**
   * Stores the supplied operator code.
   *
   * @param opp the operator character
   * @exception IllegalArgumentException if isOperator() rejects the
   * character
   */
  public Operator(char opp) {
    if (isOperator(opp)) {
      m_operator = opp;
    } else {
      throw new IllegalArgumentException("Unrecognized operator:" + opp);
    }
  }

  /**
   * Applies this operator, interpreted as a binary operator, to two
   * arguments.
   *
   * @param first the left-hand argument
   * @param second the right-hand argument
   * @return the result, or NaN if this operator is not one of
   * +, -, *, / or ^
   */
  protected double applyOperator(double first, double second) {
    if (m_operator == '+') {
      return first + second;
    } else if (m_operator == '-') {
      return first - second;
    } else if (m_operator == '*') {
      return first * second;
    } else if (m_operator == '/') {
      return first / second;
    } else if (m_operator == '^') {
      return Math.pow(first, second);
    }
    return Double.NaN;
  }

  /**
   * Applies this operator, interpreted as a one-argument function, to a
   * value.
   *
   * @param value the argument
   * @return the result, or NaN if this operator is not one of the function
   * codes l, b, c, e, s, f, h, r, t or n
   */
  protected double applyFunction(double value) {
    if (m_operator == 'l') {
      return Math.log(value);
    } else if (m_operator == 'b') {
      return Math.abs(value);
    } else if (m_operator == 'c') {
      return Math.cos(value);
    } else if (m_operator == 'e') {
      return Math.exp(value);
    } else if (m_operator == 's') {
      return Math.sqrt(value);
    } else if (m_operator == 'f') {
      return Math.floor(value);
    } else if (m_operator == 'h') {
      return Math.ceil(value);
    } else if (m_operator == 'r') {
      return Math.rint(value);
    } else if (m_operator == 't') {
      return Math.tan(value);
    } else if (m_operator == 'n') {
      return Math.sin(value);
    }
    return Double.NaN;
  }

  /**
   * Describes this operator.
   *
   * @return the operator character as a one-character string
   */
  public String toString() {
    return String.valueOf(m_operator);
  }
}
/** The infix expression, as supplied by the user; defaults to "a1^2" */
private String m_infixExpression = "a1^2";
/** Stack of pending operator tokens (single-character Strings) used while
converting the infix expression to postfix */
private Stack m_operatorStack = new Stack();
/** Every character treated as an operator or delimiter by the parser.
The letters are the one-character codes the function names are rewritten
to before tokenizing: l = log, b = abs, c = cos, e = exp, s = sqrt,
f = floor, h = ceil, r = rint, t = tan, n = sin */
private static final String OPERATORS = "+-*/()^lbcesfhrtn";
/** The subset of OPERATORS that are one-argument functions */
private static final String UNARY_FUNCTIONS = "lbcesfhrtn";
/** Holds the expression in postfix form, as a sequence of
AttributeOperand, NumericOperand and Operator objects */
private Vector m_postFixExpVector;
/** True if the next numeric constant or attribute reference is to be
negated; cleared again once that operand has been handled */
private boolean m_signMod = false;
/** Holds the previous token seen by the parser; consulted to decide
whether an operator is really the sign of the following operand */
private String m_previousTok = "";
/** Name of the new attribute. The literal string "expression" means that
the provided expression itself is used as the new attribute name */
private String m_attributeName="expression";
/** If true, makes the attribute name equal to the postfix parse of the
expression */
private boolean m_Debug = false;
/**
 * Returns a string describing this filter.
 *
 * @return a description of the filter suitable for
 * displaying in the explorer/experimenter gui
 */
public String globalInfo() {
  // the text is shown to users; "operators" was previously misspelled
  return "An instance filter that creates a new attribute by applying a "
    + "mathematical expression to existing attributes. The expression "
    + "can contain attribute references and numeric constants. Supported "
    + "operators are : +, -, *, /, ^, log, abs, cos, exp, sqrt, "
    + "floor, ceil, rint, tan, sin, (, ). Attributes are specified "
    + "by prefixing with 'a', eg. a7 is attribute number 7 (starting from 1)."
    + " Example expression : a1^2*a5/log(a7*4.0).";
}
/**
 * Handles the processing of an infix operand to postfix. The operand is
 * appended to m_postFixExpVector, negated if a sign modifier is pending,
 * and the pending sign modifier is then cleared.
 *
 * @param tok the infix operand; a token containing an 'a' is treated as
 * an attribute reference, anything else as a numeric constant
 * @exception Exception if the operand is not a well-formed attribute
 * reference or numeric constant
 */
private void handleOperand(String tok) throws Exception {
  // if it contains an 'a' then its an attribute index
  if (tok.indexOf('a') != -1) {
    AttributeOperand attOperand;
    try {
      attOperand = new AttributeOperand(tok, m_signMod);
    } catch (NumberFormatException ne) {
      // eg. "a", "a1.5" or "1a2": report the offending token instead of
      // leaking a bare NumberFormatException
      throw new Exception("Trouble parsing attribute reference: " + tok);
    }
    if (attOperand.m_attributeIndex < 0) {
      // "a0" would otherwise become index -1 and only fail much later,
      // when the first instance is evaluated
      throw new Exception("Attribute indices start from 1: " + tok);
    }
    m_postFixExpVector.addElement(attOperand);
  } else {
    try {
      // should be a numeric constant
      m_postFixExpVector.addElement(new NumericOperand(tok, m_signMod));
    } catch (NumberFormatException ne) {
      throw new Exception("Trouble parsing numeric constant");
    }
  }
  // the sign has been consumed by this operand
  m_signMod = false;
}
/**
 * Handles the processing of an infix operator to postfix. Depending on
 * the token and the parser state the operator is pushed onto
 * m_operatorStack, causes operators already on the stack to be moved to
 * m_postFixExpVector, or is interpreted as the sign of the operand that
 * follows (recorded in m_signMod).
 *
 * @param tok the infix operator (a one-character token)
 * @exception Exception if a closing parenthesis has no matching opening
 * parenthesis
 */
private void handleOperator(String tok) throws Exception {
  char tokchar = tok.charAt(0);

  if (tokchar == ')') {
    // move everything back to the matching '(' onto the output
    while (true) {
      if (m_operatorStack.empty()) {
        // previously surfaced as a bare EmptyStackException
        throw new Exception("Mis-matched parenthesis!");
      }
      char popped = ((String) m_operatorStack.pop()).charAt(0);
      if (popped == '(') {
        return;
      }
      m_postFixExpVector.addElement(new Operator(popped));
    }
  }

  // a '+' or '-' immediately after '(' cannot be a binary operator, so
  // treat it as the sign of the upcoming operand. Without this, an
  // expression such as a1*(-2) pushed '-' as a binary operator and
  // failed during evaluation for lack of operands.
  if ((tokchar == '-' || tokchar == '+') && m_previousTok.equals("(")) {
    m_signMod = (tokchar == '-');
    return;
  }

  boolean push = true;
  int infixToc = infixPriority(tokchar);
  while (!m_operatorStack.empty()
         && stackPriority(((String) m_operatorStack.peek()).charAt(0))
            >= infixToc) {
    // try to catch double operators and see if the current one can
    // be interpreted as the sign of an upcoming number
    if (m_previousTok.length() == 1
        && isOperator(m_previousTok.charAt(0))
        && m_previousTok.charAt(0) != ')') {
      m_signMod = (tokchar == '-');
      push = false;
      break;
    }
    String popop = (String) m_operatorStack.pop();
    m_postFixExpVector.addElement(new Operator(popop.charAt(0)));
  }

  // a '-' seen before any operand has been output is a leading sign
  if (m_postFixExpVector.size() == 0 && tokchar == '-') {
    m_signMod = true;
    push = false;
  }

  if (push) {
    m_operatorStack.push(tok);
  }
}
/**
 * Converts a string containing a mathematical expression in infix form
 * to postfix form. The result is stored in the vector m_postFixExpVector.
 * Spaces are removed and each function name is rewritten to its
 * one-character code before the expression is tokenized.
 *
 * @param infixExp the infix expression to convert
 * @exception Exception if something goes wrong during the conversion,
 * eg. an operand cannot be parsed or parentheses do not match
 */
private void convertInfixToPostfix(String infixExp) throws Exception {
  // start from a clean parser state: a previous conversion that ended
  // with an exception may have left operators on the stack, a pending
  // sign or a stale previous token behind
  m_operatorStack = new Stack();
  m_signMod = false;
  m_previousTok = "";
  m_postFixExpVector = new Vector();

  infixExp = Utils.removeSubstring(infixExp, " ");
  infixExp = Utils.replaceSubstring(infixExp,"log","l");
  infixExp = Utils.replaceSubstring(infixExp,"abs","b");
  infixExp = Utils.replaceSubstring(infixExp,"cos","c");
  infixExp = Utils.replaceSubstring(infixExp,"exp","e");
  infixExp = Utils.replaceSubstring(infixExp,"sqrt","s");
  infixExp = Utils.replaceSubstring(infixExp,"floor","f");
  infixExp = Utils.replaceSubstring(infixExp,"ceil","h");
  infixExp = Utils.replaceSubstring(infixExp,"rint","r");
  infixExp = Utils.replaceSubstring(infixExp,"tan","t");
  infixExp = Utils.replaceSubstring(infixExp,"sin","n");

  // operators double as delimiters and are returned as tokens themselves
  StringTokenizer tokenizer = new StringTokenizer(infixExp, OPERATORS, true);
  while (tokenizer.hasMoreTokens()) {
    String tok = tokenizer.nextToken();
    // only a one-character token can be an operator; everything else,
    // including a single digit, is an operand
    if (tok.length() == 1 && isOperator(tok.charAt(0))) {
      handleOperator(tok);
    } else {
      handleOperand(tok);
    }
    m_previousTok = tok;
  }

  // flush the operators that are still pending
  while (!m_operatorStack.empty()) {
    String popop = (String)(m_operatorStack.pop());
    if (popop.charAt(0) == '(' || popop.charAt(0) == ')') {
      throw new Exception("Mis-matched parenthesis!");
    }
    m_postFixExpVector.addElement(new Operator(popop.charAt(0)));
  }
}
/**
 * Evaluate the expression using the supplied array of attribute values.
 * The result is stored in the last element of the array. Assumes that
 * the infix expression has been converted to postfix and stored in
 * m_postFixExpVector. The result is set to missing if any referenced
 * attribute value is missing, or if the computed value is NaN or infinite.
 *
 * @param vals the values to apply the expression to; the last element
 * is the slot that receives the result
 * @exception Exception if the postfix expression is malformed or refers
 * to an attribute outside the supplied values
 */
private void evaluateExpression(double [] vals) throws Exception {
  int resultIndex = vals.length - 1;

  // primitive operand stack; each element of the postfix vector pushes at
  // most one value, so its size bounds the depth
  double [] operands = new double [m_postFixExpVector.size()];
  int depth = 0;

  for (int i = 0; i < m_postFixExpVector.size(); i++) {
    Object nextob = m_postFixExpVector.elementAt(i);
    if (nextob instanceof NumericOperand) {
      operands[depth++] = ((NumericOperand)nextob).m_numericConst;
    } else if (nextob instanceof AttributeOperand) {
      AttributeOperand att = (AttributeOperand)nextob;
      int index = att.m_attributeIndex;
      if (index < 0 || index >= resultIndex) {
        // the last slot is the result itself, not an input attribute
        throw new Exception("Attribute index out of range: a" + (index + 1));
      }
      double value = vals[index];
      // use isMissingValue rather than ==: a missing value encoded as NaN
      // never compares equal to itself, so an == test cannot detect it
      if (Instance.isMissingValue(value)) {
        // return straight away; falling through to the operand count
        // check below would report an error for a legitimate input
        vals[resultIndex] = Instance.missingValue();
        return;
      }
      operands[depth++] = att.m_negative ? -value : value;
    } else if (nextob instanceof Operator) {
      Operator op = (Operator)nextob;
      if (isUnaryFunction(op.m_operator)) {
        if (depth < 1) {
          throw new Exception("Problem applying function");
        }
        operands[depth - 1] = op.applyFunction(operands[depth - 1]);
      } else {
        if (depth < 2) {
          throw new Exception("Problem applying function");
        }
        double second = operands[--depth];
        double first = operands[depth - 1];
        operands[depth - 1] = op.applyOperator(first, second);
      }
    } else {
      throw new Exception("Unknown object in postfix vector!");
    }
  }

  // a well-formed expression leaves exactly one value behind
  if (depth != 1) {
    throw new Exception("Problem applying function");
  }

  double result = operands[0];
  if (Double.isNaN(result) || Double.isInfinite(result)) {
    vals[resultIndex] = Instance.missingValue();
  } else {
    vals[resultIndex] = result;
  }
}
/**
 * Tests whether a character is one of the characters listed in OPERATORS.
 *
 * @param tok the character to check
 * @return true if the supplied character is an operator
 */
private boolean isOperator(char tok) {
  return OPERATORS.indexOf(tok) != -1;
}
/**
 * Tests whether a character is one of the function codes listed in
 * UNARY_FUNCTIONS.
 *
 * @param tok the character to check
 * @return true if the supplied character is a unary function
 */
private boolean isUnaryFunction(char tok) {
  return UNARY_FUNCTIONS.indexOf(tok) != -1;
}
/**
 * Return the infix priority of an operator, ie. its priority as an
 * incoming token. Note that '^' shares a priority level with '*' and '/'.
 *
 * @param opp the operator
 * @return the infix priority
 * @exception IllegalArgumentException if the operator is not recognized
 */
private int infixPriority(char opp) {
  int priority;
  switch (opp) {
    case '(' :
      priority = 4;
      break;
    case 'l' : case 'b' : case 'c' : case 'e' : case 's' :
    case 'f' : case 'h' : case 'r' : case 't' : case 'n' :
      priority = 3;
      break;
    case '^' : case '*' : case '/' :
      priority = 2;
      break;
    case '+' : case '-' :
      priority = 1;
      break;
    case ')' :
      priority = 0;
      break;
    default :
      throw new IllegalArgumentException("Unrecognized operator:" + opp);
  }
  return priority;
}
/**
 * Return the stack priority of an operator, ie. its priority once it is
 * sitting on the operator stack. It differs from the infix priority only
 * for the parentheses.
 *
 * @param opp the operator
 * @return the stack priority
 * @exception IllegalArgumentException if the operator is not recognized
 */
private int stackPriority(char opp) {
  int priority;
  switch (opp) {
    case 'l' : case 'b' : case 'c' : case 'e' : case 's' :
    case 'f' : case 'h' : case 'r' : case 't' : case 'n' :
      priority = 3;
      break;
    case '^' : case '*' : case '/' :
      priority = 2;
      break;
    case '+' : case '-' :
      priority = 1;
      break;
    case '(' :
      priority = 0;
      break;
    case ')' :
      priority = -1;
      break;
    default :
      throw new IllegalArgumentException("Unrecognized operator:" + opp);
  }
  return priority;
}
/**
 * Returns an enumeration describing the available options.
 *
 * @return an enumeration of all the available options.
 */
public Enumeration listOptions() {
  Vector newVector = new Vector(3);

  // help text fix: the operator list used to start with a stray comma
  // and "operators" was misspelled
  newVector.addElement(new Option(
    "\tSpecify the expression to apply. Eg a1^2*a5/log(a7*4.0)."
    + "\n\tSupported operators: +, -, *, /, ^, log, abs, cos, "
    + "\n\texp, sqrt, floor, ceil, rint, tan, sin, (, )",
    "E", 1, "-E <expression>"));

  newVector.addElement(new Option(
    "\tSpecify the name for the new attribute. (default is the "
    + "expression provided with -E)",
    "N", 1, "-N <name>"));

  newVector.addElement(new Option(
    "\tDebug. Names attribute with the postfix parse of the "
    + "expression.", "D", 0, "-D"));

  return newVector.elements();
}
/**
 * Parses a list of options for this object. Valid options are:<p>
 *
 * -E expression <br>
 * Specify the expression to apply. Eg. a1^2*a5/log(a7*4.0). <p>
 *
 * -N name <br>
 * Specify a name for the new attribute. Default is to name it with the
 * expression provided with the -E option. <p>
 *
 * -D <br>
 * Debug. Names the attribute with the postfix parse of the expression. <p>
 *
 * @param options the list of options as an array of strings
 * @exception Exception if no expression is supplied with -E or an option
 * cannot be parsed
 */
public void setOptions(String[] options) throws Exception {
  // the expression is mandatory
  String expression = Utils.getOption('E', options);
  if (expression.length() == 0) {
    throw new Exception("Must specify an expression with the -E option");
  }
  setExpression(expression);

  // the name is optional; the current name is kept when -N is absent
  String attName = Utils.getOption('N', options);
  if (attName.length() != 0) {
    setName(attName);
  }

  boolean debug = Utils.getFlag('D', options);
  setDebug(debug);
}
/**
 * Gets the current settings of the filter.
 *
 * @return an array of five strings suitable for passing to setOptions;
 * the last element is "-D" in debug mode and the empty string otherwise
 */
public String [] getOptions() {
  String [] settings = {
    "-E", getExpression(),
    "-N", getName(),
    getDebug() ? "-D" : ""
  };
  return settings;
}
/**
 * Returns the tip text for the name property.
 *
 * @return tip text for this property suitable for
 * displaying in the explorer/experimenter gui
 */
public String nameTipText() {
  final String tip = "Set the name of the new attribute.";
  return tip;
}
/**
 * Set the name for the new attribute. Passing the string "expression"
 * makes the new attribute take the supplied expression as its name.
 *
 * @param name the name of the new attribute
 */
public void setName(String name) {
  this.m_attributeName = name;
}
/**
 * Returns the name configured for the new attribute.
 *
 * @return the name of the new attribute
 */
public String getName() {
  return this.m_attributeName;
}
/**
 * Returns the tip text for the debug property.
 *
 * @return tip text for this property suitable for
 * displaying in the explorer/experimenter gui
 */
public String debugTipText() {
  final String tip =
    "Set debug mode. If true then the new attribute will be named with "
    + "the postfix parse of the supplied expression.";
  return tip;
}
/**
 * Switch debug mode on or off. In debug mode the new attribute is named
 * with the postfix parse of the expression.
 *
 * @param d true if debug mode is to be used
 */
public void setDebug(boolean d) {
  this.m_Debug = d;
}
/**
 * Reports whether debug mode is switched on.
 *
 * @return true if debug is set
 */
public boolean getDebug() {
  return this.m_Debug;
}
/**
 * Returns the tip text for the expression property.
 *
 * @return tip text for this property suitable for
 * displaying in the explorer/experimenter gui
 */
public String expressionTipText() {
  final String tip =
    "Set the math expression to apply. Eg. a1^2*a5/log(a7*4.0)";
  return tip;
}
/**
 * Set the expression to apply. The expression is only stored here; it is
 * parsed when the input format is set.
 *
 * @param expr a mathematical expression to apply
 */
public void setExpression(String expr) {
  this.m_infixExpression = expr;
}
/**
 * Get the expression in the infix form in which it was supplied.
 *
 * @return the expression
 */
public String getExpression() {
  return this.m_infixExpression;
}
/**
 * Sets the format of the input instances. The expression is parsed here
 * and the output format is the input format with one attribute appended
 * for the result of the expression.
 *
 * @param instanceInfo an Instances object containing the input instance
 * structure (any instances contained in the object are ignored - only the
 * structure is required).
 * @return true if the outputFormat may be collected immediately
 * @exception Exception if the expression cannot be parsed, refers to an
 * attribute that does not exist in the input format, or the format
 * couldn't be set successfully
 */
public boolean setInputFormat(Instances instanceInfo) throws Exception {
  convertInfixToPostfix(m_infixExpression);

  // reject references to attributes the input does not have. Without this
  // check a reference to attribute numAttributes+1 silently reads the
  // (zero-initialised) slot reserved for the result, and anything beyond
  // that only fails once the first instance is filtered.
  int numAttributes = instanceInfo.numAttributes();
  for (int i = 0; i < m_postFixExpVector.size(); i++) {
    Object element = m_postFixExpVector.elementAt(i);
    if (element instanceof AttributeOperand) {
      int index = ((AttributeOperand)element).m_attributeIndex;
      if (index < 0 || index >= numAttributes) {
        throw new Exception("Expression refers to attribute a" + (index + 1)
                            + " but the data has " + numAttributes
                            + " attribute(s)");
      }
    }
  }

  super.setInputFormat(instanceInfo);

  // choose the name of the new attribute: the postfix parse in debug
  // mode, else the user supplied name, else the expression itself
  Attribute newAttribute;
  if (m_Debug) {
    newAttribute = new Attribute(m_postFixExpVector.toString());
  } else if (!m_attributeName.equals("expression")) {
    newAttribute = new Attribute(m_attributeName);
  } else {
    newAttribute = new Attribute(m_infixExpression);
  }

  Instances outputFormat = new Instances(instanceInfo, 0);
  outputFormat.insertAttributeAt(newAttribute, numAttributes);
  setOutputFormat(outputFormat);
  return true;
}
/**
 * Input an instance for filtering. The instance's values are copied, the
 * expression is evaluated on them, and an instance carrying the extra
 * value is pushed onto the output queue straight away.
 *
 * @param instance the input instance
 * @return true if the filtered instance may now be
 * collected with output().
 * @exception IllegalStateException if no input format has been defined.
 * @exception Exception if there was a problem during the filtering.
 */
public boolean input(Instance instance) throws Exception {
  if (getInputFormat() == null) {
    throw new IllegalStateException("No input instance format defined");
  }
  if (m_NewBatch) {
    resetQueue();
    m_NewBatch = false;
  }

  // copy the incoming values, leaving one extra slot at the end for the
  // result of the expression
  int numAtts = instance.numAttributes();
  double[] values = new double[numAtts + 1];
  for (int att = 0; att < numAtts; att++) {
    values[att] = instance.isMissing(att)
      ? Instance.missingValue()
      : instance.value(att);
  }
  evaluateExpression(values);

  // keep the representation (sparse or not) of the incoming instance
  Instance filtered;
  if (instance instanceof SparseInstance) {
    filtered = new SparseInstance(instance.weight(), values);
  } else {
    filtered = new Instance(instance.weight(), values);
  }

  copyStringValues(filtered, false, instance.dataset(), getOutputFormat());
  filtered.setDataset(getOutputFormat());
  push(filtered);
  return true;
}
/**
 * Main method for testing this class. Runs the filter in batch mode when
 * the -b flag is present and in normal mode otherwise; the message of any
 * exception is printed to standard output.
 *
 * @param args should contain arguments to the filter: use -h for help
 */
public static void main(String [] args) {
  try {
    boolean batchMode = Utils.getFlag('b', args);
    if (!batchMode) {
      Filter.filterFile(new AddExpression(), args);
    } else {
      Filter.batchFilterFile(new AddExpression(), args);
    }
  } catch (Exception ex) {
    System.out.println(ex.getMessage());
  }
}
}