/*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* AddUserFields.java
* Copyright (C) 2012 University of Waikato, Hamilton, New Zealand
*
*/
package weka.filters.unsupervised.attribute;
import java.io.Serializable;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.Enumeration;
import java.util.List;
import java.util.Vector;
import weka.core.Attribute;
import weka.core.Capabilities;
import weka.core.Capabilities.Capability;
import weka.core.DenseInstance;
import weka.core.Environment;
import weka.core.EnvironmentHandler;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.SparseInstance;
import weka.core.Utils;
import weka.filters.Filter;
/**
*
<!-- options-start -->
Valid options are:
* <p/>
*
* <pre>
* -A <name@type@value>
* New field specification (name@type@value).
* Environment variables may be used for any/all parts of the
* specification. Type can be one of (numeric, nominal, string or date).
* The value for date can be a specific date string or the special string
* "now" to indicate the current date-time. A specific date format
* string for parsing specific date values can be specified by suffixing
* the type specification - e.g. "myTime@date:MM-dd-yyyy@08-23-2009".
* This option may be specified multiple times.
* </pre>
*
* <!-- options-end -->
*
* @author Mark Hall (mhall{[at]}pentaho{[dot]}com)
* @version $Revision: 9002 $
*/
public class AddUserFields extends Filter implements OptionHandler,
EnvironmentHandler {
/** For serialization */
private static final long serialVersionUID = -2761427344847891585L;
/** The new attributes to create */
protected List<AttributeSpec> m_attributeSpecs;
/**
 * Environment variables handed to each attribute spec's init() when the
 * output format is built. Declared transient; when it is null,
 * setOutputFormat() replaces it with Environment.getSystemWide().
 */
protected transient Environment m_env;
/**
 * Inner class encapsulating a new user-specified attribute to create.
 * <p>
 * A spec stores the raw name, type and value exactly as supplied, plus the
 * "resolved" versions produced by {@link #init(Environment)} (environment
 * variables substituted). The resolved accessors and the typed value
 * accessors only return meaningful results after {@code init} has been
 * called.
 *
 * @author Mark Hall (mhall{[at]}pentaho{[dot]}com)
 */
public static class AttributeSpec implements Serializable {

  /** For serialization */
  private static final long serialVersionUID = -617328946241474608L;

  /** The name of the new attribute */
  protected String m_name = "";

  /** The constant value it should assume */
  protected String m_value = "";

  /** The type of the new attribute */
  protected String m_type = "";

  /** The name after resolving any environment variables */
  protected String m_nameS;

  /** The value after resolving any environment variables */
  protected String m_valueS;

  /** The type after resolving any environment variables */
  protected String m_typeS;

  /** The date format to use (if the new attribute is a date) */
  protected SimpleDateFormat m_dateFormat;

  /** Holds the parsed date value */
  protected Date m_parsedDate;

  /**
   * Default constructor
   */
  public AttributeSpec() {
  }

  /**
   * Constructor that takes an attribute specification in internal format
   * (name@type@value).
   *
   * @param spec the attribute spec to use
   */
  public AttributeSpec(String spec) {
    parseFromInternal(spec);
  }

  /**
   * Set the name of the new attribute
   *
   * @param name the name of the new attribute
   */
  public void setName(String name) {
    m_name = name;
  }

  /**
   * Get the name of the new attribute
   *
   * @return the name of the new attribute
   */
  public String getName() {
    return m_name;
  }

  /**
   * Set the type of the new attribute
   *
   * @param type the type of the new attribute
   */
  public void setType(String type) {
    m_type = type;
  }

  /**
   * Get the type of the new attribute
   *
   * @return the type of the new attribute
   */
  public String getType() {
    return m_type;
  }

  /**
   * Set the value of the new attribute. Date attributes can assume a supplied
   * date value or the current time stamp if the user specifies the special
   * string "now".
   *
   * @param value the value of the new attribute
   */
  public void setValue(String value) {
    m_value = value;
  }

  /**
   * Get the (unresolved) value of the new attribute, exactly as it was set.
   *
   * @return the value of the new attribute
   */
  public String getValue() {
    return m_value;
  }

  /**
   * Get the name of the attribute after substituting any environment
   * variables
   *
   * @return the resolved name, or null if {@link #init(Environment)} has not
   *         been called yet
   */
  public String getResolvedName() {
    return m_nameS;
  }

  /**
   * Get the value of the attribute after substituting any environment
   * variables
   *
   * @return the resolved value, or null if {@link #init(Environment)} has
   *         not been called yet
   */
  public String getResolvedValue() {
    return m_valueS;
  }

  /**
   * Get the type of the attribute after substituting any environment
   * variables
   *
   * @return the resolved type, or null if {@link #init(Environment)} has not
   *         been called yet
   */
  public String getResolvedType() {
    return m_typeS;
  }

  /**
   * Get the date formatting string (if any)
   *
   * @return the date formatting pattern, or null if no date format has been
   *         set up
   */
  public String getDateFormat() {
    return (m_dateFormat == null) ? null : m_dateFormat.toPattern();
  }

  /**
   * Get the value of the attribute as a date or null if the attribute isn't
   * of type date. If a date was parsed by {@link #init(Environment)} that
   * date is returned; otherwise, for a date-typed spec, a freshly created
   * {@code Date} (the current time) is returned.
   *
   * @return the value as a date
   */
  public Date getDateValue() {
    if (m_parsedDate != null) {
      return m_parsedDate;
    }
    if (resolvedTypeStartsWith("date")) {
      return new Date(); // now
    }
    return null; // not a date attribute
  }

  /**
   * Get the value of the attribute as a number or Utils.missingValue() if the
   * attribute is not numeric.
   *
   * @return the value of the attribute as a number
   * @throws NumberFormatException if the spec is numeric but the resolved
   *           value is not a parsable number
   */
  public double getNumericValue() {
    if (resolvedTypeStartsWith("numeric")) {
      return Double.parseDouble(getResolvedValue());
    }
    return Utils.missingValue(); // not a numeric attribute
  }

  /**
   * Get the value of the attribute as a string (nominal and string attribute)
   * or null if the attribute is not nominal or string
   *
   * @return the value of the attribute as a string
   */
  public String getNominalOrStringValue() {
    if (resolvedTypeStartsWith("nominal") || resolvedTypeStartsWith("string")) {
      return getResolvedValue();
    }
    return null; // not a nominal or string attribute
  }

  /**
   * Case-insensitive prefix test on the resolved type. Returns false rather
   * than failing when the type has not been resolved yet (init() not called).
   *
   * @param prefix the lower-case type prefix to look for
   * @return true if the resolved type is available and starts with the prefix
   */
  private boolean resolvedTypeStartsWith(String prefix) {
    String type = getResolvedType();
    return type != null && type.toLowerCase().startsWith(prefix);
  }

  /**
   * Populate name, type and value from a specification in internal format
   * (name@type@value). Parts that are absent keep their current values.
   *
   * @param spec the specification to parse
   */
  protected void parseFromInternal(String spec) {
    // Limit the split to three parts so that a value which itself contains
    // '@' is kept intact instead of being silently truncated.
    String[] parts = spec.split("@", 3);
    if (parts.length > 0) {
      m_name = parts[0].trim();
    }
    if (parts.length > 1) {
      m_type = parts[1].trim();
    }
    if (parts.length > 2) {
      m_value = parts[2].trim();
    }
  }

  /**
   * Initialize this attribute spec by resolving any environment variables and
   * setting up the date format (if necessary). May be called repeatedly; any
   * date format or parsed date left over from an earlier call is discarded
   * first.
   *
   * @param env environment variables to use
   * @throws IllegalArgumentException if a date format is given and the value
   *           (other than "now") can't be parsed with it
   */
  public void init(Environment env) {
    m_nameS = m_name;
    m_typeS = m_type;
    m_valueS = m_value;

    // Forget state derived from a previous init() so that an edited spec
    // (e.g. value changed to "now", or type changed away from date) does not
    // keep reporting a stale date/format.
    m_dateFormat = null;
    m_parsedDate = null;

    if (env != null) {
      try {
        m_nameS = env.substitute(m_nameS);
        m_typeS = env.substitute(m_typeS);
        m_valueS = env.substitute(m_valueS);
      } catch (Exception ex) {
        // Deliberately best-effort: if substitution fails, the parts that
        // could not be resolved keep their unresolved text.
      }
    }

    // NOTE(review): a date type without a ":format" suffix never parses the
    // value here, so getDateValue() yields the current time for it - confirm
    // whether a default format should be applied.
    if (m_typeS != null && m_typeS.toLowerCase().startsWith("date")) {
      int colon = m_typeS.indexOf(':');
      if (colon > 0) {
        String format = m_typeS.substring(colon + 1);
        m_dateFormat = new SimpleDateFormat(format);
        if (!"now".equalsIgnoreCase(m_valueS)) {
          try {
            m_parsedDate = m_dateFormat.parse(m_valueS);
          } catch (ParseException e) {
            throw new IllegalArgumentException("Date value \"" + m_valueS
              + "\" can't be parsed with formatting string \"" + format
              + "\"", e);
          }
        }
      }
    }
  }

  /**
   * Return a nicely formatted string for display
   *
   * @return a textual description
   */
  @Override
  public String toString() {
    StringBuilder buff = new StringBuilder();
    buff.append("Name: ").append(m_name).append(" ");

    String type = m_type;
    int colon = (type == null) ? -1 : type.indexOf(':');
    if (colon > 0 && type.toLowerCase().startsWith("date")) {
      // split "date:<format>" into the bare type and the format pattern
      buff.append("Type: ").append(type.substring(0, colon)).append(" [")
        .append(type.substring(colon + 1)).append("] ");
    } else {
      buff.append("Type: ").append(type).append(" ");
    }

    buff.append("Value: ").append(m_value);
    return buff.toString();
  }

  /**
   * Return this spec in internal format (name@type@value), built from the
   * unresolved name, type and value.
   *
   * @return the spec in internal format
   */
  public String toStringInternal() {
    StringBuilder buff = new StringBuilder();
    buff.append(m_name).append("@").append(m_type).append("@")
      .append(m_value);
    return buff.toString();
  }
}
/**
 * Creates a filter that starts out with an empty list of attribute
 * specifications.
 */
public AddUserFields() {
  this.m_attributeSpecs = new ArrayList<AttributeSpec>();
}
/**
 * Provides the descriptive text for this filter.
 *
 * @return a description of the filter suitable for displaying in the
 *         explorer/experimenter gui
 */
public String globalInfo() {
  StringBuilder info = new StringBuilder();
  info.append("A filter that adds new attributes with user specified type and constant value. ");
  info.append("Numeric, nominal, string and date attributes can be created. ");
  info.append("Attribute name, and value can be set with environment variables. ");
  info.append("Date attributes can also specify a formatting string by which to parse the supplied date value. ");
  info.append("Alternatively, a current time stamp can be specified by supplying the special string \"now\" as the value for a date attribute.");
  return info.toString();
}
/**
 * Builds the Capabilities of this filter: starting from the superclass
 * capabilities, everything is disabled and then all attributes, missing
 * values, all classes and "no class" are enabled.
 *
 * @return the capabilities of this object
 * @see Capabilities
 */
@Override
public Capabilities getCapabilities() {
  Capabilities caps = super.getCapabilities();
  caps.disableAll();

  // attribute-related capabilities
  caps.enableAllAttributes();
  caps.enable(Capability.MISSING_VALUES);

  // class-related capabilities
  caps.enableAllClasses();
  caps.enable(Capability.NO_CLASS);

  return caps;
}
/**
 * Empties the list of attribute specifications, creating the list first if
 * it does not exist yet.
 */
public void clearAttributeSpecs() {
  if (m_attributeSpecs != null) {
    m_attributeSpecs.clear();
    return;
  }
  // no list yet: a fresh one is already empty
  m_attributeSpecs = new ArrayList<AttributeSpec>();
}
/**
 * Returns an enumeration describing the available options. The filter has a
 * single option, -A, which takes one argument and may be repeated.
 *
 * @return an enumeration of all the available options.
 */
@Override
public Enumeration listOptions() {
  Vector<Option> newVector = new Vector<Option>();

  // The synopsis uses '@' as the separator because that is what the spec
  // parser splits on; every help line starts with a tab and ends with a
  // newline so the text lines up when printed.
  newVector
    .addElement(new Option(
      "\tNew field specification (name@type@value).\n"
        + "\tEnvironment variables may be used for any/all parts of the\n"
        + "\tspecification. Type can be one of (numeric, nominal, string or date).\n"
        + "\tThe value for date can be a specific date string or the special string\n"
        + "\t\"now\" to indicate the current date-time. A specific date format\n"
        + "\tstring for parsing specific date values can be specified by suffixing\n"
        + "\tthe type specification - e.g. \"myTime@date:MM-dd-yyyy@08-23-2009\".\n"
        + "\tThis option may be specified multiple times.", "A", 1,
      "-A <name@type@value>"));

  return newVector.elements();
}
/**
 * Parses a given list of options. Any existing attribute specifications are
 * cleared first; then every -A option found is added as a new specification
 * and finally the option array is checked for leftovers.
 * <p/>
 *
 * <!-- options-start -->
 * Valid options are:
 * <p/>
 *
 * <pre>
 * -A &lt;name@type@value&gt;
 *  New field specification (name@type@value).
 *  Environment variables may be used for any/all parts of the
 *  specification. Type can be one of (numeric, nominal, string or date).
 *  The value for date can be a specific date string or the special string
 *  "now" to indicate the current date-time. A specific date format
 *  string for parsing specific date values can be specified by suffixing
 *  the type specification - e.g. "myTime@date:MM-dd-yyyy@08-23-2009".
 *  This option may be specified multiple times
 * </pre>
 *
 * <!-- options-end -->
 *
 * @param options the list of options as an array of string
 * @throws Exception if an option is not supported
 */
@Override
public void setOptions(String[] options) throws Exception {
  clearAttributeSpecs();

  // keep pulling -A options until none (or an empty one) is returned
  for (String spec = Utils.getOption('A', options);
    spec.length() > 0;
    spec = Utils.getOption('A', options)) {
    addAttributeSpec(spec);
  }

  Utils.checkForRemainingOptions(options);
}
/**
 * Gets the current settings of the filter: one "-A" flag followed by the
 * internal-format string of each attribute specification, in list order.
 *
 * @return an array of strings suitable for passing to setOptions (empty when
 *         there are no attribute specifications)
 */
@Override
public String[] getOptions() {
  final int numSpecs = m_attributeSpecs.size();

  // two entries per spec: the flag and its argument
  String[] result = new String[2 * numSpecs];
  int pos = 0;
  for (AttributeSpec spec : m_attributeSpecs) {
    result[pos++] = "-A";
    result[pos++] = spec.toStringInternal();
  }

  return result;
}
/**
 * Appends a new attribute spec, built from the supplied specification
 * string, to the list.
 *
 * @param spec the attribute spec to add
 */
public void addAttributeSpec(String spec) {
  m_attributeSpecs.add(new AttributeSpec(spec));
}
/**
 * Supplies the tip text for the attributeSpecs property.
 *
 * @return tip text for this property suitable for displaying in the
 *         explorer/experimenter gui
 */
public String attributeSpecsTipText() {
  final String tip = "Specifications of the new attributes to create";
  return tip;
}
/**
 * Replaces the list of attribute specs used to create the new attributes.
 * The supplied list is stored as-is (no copy is made).
 *
 * @param specs the list of attribute specs to use
 */
public void setAttributeSpecs(List<AttributeSpec> specs) {
  this.m_attributeSpecs = specs;
}
/**
 * Returns the list of attribute specs used to create the new attributes.
 * The internal list itself is returned (no copy is made).
 *
 * @return the list of attribute specs to use
 */
public List<AttributeSpec> getAttributeSpecs() {
  return this.m_attributeSpecs;
}
/**
 * Set the environment variables to use.
 *
 * @param env the environment variables to use
 */
@Override
public void setEnvironment(Environment env) {
  this.m_env = env;
}
/**
 * Sets the format of the input instances and immediately builds the
 * corresponding output format.
 *
 * @param instanceInfo an Instances object containing the input instance
 *          structure (any instances contained in the object are ignored -
 *          only the structure is required).
 * @return true, since the output format is set up by this call
 * @throws Exception if the input format can't be set successfully
 */
@Override
public boolean setInputFormat(Instances instanceInfo) throws Exception {
  super.setInputFormat(instanceInfo);

  // the output structure only depends on the input structure and the specs
  this.setOutputFormat();

  return true;
}
/**
 * Input an instance for filtering. A copy of the instance is extended with
 * the user-defined values and handed on for output.
 *
 * @param instance the input instance
 * @return true, as this method always passes a converted instance on
 * @throws IllegalStateException if no input format has been defined.
 */
@Override
public boolean input(Instance instance) {
  final boolean haveInputFormat = (getInputFormat() != null);
  if (!haveInputFormat) {
    throw new IllegalStateException("No input instance format defined");
  }

  // start of a new batch: drop whatever is still queued
  if (m_NewBatch) {
    resetQueue();
    m_NewBatch = false;
  }

  // build the output format on demand
  if (outputFormatPeek() == null) {
    setOutputFormat();
  }

  // work on a copy so the caller's instance is left untouched
  Instance working = (Instance) instance.copy();

  // First copy string values from input to output
  copyValues(working, true, working.dataset(), getOutputFormat());

  convertInstance(working);

  return true;
}
/**
 * Builds a new instance consisting of the values of the supplied instance
 * followed by one value per attribute spec, and passes it to push().
 *
 * @param instance the instance to process
 */
protected void convertInstance(Instance instance) {
  final int totalAtts = outputFormatPeek().numAttributes();
  final int originalAtts = instance.numAttributes();
  double[] values = new double[totalAtts];

  // the incoming values occupy the leading positions
  for (int idx = 0; idx < originalAtts; idx++) {
    values[idx] = instance.value(idx);
  }

  // the user-defined values follow, one per attribute spec
  Instances header = getOutputFormat();
  for (int idx = originalAtts; idx < totalAtts; idx++) {
    AttributeSpec userSpec = m_attributeSpecs.get(idx - originalAtts);
    Attribute target = header.attribute(idx);

    if (target.isDate()) {
      // date is tested before numeric, matching the output attribute type
      values[idx] = userSpec.getDateValue().getTime();
    } else if (target.isNumeric()) {
      values[idx] = userSpec.getNumericValue();
    } else {
      String label = userSpec.getNominalOrStringValue();
      if (target.isNominal()) {
        values[idx] = target.indexOfValue(label);
      } else {
        // string attribute
        values[idx] = target.addStringValue(label);
      }
    }
  }

  // keep the sparse/dense nature of the incoming instance
  Instance converted = (instance instanceof SparseInstance)
    ? new SparseInstance(instance.weight(), values)
    : new DenseInstance(instance.weight(), values);
  converted.setDataset(header);

  push(converted);
}
/**
 * Create and set the output format: copies of all input attributes followed
 * by one new attribute per attribute spec. Each spec is initialized with the
 * current environment (the system-wide one if none has been set) before its
 * resolved type decides which kind of attribute is created.
 */
protected void setOutputFormat() {
  if (m_env == null) {
    m_env = Environment.getSystemWide();
  }

  Instances inputHeader = getInputFormat();
  ArrayList<Attribute> attributes = new ArrayList<Attribute>();

  // copies of the existing attributes come first
  final int numInput = inputHeader.numAttributes();
  for (int idx = 0; idx < numInput; idx++) {
    attributes.add((Attribute) inputHeader.attribute(idx).copy());
  }

  // then one attribute per user-defined spec
  for (int idx = 0; idx < m_attributeSpecs.size(); idx++) {
    AttributeSpec spec = m_attributeSpecs.get(idx);
    spec.init(m_env);

    final String lowerType = spec.getResolvedType().toLowerCase();
    final String attName = spec.getResolvedName();
    Attribute created;

    if (lowerType.startsWith("date")) {
      String pattern = spec.getDateFormat();
      if (pattern == null) {
        // no user-supplied format: use this default pattern
        pattern = "yyyy-MM-dd'T'HH:mm:ss";
      }
      created = new Attribute(attName, pattern);
    } else if (lowerType.startsWith("string")) {
      created = new Attribute(attName, (List<String>) null);
    } else if (lowerType.startsWith("nominal")) {
      // a nominal attribute whose only label is the constant value
      List<String> labels = new ArrayList<String>();
      labels.add(spec.getResolvedValue());
      created = new Attribute(attName, labels);
    } else {
      // anything else is treated as numeric
      created = new Attribute(attName);
    }

    attributes.add(created);
  }

  Instances header = new Instances(inputHeader.relationName(), attributes, 0);
  header.setClassIndex(inputHeader.classIndex());
  setOutputFormat(header);
}
/**
 * Command-line entry point: runs a new AddUserFields filter with the given
 * arguments.
 *
 * @param argv should contain arguments to the filter: use -h for help
 */
public static void main(String[] argv) {
  AddUserFields filter = new AddUserFields();
  runFilter(filter, argv);
}
}