/*
* RapidMiner
*
* Copyright (C) 2001-2008 by Rapid-I and the contributors
*
* Complete list of developers available at our web site:
*
* http://rapid-i.com
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see http://www.gnu.org/licenses/.
*/
package com.rapidminer.generator;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import com.rapidminer.example.Attribute;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.example.Tools;
import com.rapidminer.example.table.DataRow;
import com.rapidminer.example.table.DataRowReader;
import com.rapidminer.example.table.ExampleTable;
import com.rapidminer.tools.LogService;
import com.rapidminer.tools.RandomGenerator;
/**
* Abstract superclass of all attribute generators. Implementing classes have to
* implement the <tt>generate(Example)</tt>, method and specify the input and
* output attributes by the appropriate methods so that the using algorithms can
* use them correctly.
*
* @author Simon Fischer, Ingo Mierswa
* @version $Id: FeatureGenerator.java,v 2.30 2006/03/27 13:21:58 ingomierswa
* Exp $
*/
public abstract class FeatureGenerator {
private static final String[] FUNCTION_NAMES = { "+", "-", "*", "/", "1/", "sin", "cos", "tan", "atan", "exp", "log", "min", "max", "floor", "ceil", "round", "sqrt", "abs", "sgn", "pow" };
/** The classes which corresponds to FUNCTION_NAMES. */
private static final Class[] GENERATOR_CLASSES = {
BasicArithmeticOperationGenerator.class,
BasicArithmeticOperationGenerator.class,
BasicArithmeticOperationGenerator.class,
BasicArithmeticOperationGenerator.class,
ReciprocalValueGenerator.class,
TrigonometricFunctionGenerator.class,
TrigonometricFunctionGenerator.class,
TrigonometricFunctionGenerator.class,
TrigonometricFunctionGenerator.class,
ExponentialFunctionGenerator.class,
ExponentialFunctionGenerator.class,
MinMaxGenerator.class,
MinMaxGenerator.class,
FloorCeilGenerator.class,
FloorCeilGenerator.class,
FloorCeilGenerator.class,
SquareRootGenerator.class,
AbsoluteValueGenerator.class,
SignumGenerator.class,
PowerGenerator.class
};
/** Maps function names to generators. */
private static Map<String, Class> generatorMap;
static {
generatorMap = new HashMap<String, Class>();
for (int i = 0; i < FUNCTION_NAMES.length; i++) {
generatorMap.put(FUNCTION_NAMES[i], GENERATOR_CLASSES[i]);
}
}
/** Indicates a non-restrictive generator selection mode. */
public static final int SELECTION_MODE_ALL = 0;
/** Indicates a restrictive generator selection mode. */
public static final int SELECTION_MODE_RESTRICTIVE = 1;
/**
* Indicates the selection mode. One of SELECTION_MODE_ALL and
* SELECTION_MODE_RESTRICTIVE.
*/
private static int selectionMode = SELECTION_MODE_ALL;
/** The attributes of the function(s) calculated by this FeatureGenerator. */
protected Attribute[] resultAttributes;
/**
* The argument attributes on which to operate with respect to the example
* tables attribute array.
*/
private Attribute[] arguments = null;
/** The example table to work on. */
private ExampleTable exampleTable;
// ------------------------------ The abstract methods
// ------------------------------
/**
* Generates the new attribute values for the example e and returns the new
* attribute values as doubles. <tt>e.getAttribute(getArgument(i))</tt> is
* the correct way to access argument <i>i</i>. If the according
* attribute's type is VALUE_SERIES, the end index can be determined by
* <tt>i_end = getExampleTable().getBlockEndIndex(getArgument(i))</tt>.
* Thus all values of the series can be accessed using indices <i>i</i>
* through <i>i_end</i>.
*/
public abstract void generate(DataRow data) throws GenerationException;
/**
* Returns an array of Attributes where the length is the arity of the
* generator, <tt>[i]</tt> is the attribute type of the i-th argument.
*/
public abstract Attribute[] getInputAttributes();
/** Returns the generated attributes types. */
public abstract Attribute[] getOutputAttributes(ExampleTable input);
/**
* Subclasses must implement this method so that a new instance of this
* generator class is returned. The arguments and the example table will not
* be cloned and thus be null. This kind of clone is needed as generating
* algorithms must be able to clone generators form their pool without
* changing the arguments already set for the others.
*/
public abstract FeatureGenerator newInstance();
/**
* Sets the function name. This method is only useful if subclasses can
* generate more than one function. (like the
* BasicArithmeticOperationGenerator).
*/
public abstract void setFunction(String name);
/**
* Sets the function name. This method is only useful if subclasses can
* generate more than one function. (like the
* BasicArithmeticOperationGenerator).
*/
public abstract String getFunction();
/**
* Returns all compatible input attribute arrays for this generator from the
* given example set as list. Features with a depth greater than maxDepth or
* which contains one of the given functions should not be used as input
* candidates. Subclasses must consider if the generator is self-applicable
* or commutative. A maxDepth of -1 means that no maximal depth should be
* considered.
*/
public abstract List<Attribute[]> getInputCandidates(ExampleSet exampleSet, int maxDepth, String[] functions);
// --------------------------------------------------------------------------------
protected boolean checkCompatibility(Attribute attribute, Attribute compatible, int maxDepth, String[] functions) {
if (Tools.compatible(attribute, compatible) && ((maxDepth == -1) || (attribute.getConstruction().getDepth() <= maxDepth))) {
for (int f = 0; f < functions.length; f++) {
if (attribute.getConstruction().getDescription().indexOf(functions[f]) != -1)
return false;
}
return true;
} else {
return false;
}
}
protected void setExampleTable(ExampleTable et) {
this.exampleTable = et;
}
/** Gets the example table the examples are from. */
protected ExampleTable getExampleTable() {
return exampleTable;
}
/**
* Sets the arguments (indices) used in future <tt>generate(...)</tt>
* calls and has to be called prior to any <tt>generate(...)</tt> calls.
* The caller of this method has to take care that:
* <ul>
* <li><tt>args.length == getInputAttributes().length</tt>, i.e. that
* the arity is correct.
* <li>The types of the example attributes match the types specified by
* <tt>getInputAttributes()</tt>.
* <li>The true attribute indices are used (as used by the example set's
* example table)
* </ul>
*/
public void setArguments(Attribute[] args) {
arguments = args;
}
/**
* returns <tt>true</tt>, if the arguments have already been set, and
* <tt>false</tt> otherwise.
*/
public boolean argumentsSet() {
return (getInputAttributes().length == 0) || (arguments != null);
}
/**
* Returns the i-th selected argument (the true index as used in the example
* set's example table).
*/
public Attribute getArgument(int i) {
return arguments[i];
}
/**
* Checks if the arguments are compatible with the attributes specified by
* getInputAttributes().
*/
private boolean argumentsOk(ExampleTable input) {
Attribute[] inputA = getInputAttributes();
for (int i = 0; i < inputA.length; i++) {
if (!Tools.compatible(arguments[i], inputA[i]))
return false;
}
return true;
}
// --------------------------------------------------------------------------------
/** Creates a new FeatureGenerator for a given function name. */
public static FeatureGenerator createGeneratorForFunction(String functionName) {
if (functionName == null)
return null;
Class genClass = generatorMap.get(functionName);
if (genClass == null) {
if (functionName.startsWith(ConstantGenerator.FUNCTION_NAME)) {
FeatureGenerator gen = new ConstantGenerator();
gen.setFunction(functionName);
return gen;
} else {
LogService.getGlobal().log("Unknown feature generator: '" + functionName + "'", LogService.ERROR);
return null;
}
} else {
try {
FeatureGenerator gen = (FeatureGenerator) genClass.newInstance();
gen.setFunction(functionName);
return gen;
} catch (Exception e) {
LogService.getGlobal().log("Cannot instanciate '" + genClass.getName() + "'", LogService.ERROR);
return null;
}
}
}
// --------------------------------------------------------------------------------
/**
* Randomly selects a generator from the generator list. The probability of
* a generator to be selected is proportional to its number of attribute
* combinations as delivered by
* {@link #getInputCandidates(ExampleSet, int, String[])} method. Returns
* null if no generators are applicable.
*
* @param generators
* List of {@link FeatureGenerator}s
*/
public static FeatureGenerator selectGenerator(ExampleSet exampleSet, List generators, int maxDepth, String[] functions, RandomGenerator random) {
int combinationSum = 0;
Iterator i = generators.iterator();
double[] probs = new double[generators.size()];
int k = 0;
while ((i.hasNext())) {
FeatureGenerator generator = (FeatureGenerator) i.next();
// probs[k] =
// generator.getNumberOfApplicableGenerations(exampleSet);
probs[k] = generator.getInputCandidates(exampleSet, maxDepth, functions).size();
combinationSum += probs[k];
k++;
}
if (combinationSum == 0)
return null;
else {
for (k = 0; k < probs.length; k++)
probs[k] /= combinationSum;
int index = random.randomIndex(probs);
FeatureGenerator selected = (FeatureGenerator) generators.get(index);
return selected;
}
}
// --------------------------------------------------------------------------------
/**
* Generates all new attributes and updates the ExampleTable. Returns a list
* of Attributes for the newly generated attributes.
*
* @param exampleTable
* the source example table
* @param generatorList
* List of FeatureGenerators
* @return A list of Attributes
*/
public static List<Attribute> generateAll(ExampleTable exampleTable, Collection<FeatureGenerator> generatorList) throws GenerationException {
LogService.getGlobal().log("Starting feature generation with " + generatorList.size() + " generators.", LogService.STATUS);
Iterator<FeatureGenerator> gi = generatorList.iterator();
while (gi.hasNext())
gi.next().setExampleTable(exampleTable);
// for performance reasons convert the list to an array
FeatureGenerator[] generators = new FeatureGenerator[generatorList.size()];
generatorList.toArray(generators);
List<Attribute> newAttributeList = newAttributes(generators, exampleTable);
// add the attributes to the example table and ensure length of the
// DataRows
exampleTable.addAttributes(newAttributeList);
LogService.getGlobal().log("Generator list: " + generatorList, LogService.STATUS);
LogService.getGlobal().log("Input set has " + exampleTable.getAttributeCount() + " features, " + exampleTable.size() + " examples.", LogService.STATUS);
// generate the attribute values:
DataRowReader reader = exampleTable.getDataRowReader();
while (reader.hasNext()) {
DataRow dataRow = reader.next();
for (int j = 0; j < generators.length; j++) {
generators[j].generate(dataRow);
}
}
LogService.getGlobal().log("Finished feature generation.", LogService.STATUS);
LogService.getGlobal().log("Generated set has " + exampleTable.getAttributeCount() + " features, " + exampleTable.size() + " examples.", LogService.STATUS);
return newAttributeList;
}
/**
* Returns a list of new Attributes that are generated by the given
* generators.
*/
private static List<Attribute> newAttributes(FeatureGenerator[] generators, ExampleTable exampleTable) {
List<Attribute> newAttributeList = new LinkedList<Attribute>();
// add the attributes to the example table
for (int i = 0; i < generators.length; i++) {
Attribute outputAttribute[] = generators[i].getOutputAttributes(exampleTable);
generators[i].resultAttributes = new Attribute[outputAttribute.length];
for (int j = 0; j < outputAttribute.length; j++) {
//Attribute newAttribute = getAttributeInTable(exampleTable, outputAttribute[j]);
//if (newAttribute == null) {
newAttributeList.add(outputAttribute[j]);
generators[i].resultAttributes[j] = outputAttribute[j];
//} else {
//newAttributeList.add(newAttribute);
//generators[i].resultAttributes[j] = newAttribute;
//LogService.getGlobal().log("Attribute '" + outputAttribute[j].getConstruction() + "' already generated", LogService.WARNING);
//}
}
// check the arguments
if (!generators[i].argumentsSet()) {
throw new RuntimeException("Catastrophic error: arguments not set for " + generators[i] + "!");
}
if (!generators[i].argumentsOk(exampleTable)) {
LogService.getGlobal().log("Wrong argument types for " + generators[i] + ".", LogService.WARNING);
}
}
return newAttributeList;
}
/**
* Returns the attribute with the name of the given attribute from the table.
* May return null (no attribute with this name is part of the table).
*/
/*
public static Attribute getAttributeInTable(ExampleTable table, Attribute attribute) {
for (int i = 0; i < table.getNumberOfAttributes(); i++) {
Attribute a = table.getAttribute(i);
if ((a != null) && (a.getName().equals(attribute.getName())))
return a;
}
return null;
}
*/
public static int getSelectionMode() {
return selectionMode;
}
public static void setSelectionMode(int mode) {
selectionMode = mode;
}
public String toString() {
return "FeatureGenerator (" + getClass().getName() + ")";
}
/**
* A FeatureGenerator equals another FeatureGenerator if its class is equal
* and its arguments are equal and its function names are equal.
*/
public boolean equals(Object o) {
if (o == null) // necessary here because otherwise the next line will throw a NPE
return false;
if (!this.getClass().equals(o.getClass()))
return false;
FeatureGenerator fg = (FeatureGenerator) o;
if (!this.getFunction().equals(fg.getFunction()))
return false;
if (this.arguments.length != fg.arguments.length)
return false;
for (int i = 0; i < arguments.length; i++) {
if (!this.arguments[i].equals(fg.arguments[i]))
return false;
}
return true;
}
public int hashCode() {
int hashCode = getFunction().hashCode();
if (this.arguments != null)
hashCode ^= this.arguments.hashCode();
return hashCode;
}
}