/**
* Copyright (C) 2001-2017 by RapidMiner and the contributors
*
* Complete list of developers available at our web site:
*
* http://rapidminer.com
*
* This program is free software: you can redistribute it and/or modify it under the terms of the
* GNU Affero General Public License as published by the Free Software Foundation, either version 3
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
* even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License along with this program.
* If not, see http://www.gnu.org/licenses/.
*/
package com.rapidminer.generator;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.logging.Level;
import com.rapidminer.example.Attribute;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.example.Tools;
import com.rapidminer.example.table.DataRow;
import com.rapidminer.example.table.DataRowReader;
import com.rapidminer.example.table.ExampleTable;
import com.rapidminer.tools.LogService;
import com.rapidminer.tools.RandomGenerator;
/**
* Abstract superclass of all attribute generators. Implementing classes have to implement the
* <tt>generate(Example)</tt>, method and specify the input and output attributes by the appropriate
* methods so that the using algorithms can use them correctly.
*
* @author Simon Fischer, Ingo Mierswa Exp $
*/
public abstract class FeatureGenerator {
private static final String[] FUNCTION_NAMES = { "+", "-", "*", "/", "1/", "sin", "cos", "tan", "atan", "exp", "log",
"min", "max", "floor", "ceil", "round", "sqrt", "abs", "sgn", "pow" };
/** The classes which corresponds to FUNCTION_NAMES. */
private static final List<Class<? extends FeatureGenerator>> GENERATOR_CLASSES = Arrays.asList(
BasicArithmeticOperationGenerator.class, BasicArithmeticOperationGenerator.class,
BasicArithmeticOperationGenerator.class, BasicArithmeticOperationGenerator.class, ReciprocalValueGenerator.class,
TrigonometricFunctionGenerator.class, TrigonometricFunctionGenerator.class, TrigonometricFunctionGenerator.class,
TrigonometricFunctionGenerator.class, ExponentialFunctionGenerator.class, ExponentialFunctionGenerator.class,
MinMaxGenerator.class, MinMaxGenerator.class, FloorCeilGenerator.class, FloorCeilGenerator.class,
FloorCeilGenerator.class, SquareRootGenerator.class, AbsoluteValueGenerator.class, SignumGenerator.class,
PowerGenerator.class);
/** Maps function names to generators. */
private static Map<String, Class<? extends FeatureGenerator>> generatorMap;
static {
generatorMap = new HashMap<>();
for (int i = 0; i < FUNCTION_NAMES.length; i++) {
generatorMap.put(FUNCTION_NAMES[i], GENERATOR_CLASSES.get(i));
}
}
/** Indicates a non-restrictive generator selection mode. */
public static final int SELECTION_MODE_ALL = 0;
/** Indicates a restrictive generator selection mode. */
public static final int SELECTION_MODE_RESTRICTIVE = 1;
/**
* Indicates the selection mode. One of SELECTION_MODE_ALL and SELECTION_MODE_RESTRICTIVE.
*/
private static int selectionMode = SELECTION_MODE_ALL;
/** The attributes of the function(s) calculated by this FeatureGenerator. */
protected Attribute[] resultAttributes;
/**
* The argument attributes on which to operate with respect to the example tables attribute
* array.
*/
private Attribute[] arguments = null;
/** The example table to work on. */
private ExampleTable exampleTable;
// ------------------------------ The abstract methods
// ------------------------------
/**
* Generates the new attribute values for the example e and returns the new attribute values as
* doubles. <tt>e.getAttribute(getArgument(i))</tt> is the correct way to access argument
* <i>i</i>. If the according attribute's type is VALUE_SERIES, the end index can be determined
* by <tt>i_end = getExampleTable().getBlockEndIndex(getArgument(i))</tt>. Thus all values of
* the series can be accessed using indices <i>i</i> through <i>i_end</i>.
*/
public abstract void generate(DataRow data) throws GenerationException;
/**
* Returns an array of Attributes where the length is the arity of the generator, <tt>[i]</tt>
* is the attribute type of the i-th argument.
*/
public abstract Attribute[] getInputAttributes();
/** Returns the generated attributes types. */
public abstract Attribute[] getOutputAttributes(ExampleTable input);
/**
* Subclasses must implement this method so that a new instance of this generator class is
* returned. The arguments and the example table will not be cloned and thus be null. This kind
* of clone is needed as generating algorithms must be able to clone generators form their pool
* without changing the arguments already set for the others.
*/
public abstract FeatureGenerator newInstance();
/**
* Sets the function name. This method is only useful if subclasses can generate more than one
* function. (like the BasicArithmeticOperationGenerator).
*/
public abstract void setFunction(String name);
/**
* Sets the function name. This method is only useful if subclasses can generate more than one
* function. (like the BasicArithmeticOperationGenerator).
*/
public abstract String getFunction();
/**
* Returns all compatible input attribute arrays for this generator from the given example set
* as list. Features with a depth greater than maxDepth or which contains one of the given
* functions should not be used as input candidates. Subclasses must consider if the generator
* is self-applicable or commutative. A maxDepth of -1 means that no maximal depth should be
* considered.
*/
public abstract List<Attribute[]> getInputCandidates(ExampleSet exampleSet, String[] functions);
// --------------------------------------------------------------------------------
protected boolean checkCompatibility(Attribute attribute, Attribute compatible, String[] functions) {
if (Tools.compatible(attribute, compatible)) {
for (int f = 0; f < functions.length; f++) {
if (attribute.getConstruction().indexOf(functions[f]) != -1) {
return false;
}
}
return true;
} else {
return false;
}
}
protected void setExampleTable(ExampleTable et) {
this.exampleTable = et;
}
/** Gets the example table the examples are from. */
protected ExampleTable getExampleTable() {
return exampleTable;
}
/**
* Sets the arguments (indices) used in future <tt>generate(...)</tt> calls and has to be called
* prior to any <tt>generate(...)</tt> calls. The caller of this method has to take care that:
* <ul>
* <li><tt>args.length == getInputAttributes().length</tt>, i.e. that the arity is correct.
* <li>The types of the example attributes match the types specified by
* <tt>getInputAttributes()</tt>.
* <li>The true attribute indices are used (as used by the example set's example table)
* </ul>
*/
public void setArguments(Attribute[] args) {
arguments = args;
}
/**
* returns <tt>true</tt>, if the arguments have already been set, and <tt>false</tt> otherwise.
*/
public boolean argumentsSet() {
return getInputAttributes().length == 0 || arguments != null;
}
/**
* Returns the i-th selected argument (the true index as used in the example set's example
* table).
*/
public Attribute getArgument(int i) {
return arguments[i];
}
/**
* Checks if the arguments are compatible with the attributes specified by getInputAttributes().
*/
private boolean argumentsOk(ExampleTable input) {
Attribute[] inputA = getInputAttributes();
for (int i = 0; i < inputA.length; i++) {
if (!Tools.compatible(arguments[i], inputA[i])) {
return false;
}
}
return true;
}
// --------------------------------------------------------------------------------
/** Creates a new FeatureGenerator for a given function name. */
public static FeatureGenerator createGeneratorForFunction(String functionName) {
if (functionName == null) {
return null;
}
Class<? extends FeatureGenerator> genClass = generatorMap.get(functionName);
if (genClass == null) {
if (!functionName.startsWith(ConstantGenerator.FUNCTION_NAME)) {
return null;
}
FeatureGenerator gen = new ConstantGenerator();
gen.setFunction(functionName);
return gen;
}
try {
FeatureGenerator gen = genClass.newInstance();
gen.setFunction(functionName);
return gen;
} catch (Exception e) {
LogService.getRoot().log(Level.SEVERE, "com.rapidminer.generator.FeatureGenerator.instantiating_error",
genClass.getName());
return null;
}
}
// --------------------------------------------------------------------------------
/**
* Randomly selects a generator from the generator list. The probability of a generator to be
* selected is proportional to its number of attribute combinations as delivered by
* {@link #getInputCandidates(ExampleSet, String[])} method. Returns null if no generators are
* applicable.
*
* @param generators
* List of {@link FeatureGenerator}s
*/
public static FeatureGenerator selectGenerator(ExampleSet exampleSet, List<? extends FeatureGenerator> generators,
String[] functions, RandomGenerator random) {
int combinationSum = 0;
double[] probs = new double[generators.size()];
int k = 0;
for (FeatureGenerator generator : generators) {
// probs[k] =
// generator.getNumberOfApplicableGenerations(exampleSet);
int candidates = generator.getInputCandidates(exampleSet, functions).size();
probs[k] = candidates;
combinationSum += candidates;
k++;
}
if (combinationSum == 0) {
return null;
}
for (k = 0; k < probs.length; k++) {
probs[k] /= combinationSum;
}
return generators.get(random.randomIndex(probs));
}
// --------------------------------------------------------------------------------
/**
* Generates all new attributes and updates the ExampleTable. Returns a list of Attributes for
* the newly generated attributes.
*
* @param exampleTable
* the source example table
* @param generatorList
* List of FeatureGenerators
* @return A list of Attributes
*/
public static List<Attribute> generateAll(ExampleTable exampleTable, Collection<FeatureGenerator> generatorList)
throws GenerationException {
LogService.getRoot().log(Level.FINE, "com.rapidminer.generator.FeatureGenerator.starting_feature_generation",
generatorList.size());
Iterator<FeatureGenerator> gi = generatorList.iterator();
while (gi.hasNext()) {
gi.next().setExampleTable(exampleTable);
}
// for performance reasons convert the list to an array
FeatureGenerator[] generators = new FeatureGenerator[generatorList.size()];
generatorList.toArray(generators);
List<Attribute> newAttributeList = newAttributes(generators, exampleTable);
// add the attributes to the example table and ensure length of the
// DataRows
exampleTable.addAttributes(newAttributeList);
LogService.getRoot().log(Level.FINE, "com.rapidminer.generator.FeatureGenerator.generator_list", generatorList);
LogService.getRoot().log(Level.FINE,
"com.rapidminer.generator.FeatureGenerator.input_has_feature_count_and_example_count",
new Object[] { exampleTable.getAttributeCount(), exampleTable.size() });
// generate the attribute values:
DataRowReader reader = exampleTable.getDataRowReader();
while (reader.hasNext()) {
DataRow dataRow = reader.next();
for (int j = 0; j < generators.length; j++) {
generators[j].generate(dataRow);
}
}
LogService.getRoot().log(Level.FINE, "com.rapidminer.generator.FeatureGenerator.finished_feature_generation");
LogService.getRoot().log(Level.FINE,
"com.rapidminer.generator.FeatureGenerator.generated_set_has_feature_count_and_example_count",
new Object[] { exampleTable.getAttributeCount(), exampleTable.size() });
return newAttributeList;
}
/**
* Returns a list of new Attributes that are generated by the given generators.
*/
private static List<Attribute> newAttributes(FeatureGenerator[] generators, ExampleTable exampleTable) {
List<Attribute> newAttributeList = new LinkedList<>();
// add the attributes to the example table
for (int i = 0; i < generators.length; i++) {
Attribute outputAttribute[] = generators[i].getOutputAttributes(exampleTable);
generators[i].resultAttributes = new Attribute[outputAttribute.length];
for (int j = 0; j < outputAttribute.length; j++) {
newAttributeList.add(outputAttribute[j]);
generators[i].resultAttributes[j] = outputAttribute[j];
}
// check the arguments
if (!generators[i].argumentsSet()) {
throw new RuntimeException("Catastrophic error: arguments not set for " + generators[i] + "!");
}
if (!generators[i].argumentsOk(exampleTable)) {
LogService.getRoot().log(Level.WARNING, "com.rapidminer.generator.FeatureGenerator.wrong_argument_types",
generators[i]);
}
}
return newAttributeList;
}
public static int getSelectionMode() {
return selectionMode;
}
public static void setSelectionMode(int mode) {
selectionMode = mode;
}
@Override
public String toString() {
return "FeatureGenerator (" + getClass().getName() + ")";
}
/**
* A FeatureGenerator equals another FeatureGenerator if its class is equal and its arguments
* are equal and its function names are equal.
*/
@Override
public boolean equals(Object o) {
if (o == null) {
return false;
}
if (!this.getClass().equals(o.getClass())) {
return false;
}
FeatureGenerator fg = (FeatureGenerator) o;
if (!this.getFunction().equals(fg.getFunction())) {
return false;
}
if (this.arguments.length != fg.arguments.length) {
return false;
}
for (int i = 0; i < arguments.length; i++) {
if (!this.arguments[i].equals(fg.arguments[i])) {
return false;
}
}
return true;
}
@Override
public int hashCode() {
int hashCode = getFunction().hashCode();
if (this.arguments != null) {
hashCode ^= Arrays.hashCode(this.arguments);
}
return hashCode;
}
}