package de.tud.inf.operator.generator;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.LinkedList;
import java.util.List;
import com.rapidminer.example.Attribute;
import com.rapidminer.example.table.AttributeFactory;
import com.rapidminer.example.table.DataRow;
import com.rapidminer.example.table.DataRowFactory;
import com.rapidminer.example.table.ExampleTable;
import com.rapidminer.example.table.ListDataRowReader;
import com.rapidminer.example.table.MemoryExampleTable;
import com.rapidminer.operator.IOObject;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.generator.ExampleSetGenerator;
import com.rapidminer.operator.generator.TargetFunction;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.parameter.ParameterTypeCategory;
import com.rapidminer.parameter.ParameterTypeDouble;
import com.rapidminer.parameter.ParameterTypeInt;
import com.rapidminer.parameter.ParameterTypeString;
import com.rapidminer.parameter.ParameterTypeStringCategory;
import com.rapidminer.parameter.UndefinedParameterError;
import com.rapidminer.tools.Ontology;
import de.tud.inf.example.set.ComplexExampleSet;
import de.tud.inf.example.table.ComplexAttributeDescription;
import de.tud.inf.example.table.ComplexAttributeFactory;
import de.tud.inf.example.table.ComplexExampleTable;
/**
* Generates a complex example set and provides a separate definition of the parameter attribute values (e.g. target function etc.).
* One can choose between three different example set types. They define how many example set attributes are complex and the number of
* their inner attributes.
*
*
* @author Antje Gruner
*
*/
public class ComplexExampleSetGenerator extends ExampleSetGenerator {

    /** The parameter name for "determine the number of complex attributes in this example set". */
    private static final String PARAMETER_COMPLEX_ATTRIBUTES_COUNT = "complex_attributes_count";

    /** The parameter name for "determine the dimension of all inner attributes". */
    private static final String PARAMETER_INNER_ATTRIBUTE_COUNT = "inner_attribute_count";

    /** The parameter name for "determine the type of all complex attributes". */
    private static final String PARAMETER_COMPLEX_ATTRIBUTE_TYPE = "complex_attributes_type";

    /** Implemented complex types; the index into this array is the value of {@link #PARAMETER_COMPLEX_ATTRIBUTE_TYPE}. */
    private static final String[] complexTypes = new String[] {
        Ontology.VALUE_TYPE_NAMES[Ontology.UNIFORM],
        Ontology.VALUE_TYPE_NAMES[Ontology.COMPLEX_VALUE]
    };

    /** The parameter name for "used value for performance tests". */
    private static final String PARAMETER_PERFORMANCE_TYPE = "performance_test_type";

    /** Names of the selectable performance test layouts; the index is the value of {@link #PARAMETER_PERFORMANCE_TYPE}. */
    private static final String[] perform_types = new String[] {
        "1_complexA_full_dimension",
        "half_complexA_one_dim",
        "all_complexA_one_dim"
    };

    /** The parameter name for "Specifies the target function of this example set parameters". */
    private static final String PARAMETER_PARAMATTS_TARGET_FUNCTION = "parameter_target_function";

    /** The parameter name for "The minimum value for the parameter attributes." */
    private static final String PARAMETER_PARAMATTS_ATTRIBUTES_LOWER_BOUND = "parameter_attributes_lower_bound";

    /** The parameter name for "The maximum value for the parameter attributes." */
    private static final String PARAMETER_PARAMATTS_ATTRIBUTES_UPPER_BOUND = "parameter_attributes_upper_bound";

    /** The parameter name for "Use the given random seed instead of global random numbers (-1: use global)." */
    private static final String PARAMETER_PARAMATTS_LOCAL_RANDOM_SEED = "parameter_local_random_seed";

    /** Number of inner attributes of each complex attribute; set by {@link #setParameters()}. */
    private int dim;

    /** Number of parameter attributes of each complex attribute; default case: uniform. */
    private int paramCount = 1;

    /** Number of complex attributes in the generated example set; set by {@link #setParameters()}. */
    private int cCount;

    public ComplexExampleSetGenerator(OperatorDescription description) throws UndefinedParameterError {
        super(description);
    }

    /**
     * Generates the complex example set.
     * <p>
     * First derives the layout via {@link #setParameters()}, then creates the default table. If the
     * chosen complex type needs parameter attributes ({@code paramCount > 0}), a second table holding
     * {@code cCount * paramCount} parameter attributes is generated and merged in front of the default
     * table.
     *
     * @return a single-element array containing the generated {@link ComplexExampleSet}
     * @throws OperatorException if a parameter is undefined or table generation fails
     */
    @Override
    public IOObject[] apply() throws OperatorException {
        // determine type of complex attributes and set appropriate parameters
        setParameters();

        // 1. create default table with label attribute
        ExampleTable defaultTable = createDefaultExampleTable(getParameterAsInt(PARAMETER_NUMBER_OF_ATTRIBUTES));

        if (this.paramCount <= 0) {
            // no parameter attributes needed: the default table is the result
            return new IOObject[] { createExampleSet(defaultTable, label, 0) };
        }

        // 2. initialize and create parameter table
        double lower = getParameterAsDouble(PARAMETER_PARAMATTS_ATTRIBUTES_LOWER_BOUND);
        double upper = getParameterAsDouble(PARAMETER_PARAMATTS_ATTRIBUTES_UPPER_BOUND);
        String functionName = getParameterAsString(PARAMETER_PARAMATTS_TARGET_FUNCTION);
        int seed = getParameterAsInt(PARAMETER_PARAMATTS_LOCAL_RANDOM_SEED);
        int paramAttributeCount = cCount * paramCount;
        TargetFunction function = initializeFunction(
                lower, upper, functionName, getParameterAsInt(PARAMETER_NUMBER_EXAMPLES), paramAttributeCount);
        // NOTE(review): the meaning of the trailing 'false' and "param" arguments is defined by the
        // inherited createExampleTable and is not visible here - confirm against ExampleSetGenerator.
        ExampleTable paramTable = createExampleTable(
                paramAttributeCount,
                getParameterAsInt(PARAMETER_NUMBER_EXAMPLES),
                function,
                seed,
                false,
                "param");

        // 3. merge tables; since defaultTable contains the label attribute and is appended last,
        // the label will be the last attribute in the merged table
        ExampleTable result = mergeTables(paramTable, defaultTable);
        return new IOObject[] { createExampleSet(result, label, paramTable.getNumberOfAttributes()) };
    }

    /**
     * Merges two example tables column-wise into a new {@link MemoryExampleTable}.
     * <p>
     * The attributes of {@code t1} come first (the attribute objects themselves are reused), followed
     * by copies of the attributes of {@code t2}. The number of rows copied is taken from the
     * {@code PARAMETER_NUMBER_EXAMPLES} parameter, so both tables must provide at least that many rows.
     *
     * @param t1 table whose attributes form the leading columns of the result
     * @param t2 table whose attributes are copied and appended after those of {@code t1}
     * @return the merged table
     * @throws UndefinedParameterError if a required operator parameter is undefined
     * @throws IllegalStateException if an attribute of the merged table does not match the source
     *         attribute at the same position (previously this was only printed to stdout and the
     *         affected cell was silently left unset)
     */
    public ExampleTable mergeTables(ExampleTable t1, ExampleTable t2) throws UndefinedParameterError {
        // copy attributes
        MemoryExampleTable result = new MemoryExampleTable(Arrays.asList(t1.getAttributes()));
        for (int i = 0; i < t2.getNumberOfAttributes(); i++) {
            result.addAttribute(AttributeFactory.createAttribute(t2.getAttribute(i)));
        }

        DataRowFactory factory = new DataRowFactory(getParameterAsInt(PARAMETER_DATAMANAGEMENT), '.');
        int firstCount = t1.getNumberOfAttributes();
        int mergedCount = firstCount + t2.getNumberOfAttributes();
        // read once instead of on every loop iteration
        int exampleCount = getParameterAsInt(PARAMETER_NUMBER_EXAMPLES);

        // create new DataRows
        List<DataRow> data = new ArrayList<DataRow>(Math.max(exampleCount, 0));
        for (int e = 0; e < exampleCount; e++) {
            DataRow firstRow = t1.getDataRow(e);
            DataRow secondRow = t2.getDataRow(e);
            DataRow row = factory.create(mergedCount);
            for (int i = 0; i < mergedCount; i++) {
                Attribute target = result.getAttribute(i);
                boolean fromFirst = i < firstCount;
                Attribute source = fromFirst ? t1.getAttribute(i) : t2.getAttribute(i - firstCount);
                if (!target.getName().equals(source.getName())) {
                    // a mismatch means the merged layout is inconsistent; fail instead of
                    // producing a table with unset cells
                    throw new IllegalStateException("Cannot merge tables: attribute '" + target.getName()
                            + "' at position " + i + " does not match source attribute '"
                            + source.getName() + "'");
                }
                row.set(target, (fromFirst ? firstRow : secondRow).get(source));
            }
            row.trim();
            data.add(row);
        }

        // fill table with data
        result.readExamples(new ListDataRowReader(data.iterator()));
        return result;
    }

    /**
     * Wraps the given table into a {@link ComplexExampleSet} with {@code cCount} complex attributes.
     * <p>
     * The table is expected to hold the parameter attributes first ({@code nrParams} of them,
     * {@code paramCount} per complex attribute), followed by the inner attributes ({@code dim} per
     * complex attribute). The last attribute of the resulting complex table is used as label.
     *
     * @param table the underlying example table
     * @param label not used by this method; the label is taken from the last attribute of the table
     * @param nrParams number of leading parameter attributes in {@code table}, i.e. the offset of the
     *        first inner attribute
     * @return the complex example set built on top of {@code table}
     * @throws OperatorException if a parameter is undefined or the example set cannot be created
     */
    public ComplexExampleSet createExampleSet(ExampleTable table, Attribute label, int nrParams) throws OperatorException {
        String typeName = complexTypes[getParameterAsInt(PARAMETER_COMPLEX_ATTRIBUTE_TYPE)];
        List<ComplexAttributeDescription> cadList = new ArrayList<ComplexAttributeDescription>();

        for (int i = 0; i < cCount; i++) {
            // inner attributes of complex attribute i start behind all parameter attributes
            int[] attIds = new int[dim];
            for (int j = 0; j < dim; j++) {
                attIds[j] = table.getAttribute(nrParams + i * dim + j).getTableIndex();
            }

            // parameter attributes of complex attribute i are located at the front of the table
            int[] paramIds = new int[paramCount];
            for (int k = 0; k < paramCount; k++) {
                paramIds[k] = table.getAttribute(i * paramCount + k).getTableIndex();
            }

            cadList.add(ComplexAttributeFactory.createAttributeDescription(
                    attIds, paramIds, typeName, "complAtt_" + i, ""));
        }

        ComplexExampleTable cet = new ComplexExampleTable(table, cadList);
        // the last attribute is the label attribute
        return cet.createExampleSet(cet.getAttribute(cet.getAttributeCount() - 1));
    }

    /**
     * Derives {@code paramCount}, {@code dim} and {@code cCount} from the operator parameters.
     * <p>
     * The complex attribute type decides how many parameter attributes each complex attribute has;
     * the performance test type decides how many complex attributes exist and how many inner
     * attributes each of them owns.
     *
     * @throws OperatorException if a required parameter is undefined
     */
    public void setParameters() throws OperatorException {
        int nrAttributes = getParameterAsInt(PARAMETER_NUMBER_OF_ATTRIBUTES);

        switch (getParameterAsInt(PARAMETER_COMPLEX_ATTRIBUTE_TYPE)) {
        case 0: // uniform
            paramCount = 1;
            break;
        case 1: // default complex value
            paramCount = 0;
            break;
        }

        switch (getParameterAsInt(PARAMETER_PERFORMANCE_TYPE)) {
        case 0: // one complex attribute with all attributes (except label) as inner attributes
            dim = nrAttributes;
            cCount = (dim <= 0) ? 0 : 1;
            break;
        case 1: // half the attributes are complex
            dim = 1;
            cCount = nrAttributes / 2;
            break;
        case 2: // all attributes are complex
            dim = 1;
            cCount = nrAttributes;
            break;
        default:
            // NOTE(review): the performance type category declares exactly three values, so this
            // branch is only taken if the parameter yields another index - confirm whether the
            // explicit count parameters are meant to be selectable.
            dim = getParameterAsInt(PARAMETER_INNER_ATTRIBUTE_COUNT);
            cCount = getParameterAsInt(PARAMETER_COMPLEX_ATTRIBUTES_COUNT);
            break;
        }
    }

    /** @return the output classes of this operator: a single {@link ComplexExampleSet} */
    public Class<?>[] getOutputClasses() {
        return new Class[] { ComplexExampleSet.class };
    }

    /**
     * Extends the inherited parameter list with the complex attribute layout parameters and the
     * generation settings (target function, bounds, random seed) of the parameter attributes.
     *
     * @return the inherited parameter types followed by the parameters of this operator
     */
    @Override
    public List<ParameterType> getParameterTypes() {
        List<ParameterType> types = super.getParameterTypes();
        ParameterType type;

        type = new ParameterTypeInt(PARAMETER_COMPLEX_ATTRIBUTES_COUNT,
                "the number of complex attributes in this example set", 0, Integer.MAX_VALUE, 0);
        type.setExpert(false);
        types.add(type);

        type = new ParameterTypeInt(PARAMETER_INNER_ATTRIBUTE_COUNT,
                "the number of inner attributes of each complex attributes in this example set", 1, Integer.MAX_VALUE, 1);
        type.setExpert(false);
        types.add(type);

        type = new ParameterTypeCategory(PARAMETER_PERFORMANCE_TYPE,
                "complex example set types of implemented performance test", perform_types, 0);
        type.setExpert(true);
        types.add(type);

        types.add(new ParameterTypeCategory(PARAMETER_COMPLEX_ATTRIBUTE_TYPE,
                "determines the type of complex attributes", complexTypes, 0));

        type = new ParameterTypeStringCategory(PARAMETER_PARAMATTS_TARGET_FUNCTION,
                "Specifies the target function of this example set", KNOWN_FUNCTION_NAMES);
        type.setExpert(false);
        types.add(type);

        types.add(new ParameterTypeDouble(PARAMETER_PARAMATTS_ATTRIBUTES_LOWER_BOUND,
                "The minimum value for the parameter attributes.", Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY, -10));
        types.add(new ParameterTypeDouble(PARAMETER_PARAMATTS_ATTRIBUTES_UPPER_BOUND,
                "The maximum value for the parameter attributes.", Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY, 10));

        // the seed is read with getParameterAsInt in apply(), so it must be declared as an integer
        // parameter (it was declared as a free-form string before, which allowed non-numeric input)
        types.add(new ParameterTypeInt(PARAMETER_PARAMATTS_LOCAL_RANDOM_SEED,
                "Use the given random seed instead of global random numbers (-1: use global).", -1, Integer.MAX_VALUE, -1));
        return types;
    }
}