package de.tud.inf.operator.io;
import java.io.IOException;
import java.io.StreamTokenizer;
import java.util.ArrayList;
import java.util.List;
import com.rapidminer.example.Attribute;
import com.rapidminer.example.table.DataRow;
import com.rapidminer.example.table.DataRowReader;
import com.rapidminer.example.table.ExampleTable;
import com.rapidminer.operator.io.ArffExampleSource;
import com.rapidminer.parameter.UndefinedParameterError;
import de.tud.inf.example.table.ComplexExampleTable;
import de.tud.inf.example.table.RelationalAttribute;
/**
 * Reads a complex arff file but ignores the complex information, i.e. the complex function and its parameters.
 * Relational attributes within a complex arff (e.g. with matrix values for each row) can neither be ignored
 * nor sensibly mapped to a simple attribute, so an exception is thrown if relational attributes occur in the dataset.
 *
 * @author Antje Gruner
 */
public class FlatComplexArffReader extends ComplexArffReader {

    /**
     * Creates a reader by passing all arguments unchanged to the {@link ComplexArffReader} constructor.
     *
     * @param tokenizer tokenizer positioned on the arff input
     * @param arffES the arff example source operator this reader works for
     * @param parameter_sample_size forwarded to the superclass constructor
     * @param parameter_sample_ratio forwarded to the superclass constructor
     * @param parameter_datamanagement forwarded to the superclass constructor
     * @param parameter_local_random_seed forwarded to the superclass constructor
     * @param parameter_decimal_point_character forwarded to the superclass constructor
     */
    public FlatComplexArffReader(StreamTokenizer tokenizer,
            ArffExampleSource arffES, String parameter_sample_size,
            String parameter_sample_ratio, String parameter_datamanagement,
            String parameter_local_random_seed,
            String parameter_decimal_point_character) {
        super(tokenizer, arffES, parameter_sample_size, parameter_sample_ratio,
                parameter_datamanagement, parameter_local_random_seed,
                parameter_decimal_point_character);
    }

    /*
     * NOTE: the following override is disabled (commented out) and kept for reference only.
     * Relational attributes are rejected in buildTable instead.
     *
     * @param tokenizer
     * @param attributeName name of the relational Attribute
     * @param depAttribute is true if relational attribute appears in dependency information of arff file
     * @return relational Attribute with wrapped innerAttributes
     *
     * protected Attribute readRelationalAttribute(StreamTokenizer tokenizer, String attributeName, boolean depAttribute) throws IOException{
     *     if(!depAttribute)
     *         throw new IOException("complex arff file contains attribute type "+ tokenizer.sval + ", which could not be mapped to a primitive attribute");
     *     else return super.readRelationalAttribute(tokenizer, attributeName, depAttribute);
     * }
     */

    /**
     * Builds the data table without those attributes which serve as parameters of complex attributes.
     * <p>
     * The dataset is read via {@code readData}. If the dependency table contains the attribute named
     * {@code ComplexArffDescription.depParamName}, the parameter names stored in it are collected and
     * the attributes with these names are removed from the data table. Afterwards the data table must
     * not contain any relational attribute.
     *
     * @param attributes attributes handed to {@code readData} to read the actual dataset
     * @param depEt example table holding the dependency information of the arff file
     * @return a {@link ComplexExampleTable} wrapping the flattened data table
     * @throws UndefinedParameterError declared by the overridden signature; presumably raised by
     *         {@code readData} (to be confirmed against the superclass)
     * @throws IOException if a relational attribute remains in the data table after the parameter
     *         attributes have been removed
     */
    @Override
    protected ComplexExampleTable buildTable(List<Attribute> attributes, ExampleTable depEt)
            throws UndefinedParameterError, IOException {
        // read the "real" dataset
        ExampleTable et = readData(attributes);

        Attribute paramA = findDependencyParameterAttribute(depEt);
        if (paramA != null) {
            List<String> params = readParameterNames(depEt, paramA);
            removeAttributesByName(et, params);
        }

        rejectRelationalAttributes(et);
        return new ComplexExampleTable(et);
    }

    /**
     * Finds the attribute of the dependency table that stores the parameter information.
     *
     * @return the attribute named {@code ComplexArffDescription.depParamName}, or {@code null} if absent
     */
    private static Attribute findDependencyParameterAttribute(ExampleTable depEt) {
        // Iterate over every slot (getNumberOfAttributes), not only over the number of non-null
        // attributes (getAttributeCount): slots may be null, hence the null check below.
        for (int i = 0; i < depEt.getNumberOfAttributes(); i++) {
            Attribute a = depEt.getAttribute(i);
            if (a != null && a.getName().equals(ComplexArffDescription.depParamName)) {
                return a; // the reference is used as is, no clone
            }
        }
        return null;
    }

    /**
     * Collects the names of the concrete parameters of complex functions from all rows of the dependency table.
     */
    private static List<String> readParameterNames(ExampleTable depEt, Attribute paramA) {
        List<String> params = new ArrayList<String>();
        DataRowReader reader = depEt.getDataRowReader();
        while (reader.hasNext()) {
            DataRow row = reader.next();
            // fetch value list of relational parameter attribute
            double[][] pNames = row.getRelativeValuesFor(paramA.getTableIndex());
            if (pNames == null) {
                continue;
            }
            // the parameter name is the first entry of each inner row; it is stored as an index
            // into the nominal mapping of the first inner attribute
            for (int j = 0; j < pNames.length; j++) {
                params.add(((RelationalAttribute) paramA).getInnerAttributeAt(0).getMapping()
                        .mapIndex((int) pNames[j][0]));
            }
        }
        return params;
    }

    /**
     * Removes every attribute from the table whose name is contained in the given list.
     */
    private static void removeAttributesByName(ExampleTable et, List<String> names) {
        for (int i = 0; i < et.getNumberOfAttributes(); i++) {
            Attribute a = et.getAttribute(i);
            if (a != null && names.contains(a.getName())) {
                et.removeAttribute(i);
            }
        }
    }

    /**
     * Checks that the table holds no relational attributes, i.e. none that are NOT parameter attributes
     * of complex attributes.
     *
     * @throws IOException if a relational attribute is found
     */
    private static void rejectRelationalAttributes(ExampleTable et) throws IOException {
        // Use getNumberOfAttributes here: after attributes were removed, the count of non-null
        // attributes (getAttributeCount) may be smaller than the number of slots (presumably
        // removeAttribute leaves a null slot — see the ExampleTable documentation), so bounding
        // the loop by it could skip trailing attributes.
        for (int i = 0; i < et.getNumberOfAttributes(); i++) {
            Attribute a = et.getAttribute(i);
            if (a != null && a.isRelational()) {
                throw new IOException("complex arff file contains relational attribute "+a.getName()+" which could not be mapped to a primitive attribute");
            }
        }
    }
}