package de.tud.inf.operator.io; import java.io.BufferedReader; import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.io.InputStreamReader; import java.io.StreamTokenizer; import java.util.HashMap; import java.util.List; import java.util.Map; import com.rapidminer.example.Attribute; import com.rapidminer.example.Attributes; import com.rapidminer.operator.IOObject; import com.rapidminer.operator.OperatorDescription; import com.rapidminer.operator.OperatorException; import com.rapidminer.operator.UserError; import com.rapidminer.operator.io.ArffExampleSource; import com.rapidminer.parameter.ParameterType; import com.rapidminer.parameter.ParameterTypeBoolean; import com.rapidminer.tools.Tools; import de.tud.inf.example.set.ComplexExampleSet; import de.tud.inf.example.table.ComplexExampleTable; /** * creates a ComplexExampleSet from and ARFF file, when file does not contain complex information (i.e dependency information) exception is thrownm * @author Antje Gruner * */ public class ComplexArffExampleSource extends ArffExampleSource{ /** The parameter name for "select whether read or ignore complex information in complex arff file " */ public static final String PARAMETER_READ_COMPLEX_ATTRIBUTES = "read_complex_attributes"; public ComplexArffExampleSource(OperatorDescription description) { super(description); } public IOObject[] apply()throws OperatorException { try{ ComplexArffReader reader; //store new Annotation somewhere else String complexArffAnnotation = "@DATATABLE"; File file = getParameterAsFile(PARAMETER_DATA_FILE); BufferedReader in = new BufferedReader(new InputStreamReader(new FileInputStream(file), getEncoding())); // init Attribute label = null; Attribute weight = null; Attribute id = null; // read file StreamTokenizer tokenizer = createTokenizer(in); Tools.getFirstToken(tokenizer); if (tokenizer.ttype == StreamTokenizer.TT_EOF) { throw new UserError(this, 302, getParameterAsString(PARAMETER_DATA_FILE), "file is empty"); } if(complexArffAnnotation.equalsIgnoreCase(tokenizer.sval)){ Tools.getNextToken(tokenizer); Tools.getLastToken(tokenizer, false); if(getParameterAsBoolean(PARAMETER_READ_COMPLEX_ATTRIBUTES)) //first read relations which exists (if just one, normal case, if two should be dependency case, three are not supported yet (and make no sense)) reader = new ComplexArffReader(tokenizer, this, PARAMETER_SAMPLE_SIZE, PARAMETER_SAMPLE_RATIO, PARAMETER_DATAMANAGEMENT, PARAMETER_LOCAL_RANDOM_SEED, PARAMETER_DECIMAL_POINT_CHARACTER ); else reader = new FlatComplexArffReader(tokenizer, this, PARAMETER_SAMPLE_SIZE, PARAMETER_SAMPLE_RATIO, PARAMETER_DATAMANAGEMENT, PARAMETER_LOCAL_RANDOM_SEED, PARAMETER_DECIMAL_POINT_CHARACTER ); }else throw new IOException("expected the keyword "+ complexArffAnnotation +" in line " + tokenizer.lineno()); ComplexExampleTable table = reader.read(); for(Attribute attribute: table.getAttributes()){ if(attribute != null){ if (attribute.getName().equals(getParameterAsString(PARAMETER_LABEL_ATTRIBUTE))) { label = attribute; } else if (attribute.getName().equals(getParameterAsString(PARAMETER_ID_ATTRIBUTE))) { id = attribute; } else if (attribute.getName().equals(getParameterAsString(PARAMETER_WEIGHT_ATTRIBUTE))) { weight = attribute; } } } in.close(); Map<Attribute, String> specialMap = new HashMap<Attribute, String>(); specialMap.put(label, Attributes.LABEL_NAME); specialMap.put(weight, Attributes.WEIGHT_NAME); specialMap.put(id, Attributes.ID_NAME); ComplexExampleSet ces = table.createExampleSet(specialMap); return new IOObject[] { ces }; } catch (IOException e) { throw new UserError(this, 302, getParameterAsString(PARAMETER_DATA_FILE), e.getMessage()); } } @Override public Class<?>[] getOutputClasses() { return new Class[] { ComplexExampleSet.class }; } public List<ParameterType> getParameterTypes() { List<ParameterType> types = super.getParameterTypes(); ParameterType type = new ParameterTypeBoolean(PARAMETER_READ_COMPLEX_ATTRIBUTES,"select whether read or ignore complex information in complex arff file",true); type.setExpert(false); types.add(type); return types; } }