package de.tud.inf.operator.learner.meta;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.Iterator;
import java.util.List;
import com.rapidminer.example.Attribute;
import com.rapidminer.example.Attributes;
import com.rapidminer.example.Example;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.example.set.AttributeValueFilter;
import com.rapidminer.example.set.AttributeValueFilterSingleCondition;
import com.rapidminer.example.set.Condition;
import com.rapidminer.example.set.ConditionedExampleSet;
import com.rapidminer.example.set.SortedExampleSet;
import com.rapidminer.operator.AbstractIOObject;
import com.rapidminer.operator.IOContainer;
import com.rapidminer.operator.IOObject;
import com.rapidminer.operator.Model;
import com.rapidminer.operator.Operator;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.UserError;
import com.rapidminer.operator.learner.Learner;
import com.rapidminer.operator.learner.LearnerCapability;
import com.rapidminer.operator.learner.functions.LinearRegression;
import com.rapidminer.operator.learner.meta.AbstractMetaLearner;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.parameter.ParameterTypeBoolean;
import com.rapidminer.parameter.ParameterTypeFile;
import com.rapidminer.parameter.ParameterTypeInt;
public class SlidingWindowLearner extends AbstractMetaLearner {
public static final String ENSAMBLE_WINDOW_SIZE = "window size";
public static final String ENSAMBLE_STATE_FILE = "state file";
public static final String ENSABMLE_OVERRIDE = "override";
private boolean firstRun = true;
public Model learn(ExampleSet exampleSet) throws OperatorException {
boolean override = this.getParameterAsBoolean(ENSABMLE_OVERRIDE);
int windowSize = this.getParameterAsInt(ENSAMBLE_WINDOW_SIZE);
File stateFile = this.getParameterAsFile(ENSAMBLE_STATE_FILE);
ExampleSet updatedSet = null;
SlidingWindowLearnerModel learningModel = null;
/*check process setup*/
checkInnerOperator();
if((!override && stateFile.exists()) || (override && !firstRun)) {
// load the model from the state file
learningModel = loadModel(stateFile);
} else {
/* create a new Model*/
learningModel = new SlidingWindowLearnerModel(exampleSet);
firstRun = false;
}
if(exampleSetsCompatible(learningModel.getTraining(), exampleSet) == false) {
logError("New example set not compatible to example set used for training ensemble so far");
throw new OperatorException("Example sets not compatible");
}
/* new Examples available? */
Condition newIds = new AttributeValueFilterSingleCondition(
exampleSet.getAttributes().getId(),
AttributeValueFilterSingleCondition.GREATER, String
.valueOf(learningModel.getRecentId()));
ConditionedExampleSet newIDSet = new ConditionedExampleSet(exampleSet,newIds);
if(newIDSet.size() > 0) {
/* neue Examples vorhanden */
updatedSet = exampleSet;
}
/* update the Model*/
if(updatedSet != null) {
/*sort according to the id*/
updatedSet = new SortedExampleSet(updatedSet,updatedSet.getAttributes().getId(),SortedExampleSet.DECREASING);
learningModel.setRecentId(updatedSet.getExample(0).getId());
learningModel.setLeastId((updatedSet.size() > windowSize ? updatedSet.getExample(windowSize - 1).getId() : updatedSet.getExample(updatedSet.size() - 1).getId()));
/* train the new model*/
Condition windowCondition = new AttributeValueFilter(updatedSet,
"id >= " + learningModel.getLeastId() + " && id <= " + learningModel.getRecentId());
ConditionedExampleSet trainingSet = new ConditionedExampleSet(updatedSet,windowCondition);
learningModel.setTraining(exampleSet);
learningModel.setTrainingSize(trainingSet.size());
Model newModel = getOperator(0).apply(new IOContainer(new IOObject[] {trainingSet})).get(Model.class);
learningModel.setPredictionModel(newModel);
/*save the state of the model*/
saveModel(learningModel, stateFile);
}
return learningModel;
}
public SlidingWindowLearner(OperatorDescription description) throws UserError {
super(description);
}
@Override
public int getMaxNumberOfInnerOperators() {
return 1;
}
public void checkInnerOperator() throws UserError {
// inner Operator must be a Learner and has to support numerical labels
Operator innerOperator = this.getOperator(0);
if (!(innerOperator instanceof Learner)
|| (innerOperator instanceof Learner && !((Learner) innerOperator)
.supportsCapability(LearnerCapability.NUMERICAL_CLASS)))
throw new UserError(this, 127, "Base Learner is not supported");
}
@Override
public List<ParameterType> getParameterTypes() {
ParameterType type;
List<ParameterType> types = super.getParameterTypes();
type = new ParameterTypeInt(ENSAMBLE_WINDOW_SIZE,"The number of Examples to consider",0,Integer.MAX_VALUE,5);
type.setExpert(false);
types.add(type);
type = new ParameterTypeFile(ENSAMBLE_STATE_FILE,"path to the ensemble state file","mod",false);
type.setExpert(false);
types.add(type);
type = new ParameterTypeBoolean(ENSABMLE_OVERRIDE,"Indicates if the state file should be overridden on the first run",false);
type.setExpert(false);
types.add(type);
return types;
}
public SlidingWindowLearnerModel loadModel(File file) throws UserError {
SlidingWindowLearnerModel loadedModel = null;
InputStream in = null;
try {
in = new FileInputStream(file);
loadedModel = (SlidingWindowLearnerModel) AbstractIOObject.read(in);
} catch (FileNotFoundException e) {
throw new UserError(this,301,file.getPath());
} catch (IOException e) {
throw new UserError(this,302,file.getAbsolutePath(),e.getCause());
} finally {
if(in != null) {
try {
in.close();
} catch (IOException e) {
logError("Cannot close stream from file " + file);
}
}
}
return loadedModel;
}
public void saveModel(Model model, File file) throws UserError {
OutputStream out = null;
try {
out = new FileOutputStream(file);
if(model != null)
model.write(out);
} catch (FileNotFoundException e) {
throw new UserError(this,301,file.getPath());
} catch (IOException e) {
throw new UserError(this,303,file,e.getCause());
} finally {
if(out != null)
try {
out.close();
} catch (IOException e) {
logError("Cannot close stream to file " + file);
}
}
}
private boolean exampleSetsCompatible(ExampleSet setOld, ExampleSet setNew) {
Attributes oldAttrib = setOld.getAttributes();
Attributes newAttrib = setNew.getAttributes();
Iterator<Attribute> oldIter = oldAttrib.iterator();
Iterator<Attribute> newIter = newAttrib.iterator();
if(oldAttrib.allSize() != newAttrib.allSize()) {
return false;
}
while(oldIter.hasNext()) {
Attribute oldA = oldIter.next();
Attribute newA = newIter.next();
if(! oldA.getName().equals(newA.getName())) {
return false;
}
}
if(!(setNew.getAttributes().getId() != null)) {
return false;
}
if(!(setOld.size() <= setNew.size())) {
return false;
}
return true;
}
}