/*
* RapidMiner
*
* Copyright (C) 2001-2008 by Rapid-I and the contributors
*
* Complete list of developers available at our web site:
*
* http://rapid-i.com
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see http://www.gnu.org/licenses/.
*/
package com.rapidminer.operator.validation;
import java.util.ArrayList;
import java.util.List;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.example.set.Partition;
import com.rapidminer.example.set.SplittedExampleSet;
import com.rapidminer.operator.IOContainer;
import com.rapidminer.operator.IOObject;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.UserError;
import com.rapidminer.operator.performance.PerformanceVector;
import com.rapidminer.operator.visualization.ProcessLogOperator;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.parameter.ParameterTypeBoolean;
import com.rapidminer.parameter.ParameterTypeInt;
import com.rapidminer.tools.math.AverageVector;
/**
* <p>
* This is a special validation chain which can only be used for series predictions where
* the time points are encoded as examples. It uses a certain window of examples for
* training and uses another window (after horizon examples, i.e. time points) for testing.
* The window is moved across the example set and all performance measurements are
* averaged afterwards. The parameter "cumulative_training" indicates if all
* former examples should be used for training (instead of only the current window).
* </p>
*
* <p>This validation operator provides several values which can be logged
* by means of a {@link ProcessLogOperator}. All performance estimation operators
* of RapidMiner provide access to the average values calculated during the estimation.
* Since the operator cannot ensure the names of the delivered criteria, the
* ProcessLog operator can access the values via the generic value names:</p>
* <ul>
* <li>performance: the value for the main criterion calculated by this validation operator</li>
* <li>performance1: the value of the first criterion of the performance vector calculated</li>
* <li>performance2: the value of the second criterion of the performance vector calculated</li>
* <li>performance3: the value of the third criterion of the performance vector calculated</li>
* <li>for the main criterion, also the variance and the standard deviation can be
* accessed where applicable.</li>
* </ul>
*
* @author Ingo Mierswa
* @version $Id: SlidingWindowValidation.java,v 1.11 2008/08/25 08:10:35 ingomierswa Exp $
*/
public class SlidingWindowValidation extends ValidationChain {
/** The parameter name for "Number of examples in the window which is used for training" */
public static final String PARAMETER_TRAINING_WINDOW_WIDTH = "training_window_width";
/** The parameter name for "Number of examples the window is moved after each iteration (-1: same as test window width)" */
public static final String PARAMETER_TRAINING_WINDOW_STEP_SIZE = "training_window_step_size";
/** The parameter name for "Number of examples which are used for testing (following after 'horizon' examples after the training window end)" */
public static final String PARAMETER_TEST_WINDOW_WIDTH = "test_window_width";
/** The parameter name for "Number of examples which are between the training and testing examples" */
public static final String PARAMETER_HORIZON = "horizon";
/** The parameter name for "Indicates if each training window should be added to the old one or should replace the old one." */
public static final String PARAMETER_CUMULATIVE_TRAINING = "cumulative_training";
/** The parameter name for "Indicates if only performance vectors should be averaged or all types of averagable result vectors" */
public static final String PARAMETER_AVERAGE_PERFORMANCES_ONLY = "average_performances_only";
public SlidingWindowValidation(OperatorDescription description) {
super(description);
}
public IOObject[] estimatePerformance(ExampleSet inputSet) throws OperatorException {
int trainingWindowWidth = getParameterAsInt(PARAMETER_TRAINING_WINDOW_WIDTH);
int testWindowWidth = getParameterAsInt(PARAMETER_TEST_WINDOW_WIDTH);
int stepSize = getParameterAsInt(PARAMETER_TRAINING_WINDOW_STEP_SIZE);
if (stepSize < 0)
stepSize = testWindowWidth;
int horizon = getParameterAsInt(PARAMETER_HORIZON) - 1;
int[] partition = new int[inputSet.size()];
int neededSize = trainingWindowWidth + horizon + testWindowWidth;
if (neededSize > partition.length) {
String reason = "(" + trainingWindowWidth + "+" + horizon + "+" + testWindowWidth + "=" + neededSize + ")";
throw new UserError(this, 110, reason);
}
// evaluation loop
List<AverageVector> averageVectors = new ArrayList<AverageVector>();
for (int trainingStart = 0; trainingStart < partition.length; trainingStart += stepSize) {
if ((trainingStart + trainingWindowWidth + horizon + testWindowWidth) > partition.length)
break;
// 0: training
// 1: testing
// 2: rest
int actualTrainingStart = trainingStart;
if (getParameterAsBoolean(PARAMETER_CUMULATIVE_TRAINING)) {
actualTrainingStart = 0;
}
for (int i = 0; i < partition.length; i++) {
if ((i >= actualTrainingStart) && (i < trainingStart + trainingWindowWidth)) {
partition[i] = 0;
} else if ((i >= trainingStart + trainingWindowWidth + horizon) &&
(i < trainingStart + trainingWindowWidth + horizon + testWindowWidth)) {
partition[i] = 1;
} else {
partition[i] = 2;
}
}
// train
SplittedExampleSet splittedES = new SplittedExampleSet(inputSet, new Partition(partition, 3));
splittedES.selectSingleSubset(0);
learn(splittedES);
// evaluate
splittedES.selectSingleSubset(1);
IOContainer evalOutput = evaluate(splittedES);
Tools.handleAverages(evalOutput, averageVectors, getParameterAsBoolean(PARAMETER_AVERAGE_PERFORMANCES_ONLY));
inApplyLoop();
}
// end evaluation loop
// set last result for plotting purposes. This is an average value and
// actually not the last performance value!
PerformanceVector averagePerformance = Tools.getPerformanceVector(averageVectors);
if (averagePerformance != null)
setResult(averagePerformance);
AverageVector[] result = new AverageVector[averageVectors.size()];
averageVectors.toArray(result);
return result;
}
public List<ParameterType> getParameterTypes() {
List<ParameterType> types = super.getParameterTypes();
ParameterType type = new ParameterTypeInt(PARAMETER_TRAINING_WINDOW_WIDTH, "Number of examples in the window which is used for training", 1, Integer.MAX_VALUE, 100);
type.setExpert(false);
types.add(type);
type = new ParameterTypeInt(PARAMETER_TRAINING_WINDOW_STEP_SIZE, "Number of examples the window is moved after each iteration (-1: same as test window width)", -1, Integer.MAX_VALUE, -1);
types.add(type);
type = new ParameterTypeInt(PARAMETER_TEST_WINDOW_WIDTH, "Number of examples which are used for testing (following after 'horizon' examples after the training window end)", 1, Integer.MAX_VALUE, 100);
type.setExpert(false);
types.add(type);
type = new ParameterTypeInt(PARAMETER_HORIZON, "Increment from last training to first testing example (1 = next example).", 1, Integer.MAX_VALUE, 1);
types.add(type);
types.add(new ParameterTypeBoolean(PARAMETER_CUMULATIVE_TRAINING, "Indicates if each training window should be added to the old one or should replace the old one.", false));
types.add(new ParameterTypeBoolean(PARAMETER_AVERAGE_PERFORMANCES_ONLY, "Indicates if only performance vectors should be averaged or all types of averagable result vectors", true));
return types;
}
}