/* * RapidMiner * * Copyright (C) 2001-2008 by Rapid-I and the contributors * * Complete list of developers available at our web site: * * http://rapid-i.com * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see http://www.gnu.org/licenses/. */ package com.rapidminer.operator.meta; import java.util.Iterator; import java.util.LinkedList; import java.util.List; import com.rapidminer.example.Attribute; import com.rapidminer.example.Example; import com.rapidminer.example.ExampleSet; import com.rapidminer.example.table.AttributeFactory; import com.rapidminer.example.table.DoubleArrayDataRow; import com.rapidminer.example.table.MemoryExampleTable; import com.rapidminer.operator.IOContainer; import com.rapidminer.operator.IOObject; import com.rapidminer.operator.Model; import com.rapidminer.operator.ModelApplier; import com.rapidminer.operator.Operator; import com.rapidminer.operator.OperatorChain; import com.rapidminer.operator.OperatorCreationException; import com.rapidminer.operator.OperatorDescription; import com.rapidminer.operator.OperatorException; import com.rapidminer.operator.UserError; import com.rapidminer.operator.condition.AllInnerOperatorCondition; import com.rapidminer.operator.condition.InnerOperatorCondition; import com.rapidminer.operator.learner.PredictionModel; import com.rapidminer.parameter.ParameterType; import com.rapidminer.parameter.ParameterTypeInt; import com.rapidminer.tools.Ontology; import com.rapidminer.tools.OperatorService; /** * This operator can be used for some basic series prediction operations. * The given series must be univariate and must be encoded by * examples, i.e. each point of time is encoded by the values in one * single example. The values which should be predicted must be defined * by the label attribute. Other attributes will be ignored. * * The operator creates time windows and learns a model from these * windows to predict the value of the label column after a certain amount * of values (horizon). After predicting a value, the window is moved with step * size 1 and the next value is predicted. All predictions are kept and can * be compared afterwards to the actual values in a series plot or with a performance * evaluation operator. * * If you want predictions for different horizons, you have to restart * this operator with different settings for horizon. This might be useful to * get a prediction for 1 to horizon future time steps. * * The inner learner must be able to work on numerical regression problems. * * @author Ingo Mierswa * @version $Id: UnivariateLabelSeriesPrediction.java,v 1.7 2008/07/24 15:23:22 ingomierswa Exp $ */ public class UnivariateLabelSeriesPrediction extends OperatorChain { public static final String PARAMETER_WINDOW_WIDTH = "window_width"; public static final String PARAMETER_MAX_TRAINING_SET_SIZE = "max_training_set_size"; public static final String PARAMETER_HORIZON = "horizon"; public UnivariateLabelSeriesPrediction(OperatorDescription description) { super(description); } public IOObject[] apply() throws OperatorException { ExampleSet exampleSet = getInput(ExampleSet.class); Attribute label = exampleSet.getAttributes().getLabel(); // *** sanity checks if (label == null) throw new UserError(this, 105); if (label.isNominal()) throw new UserError(this, 102, "series predictions", label.getName()); // *** create training attributes and training table int windowWidth = getParameterAsInt(PARAMETER_WINDOW_WIDTH); List<Attribute> attributes = new LinkedList<Attribute>(); for (int i = 0; i < windowWidth; i++) { attributes.add(AttributeFactory.createAttribute("series_" + (i + 1), Ontology.REAL)); } Attribute newLabel = AttributeFactory.createAttribute("label", Ontology.REAL); attributes.add(newLabel); // *** create test attributes and test table List<Attribute> testAttributes = new LinkedList<Attribute>(); for (int i = 0; i < windowWidth; i++) { testAttributes.add(AttributeFactory.createAttribute("series_" + (i + 1), Ontology.REAL)); } // *** learn models and create predictions Operator modelApplier = null; try { modelApplier = OperatorService.createOperator(ModelApplier.class); } catch (OperatorCreationException e) { throw new OperatorException("Cannot create model applier: " + e.getMessage()); } int horizon = getParameterAsInt(PARAMETER_HORIZON); int maxTrainingSetSize = getParameterAsInt(PARAMETER_MAX_TRAINING_SET_SIZE); double[] predictions = new double[exampleSet.size()]; for (int i = 0; i < horizon; i++) predictions[i] = Double.NaN; for (int i = horizon; i < 2 * horizon + windowWidth - 1; i++) predictions[i] = exampleSet.getExample(i - horizon).getValue(label); // create actually learned predictions for (int toPredict = windowWidth + 2 * horizon - 1; toPredict < exampleSet.size(); toPredict++) { // learn only on recent examples int startIndex = Math.max(windowWidth + 2 * horizon - 1, toPredict - maxTrainingSetSize); MemoryExampleTable trainingTable = new MemoryExampleTable(attributes); for (int s = startIndex; s <= toPredict; s++) { double[] trainingData = new double[windowWidth + 1]; for (int d = 0; d < windowWidth; d++) { trainingData[d] = exampleSet.getExample(s - 2 * horizon - windowWidth + 1 + d).getValue(label); } trainingData[trainingData.length - 1] = exampleSet.getExample(s - horizon).getValue(label); trainingTable.addDataRow(new DoubleArrayDataRow(trainingData)); } // create training set and apply inner learner ExampleSet trainingSet = trainingTable.createExampleSet(newLabel); IOContainer innerResult = getOperator(0).apply(new IOContainer(trainingSet)); Model model = innerResult.get(Model.class); // create prediction example and apply model MemoryExampleTable testTable = new MemoryExampleTable(testAttributes); double[] testData = new double[windowWidth + 1]; for (int d = 0; d < windowWidth; d++) { testData[d] = exampleSet.getExample(toPredict - horizon - windowWidth + 1 + d).getValue(label); } testTable.addDataRow(new DoubleArrayDataRow(testData)); ExampleSet testSet = testTable.createExampleSet(); IOContainer applyResult = modelApplier.apply(new IOContainer(new IOObject[] { model, testSet })); ExampleSet predictionSet = applyResult.get(ExampleSet.class); double predictedValue = predictionSet.getExample(0).getPredictedLabel(); // store prediction predictions[toPredict] = predictedValue; PredictionModel.removePredictedLabel(predictionSet); checkForStop(); } Attribute predictedLabel = PredictionModel.createPredictedLabel(exampleSet, label); Iterator<Example> e = exampleSet.iterator(); int counter = 0; while (e.hasNext()) { Example example = e.next(); double prediction = predictions[counter]; example.setValue(predictedLabel, prediction); counter++; } return new IOObject[] { exampleSet }; } public InnerOperatorCondition getInnerOperatorCondition() { return new AllInnerOperatorCondition(new Class[] { ExampleSet.class }, new Class[] { Model.class }); } public int getMaxNumberOfInnerOperators() { return 1; } public int getMinNumberOfInnerOperators() { return 1; } public Class<?>[] getInputClasses() { return new Class[] { ExampleSet.class }; } public Class<?>[] getOutputClasses() { return new Class[] { ExampleSet.class }; } public List<ParameterType> getParameterTypes() { List<ParameterType> types = super.getParameterTypes(); types.add(new ParameterTypeInt(PARAMETER_WINDOW_WIDTH, "The number of values used as indicators for predicting the target value.", 1, Integer.MAX_VALUE, 10)); types.add(new ParameterTypeInt(PARAMETER_HORIZON, "The gap size used between training windows and prediction value.", 1, Integer.MAX_VALUE, 1)); types.add(new ParameterTypeInt(PARAMETER_MAX_TRAINING_SET_SIZE, "The maximum number of examples (windows) used for training the prediction model.", 1, Integer.MAX_VALUE, 10)); return types; } }