/*
* RapidMiner
*
* Copyright (C) 2001-2008 by Rapid-I and the contributors
*
* Complete list of developers available at our web site:
*
* http://rapid-i.com
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see http://www.gnu.org/licenses/.
*/
package com.rapidminer.operator.preprocessing.filter;
import com.rapidminer.example.Attribute;
import com.rapidminer.example.Example;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.example.table.DoubleArrayDataRow;
import com.rapidminer.example.table.MemoryExampleTable;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.UserError;
/**
* <p>This operator transforms a given example set containing series data into a
* new example set containing single valued examples. For this purpose, windows with
* a specified width and step size are moved across the series, and the series
* value lying <em>horizon</em> values after the window end is used as the label to
* be predicted. This operator can only be used for univariate series prediction.
* For the multivariate case, please use the operator
* {@link com.rapidminer.operator.preprocessing.filter.MultivariateSeries2WindowExamples}.</p>
*
* <p>
* The series data must be given as ExampleSet. The parameter "series_representation"
* defines how the series data is represented by the ExampleSet:</p>
* <ul>
* <li><code>encode_series_by_examples</code>: the series index variable (e.g. time) is encoded by the examples,
* i.e. there is a <em>single</em> attribute and a set of examples. Each example encodes the value for a new time point.</li>
* <li><code>encode_series_by_attributes</code>: the series index variable (e.g. time) is encoded by the attributes,
* i.e. there is a set of one or more examples and a set of attributes. Each attribute value encodes the value for a
* new time point. If there is more than one example, the windowing is performed for each example independently
* and all resulting window examples are merged into a complete example set.</li>
* </ul>
*
* <p>Please note that the encoding as examples is usually more efficient with respect to
* memory usage. To ensure backward compatibility, however, the default representation is
* encode_series_by_attributes (time as attributes).</p>
*
* @author Ingo Mierswa
* @version $Id: UnivariateSeries2WindowExamples.java,v 1.6 2008/09/08 18:53:49 ingomierswa Exp $
*/
public class UnivariateSeries2WindowExamples extends Series2WindowExamples {

    /**
     * Creates the operator by passing the given description on to the superclass.
     *
     * @param description the operator description handed to the superclass constructor
     */
    public UnivariateSeries2WindowExamples(OperatorDescription description) {
        super(description);
    }

    /**
     * Returns the number of window attributes, which for the univariate case is
     * exactly the window width. The example set and the representation are not
     * consulted.
     *
     * @return the value of <code>windowWidth</code>
     */
    public int getNumberOfAttributes(ExampleSet exampleSet, int representation, int windowWidth) {
        return windowWidth;
    }

    /**
     * Validates that the input is long enough for at least
     * <code>windowWidth + horizon</code> series values.
     *
     * <p>For <code>SERIES_AS_EXAMPLES</code> the example set must have exactly one
     * attribute (otherwise user error 133) and at least
     * <code>windowWidth + horizon</code> examples (otherwise user error 110).
     * For any other representation a warning is logged if the set does not contain
     * exactly one example, and at least <code>windowWidth + horizon</code> attributes
     * are required (otherwise user error 125). The step size is not checked here.</p>
     *
     * <p>NOTE(review): an input whose length is exactly
     * <code>windowWidth + horizon</code> passes these checks, but the loops in
     * {@link #fillSeriesExampleTable} only run while the window start is smaller than
     * <code>length - windowWidth - horizon</code> and therefore create no rows for it
     * - confirm whether this is intended.</p>
     *
     * @throws OperatorException a {@link UserError} if one of the checks fails
     */
    public void performChecks(ExampleSet exampleSet, int representation, int windowWidth, int stepSize, int horizon) throws OperatorException {
        final int requiredLength = windowWidth + horizon;

        if (representation != SERIES_AS_EXAMPLES) {
            // series encoded by attributes: every example is windowed on its own
            if (exampleSet.size() != 1) {
                logWarning("The sliding windows will be applied on each example of the example set, if you intended to perform a multivariate series2examples transformation, please use the corresponding operator");
            }
            final int availableAttributes = exampleSet.getAttributes().size();
            if (availableAttributes < requiredLength) {
                // not enough attributes
                throw new UserError(this, 125, Integer.valueOf(availableAttributes), Integer.valueOf(requiredLength));
            }
            return;
        }

        // series encoded by examples: exactly one attribute carries the series values
        final int attributeCount = exampleSet.getAttributes().size();
        if (attributeCount != 1) {
            throw new UserError(this, 133, Integer.valueOf(1), Integer.valueOf(attributeCount));
        }
        if (exampleSet.size() < requiredLength) {
            // not enough examples
            throw new UserError(this, 110, "window width + horizon = " + requiredLength);
        }
    }

    /**
     * Builds the name of a window attribute from the distance
     * <code>windowWidth - totalCounter</code>.
     *
     * @return for <code>SERIES_AS_EXAMPLES</code> the name of the first original
     *         attribute followed by <code>"-"</code> and the distance, otherwise
     *         <code>"Series_"</code> followed by the distance
     */
    public String getNameForAttribute(Attribute[] originalAttributeArray, int representation, int windowWidth, int horizon, int totalCounter) {
        final int distance = windowWidth - totalCounter;
        if (representation != SERIES_AS_EXAMPLES) {
            return "Series_" + distance;
        }
        return originalAttributeArray[0].getName() + "-" + distance;
    }

    /**
     * Appends one data row per window position to the given table. Each row holds
     * the <code>windowWidth</code> values of the window followed by the label value,
     * which is read at position <code>windowStart + windowWidth + horizon</code>
     * (so a horizon of 0 selects the value directly following the window). The
     * window start advances by <code>stepSize</code>, and <code>checkForStop()</code>
     * is invoked after every added row.
     *
     * <p>For <code>SERIES_AS_EXAMPLES</code> the positions index the examples and the
     * values are read from the first attribute delivered by the attribute iterator.
     * Otherwise the positions index the regular attribute array and the windowing is
     * repeated for every example of the set.</p>
     *
     * @throws OperatorException declared for failures of the invoked operator methods
     */
    public void fillSeriesExampleTable(MemoryExampleTable table, ExampleSet exampleSet, int representation, int windowWidth, int stepSize, int horizon)
            throws OperatorException {
        if (representation == SERIES_AS_EXAMPLES) {
            final Attribute valueAttribute = exampleSet.getAttributes().iterator().next();
            int windowStart = 0;
            while (windowStart < exampleSet.size() - windowWidth - horizon) {
                final double[] row = new double[windowWidth + 1];
                for (int offset = 0; offset < windowWidth; offset++) {
                    row[offset] = exampleSet.getExample(windowStart + offset).getValue(valueAttribute);
                }
                // last slot carries the label value
                row[windowWidth] = exampleSet.getExample(windowStart + windowWidth + horizon).getValue(valueAttribute);
                table.addDataRow(new DoubleArrayDataRow(row));
                checkForStop();
                windowStart += stepSize;
            }
            return;
        }

        final Attribute[] seriesAttributes = exampleSet.getAttributes().createRegularAttributeArray();
        final int startLimit = exampleSet.getAttributes().size() - windowWidth - horizon;
        for (Example currentExample : exampleSet) {
            int windowStart = 0;
            while (windowStart < startLimit) {
                final double[] row = new double[windowWidth + 1];
                for (int offset = 0; offset < windowWidth; offset++) {
                    row[offset] = currentExample.getValue(seriesAttributes[windowStart + offset]);
                }
                // last slot carries the label value
                row[windowWidth] = currentExample.getValue(seriesAttributes[windowStart + windowWidth + horizon]);
                table.addDataRow(new DoubleArrayDataRow(row));
                checkForStop();
                windowStart += stepSize;
            }
        }
    }
}