/* * RapidMiner * * Copyright (C) 2001-2008 by Rapid-I and the contributors * * Complete list of developers available at our web site: * * http://rapid-i.com * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see http://www.gnu.org/licenses/. */ package com.rapidminer.operator.preprocessing.filter; import java.util.Iterator; import java.util.List; import com.rapidminer.example.Attribute; import com.rapidminer.example.Example; import com.rapidminer.example.ExampleSet; import com.rapidminer.example.table.AttributeFactory; import com.rapidminer.example.table.DoubleArrayDataRow; import com.rapidminer.example.table.MemoryExampleTable; import com.rapidminer.operator.OperatorDescription; import com.rapidminer.operator.OperatorException; import com.rapidminer.operator.UserError; import com.rapidminer.parameter.ParameterType; import com.rapidminer.parameter.ParameterTypeCategory; import com.rapidminer.parameter.ParameterTypeInt; import com.rapidminer.parameter.ParameterTypeString; /** * <p>This operator transforms a given example set containing series data into a * new example set containing single valued examples. For this purpose, windows with * a specified window and step size are moved across the series and the attribute * value lying horizon values after the window end is used as label which should * be predicted. In contrast to the {@link Series2WindowExamples} operator, this operator * can also handle multivariate series data. In order to specify the dimension which should * be predicted, one must use the parameter "label_dimension" (counting starts at * 0). If you want to predict all dimensions of your multivariate series you * must setup several process definitions with different label dimensions, one for each dimension.</p> * * <p> * The series data must be given as ExampleSet. The parameter "series_representation" * defines how the series data is represented by the ExampleSet:</p> * <ul> * <li>encode_series_by_examples</li>: the series index variable (e.g. time) is encoded by the * examples, i.e. there is a set of attributes (one for each dimension of the multivariate * series) and a set of examples. Each example encodes the value vector for a new time point, * each attribute value represents another dimension of the multivariate series. * <li>encode_series_by_attributes</li>: the series index variable (e.g. time) is encoded by * the attributes, i.e. there is a set of examples (one for each dimension of the multivariate * series) and a set of attributes. The set of attribute values for all examples encodes the * value vector for a new time point, each example represents another dimension of the * multivariate series. * </ul> * * <p>Please note that the encoding as examples is usually more efficient with respect to the * memory usage.</p> * * @author Ingo Mierswa * @version $Id: MultivariateSeries2WindowExamples.java,v 1.8 2008/09/08 18:53:49 ingomierswa Exp $ */ public class MultivariateSeries2WindowExamples extends Series2WindowExamples { public static final String PARAMETER_LABEL_ATTRIBUTE = "label_attribute"; public static final String PARAMETER_LABEL_DIMENSION = "label_dimension"; public MultivariateSeries2WindowExamples(OperatorDescription description) { super(description); } /** Subclasses might override this method. */ protected Attribute createLabel(int valueType) throws OperatorException { if (isParameterSet(PARAMETER_LABEL_ATTRIBUTE) || isParameterSet(PARAMETER_LABEL_DIMENSION)) { return AttributeFactory.createAttribute("label", valueType); } else { return null; } } public int getNumberOfAttributes(ExampleSet exampleSet, int representation, int windowWidth) { if (representation == SERIES_AS_EXAMPLES) { return windowWidth * exampleSet.getAttributes().size(); } else { return windowWidth * exampleSet.size(); } } public String getNameForAttribute(Attribute[] originalAttributeArray, int representation, int windowWidth, int horizon, int totalCounter) { if (representation == SERIES_AS_EXAMPLES) { int attributeIndex = totalCounter / windowWidth; // int division! String baseName = originalAttributeArray[attributeIndex].getName(); int windowIndex = totalCounter % windowWidth; return baseName + "-" + (windowWidth - windowIndex - 1); } else { return "Series_" + totalCounter; } } public void performChecks(ExampleSet exampleSet, int representation, int windowWidth, int stepSize, int horizon) throws OperatorException { if (isParameterSet(PARAMETER_LABEL_ATTRIBUTE) && isParameterSet(PARAMETER_LABEL_DIMENSION)) { throw new UserError(this, 209, PARAMETER_LABEL_ATTRIBUTE, PARAMETER_LABEL_DIMENSION); } if (representation == SERIES_AS_EXAMPLES) { if (exampleSet.size() < windowWidth + horizon) { // not enough examples throw new UserError(this, 110, "window width + horizon = " + (windowWidth + horizon)); } } else { if (exampleSet.getAttributes().size() < windowWidth + horizon) { // not enough attributes throw new UserError(this, 125, exampleSet.getAttributes().size(), (windowWidth + horizon)); } } } public void fillSeriesExampleTable(MemoryExampleTable table, ExampleSet exampleSet, int representation, int windowWidth, int stepSize, int horizon) throws OperatorException { int labelDimension = -1; if (isParameterSet(PARAMETER_LABEL_DIMENSION)) { labelDimension = getParameterAsInt(PARAMETER_LABEL_DIMENSION); } else { String labelAttributeName = getParameterAsString(PARAMETER_LABEL_ATTRIBUTE); labelDimension = -1; int index = 0; for (Attribute attribute : exampleSet.getAttributes()) { if (attribute.getName().equals(labelAttributeName)) { labelDimension = index; break; } index++; } } if (representation == SERIES_AS_EXAMPLES) { for (int w = 0; w < exampleSet.size() - windowWidth - horizon; w+=stepSize) { int labelOffset = labelDimension == -1 ? 0 : 1; double[] data = new double[windowWidth * exampleSet.getAttributes().size() + labelOffset]; int a = 0; for (Attribute currentAttribute : exampleSet.getAttributes()) { for (int d = 0; d < windowWidth; d++) { data[a * windowWidth + d] = exampleSet.getExample(w + d).getValue(currentAttribute); } if (a == labelDimension) data[data.length - 1] = exampleSet.getExample(w + windowWidth + horizon).getValue(currentAttribute); a++; } table.addDataRow(new DoubleArrayDataRow(data)); checkForStop(); } } else { int lastAttribute = exampleSet.getAttributes().size() - windowWidth - horizon; Attribute[] attributeArray = new Attribute[exampleSet.getAttributes().size()]; int index = 0; for (Attribute attribute : exampleSet.getAttributes()) { attributeArray[index++] = attribute; } for (int w = 0; w < lastAttribute; w += stepSize) { int labelOffset = labelDimension == -1 ? 0 : 1; double[] data = new double[windowWidth * exampleSet.size() + labelOffset]; int counter = 0; Iterator<Example> reader = exampleSet.iterator(); while (reader.hasNext()) { Example example = reader.next(); for (int d = 0; d < windowWidth; d++) { data[counter * windowWidth + d] = example.getValue(attributeArray[w + d]); } if (counter == labelDimension) { data[data.length - 1] = example.getValue(attributeArray[w + windowWidth + horizon]); } counter++; } table.addDataRow(new DoubleArrayDataRow(data)); checkForStop(); } } } public List<ParameterType> getParameterTypes() { List<ParameterType> types = super.getParameterTypes(); for (ParameterType candidateType : types) { if (candidateType.getKey().equals(PARAMETER_HORIZON)) { ((ParameterTypeInt)candidateType).setMinValue(0); ((ParameterTypeInt)candidateType).setDefaultValue(0); candidateType.setExpert(true); } if (candidateType.getKey().equals(PARAMETER_SERIES_REPRESENTATION)) { ((ParameterTypeCategory)candidateType).setDefaultValue(SERIES_AS_EXAMPLES); } } ParameterType type = new ParameterTypeString(PARAMETER_LABEL_ATTRIBUTE, "The name of the attribute which should be used for creating the label values.", true); types.add(type); types.add(new ParameterTypeInt(PARAMETER_LABEL_DIMENSION, "The dimension which should be used for creating the label values (counting starts with 0).", 0, Integer.MAX_VALUE, true)); return types; } }