/* * RapidMiner * * Copyright (C) 2001-2011 by Rapid-I and the contributors * * Complete list of developers available at our web site: * * http://rapid-i.com * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see http://www.gnu.org/licenses/. */ package com.rapidminer.operator.preprocessing.filter; import java.util.Calendar; import java.util.List; import com.rapidminer.example.Attribute; import com.rapidminer.example.Example; import com.rapidminer.example.ExampleSet; import com.rapidminer.example.table.AttributeFactory; import com.rapidminer.operator.OperatorDescription; import com.rapidminer.operator.OperatorException; import com.rapidminer.operator.UserError; import com.rapidminer.operator.annotation.ResourceConsumptionEstimator; import com.rapidminer.operator.ports.metadata.AttributeMetaData; import com.rapidminer.operator.ports.metadata.AttributeSetPrecondition; import com.rapidminer.operator.ports.metadata.ExampleSetMetaData; import com.rapidminer.operator.ports.metadata.MetaData; import com.rapidminer.operator.ports.metadata.SetRelation; import com.rapidminer.parameter.ParameterType; import com.rapidminer.parameter.ParameterTypeAttribute; import com.rapidminer.parameter.ParameterTypeBoolean; import com.rapidminer.parameter.ParameterTypeCategory; import com.rapidminer.parameter.UndefinedParameterError; import com.rapidminer.parameter.conditions.EqualTypeCondition; import com.rapidminer.tools.Ontology; import com.rapidminer.tools.OperatorResourceConsumptionHandler; import com.rapidminer.tools.Tools; /** * This operator changes a date attribute into a numerical one. It allows to * specify exactly which entity should be extracted and to which unit or date * it should relate. As an example, it is possible to extract seconds within * a minute. Analogously, it is also possible to extract the day within a month. * But it is also possible to extract the day within a week or within a year. * For all time units, it is also possible to extract the number which has passed * by since 1970-01-01 00:00. * * @author Tobias Malbrecht */ public class Date2Numerical extends AbstractDateDataProcessing { public static final String PARAMETER_ATTRIBUTE_NAME = "attribute_name"; public static final String PARAMETER_TIME_UNIT = "time_unit"; public static final String PARAMETER_KEEP_OLD_ATTRIBUTE = "keep_old_attribute"; Calendar calendar = null; public Date2Numerical(OperatorDescription description) { super(description); getExampleSetInputPort().addPrecondition(new AttributeSetPrecondition(getExampleSetInputPort(), AttributeSetPrecondition.getAttributesByParameter(this, PARAMETER_ATTRIBUTE_NAME))); } @Override protected MetaData modifyMetaData(ExampleSetMetaData metaData) throws UndefinedParameterError { AttributeMetaData amd = metaData.getAttributeByName(getParameterAsString(PARAMETER_ATTRIBUTE_NAME)); if (amd != null) { AttributeMetaData newAttribute = amd.clone(); newAttribute.setType(Ontology.INTEGER); newAttribute.getMean().setUnkown(); newAttribute.setValueSetRelation(SetRelation.UNKNOWN); if (!getParameterAsBoolean(PARAMETER_KEEP_OLD_ATTRIBUTE)) metaData.removeAttribute(amd); else newAttribute.setName(newAttribute.getName() + "_" + TIME_UNITS[getParameterAsInt(PARAMETER_TIME_UNIT)]); metaData.addAttribute(newAttribute); } return metaData; } @Override public ExampleSet apply(ExampleSet exampleSet) throws OperatorException { String attributeName = getParameterAsString(PARAMETER_ATTRIBUTE_NAME); int timeUnit = getParameterAsInt(PARAMETER_TIME_UNIT); int relativeTo = getParameterAsInt(PARAMETERS_RELATIVE_TO[timeUnit]); Attribute dateAttribute = exampleSet.getAttributes().get(attributeName); if (dateAttribute == null) { throw new UserError(this, 111, attributeName); } Attribute newAttribute = AttributeFactory.createAttribute(Ontology.INTEGER); exampleSet.getExampleTable().addAttribute(newAttribute); exampleSet.getAttributes().addRegular(newAttribute); calendar = Tools.getPreferredCalendar(); for (Example example : exampleSet) { double value = example.getValue(dateAttribute); if (Double.isNaN(value)) { example.setValue(newAttribute, value); } else { example.setValue(newAttribute, extract((long) value, timeUnit, relativeTo)); } } if (!getParameterAsBoolean(PARAMETER_KEEP_OLD_ATTRIBUTE)) { exampleSet.getAttributes().remove(dateAttribute); newAttribute.setName(attributeName); } else { newAttribute.setName(attributeName + "_" + TIME_UNITS[timeUnit]); } return exampleSet; } private double extract(long milliseconds, int timeUnit, int relativeTo) { calendar.setTimeInMillis(milliseconds); switch (timeUnit) { case MILLISECOND: switch (relativeTo) { case MILLISECOND_RELATIVE_TO_SECOND: return milliseconds % 1000; case MILLISECOND_RELATIVE_TO_EPOCH: return milliseconds; } break; case SECOND: switch (relativeTo) { case SECOND_RELATIVE_TO_MINUTE: return calendar.get(Calendar.SECOND); case SECOND_RELATIVE_TO_HOUR: return calendar.get(Calendar.MINUTE) * 60 + calendar.get(Calendar.SECOND); case SECOND_RELATIVE_TO_DAY: return calendar.get(Calendar.HOUR_OF_DAY) * 3600 + calendar.get(Calendar.MINUTE) * 60 + calendar.get(Calendar.SECOND); case SECOND_RELATIVE_TO_EPOCH: return milliseconds / 1000; } break; case MINUTE: switch (relativeTo) { case MINUTE_RELATIVE_TO_HOUR: return calendar.get(Calendar.MINUTE); case MINUTE_RELATIVE_TO_DAY: return calendar.get(Calendar.HOUR_OF_DAY) * 60 + calendar.get(Calendar.MINUTE); case MINUTE_RELATIVE_TO_EPOCH: return milliseconds / 60000; } case HOUR: switch (relativeTo) { case HOUR_RELATIVE_TO_DAY: return calendar.get(Calendar.HOUR_OF_DAY); case HOUR_RELATIVE_TO_EPOCH: return milliseconds / 3600000; } break; case DAY: switch (relativeTo) { case DAY_RELATIVE_TO_WEEK: return calendar.get(Calendar.DAY_OF_WEEK); case DAY_RELATIVE_TO_MONTH: return calendar.get(Calendar.DAY_OF_MONTH); case DAY_RELATIVE_TO_YEAR: return calendar.get(Calendar.DAY_OF_YEAR); case DAY_RELATIVE_TO_EPOCH: return milliseconds / 86400000; } break; case WEEK: switch (relativeTo) { case WEEK_RELATIVE_TO_MONTH: return calendar.get(Calendar.WEEK_OF_MONTH); case WEEK_RELATIVE_TO_YEAR: return calendar.get(Calendar.WEEK_OF_YEAR); case WEEK_RELATIVE_TO_EPOCH: return milliseconds / 604800000L; } break; case MONTH: switch (relativeTo) { case MONTH_RELATIVE_TO_QUARTER: return calendar.get(Calendar.MONTH) % 3 + 1; case MONTH_RELATIVE_TO_YEAR: return calendar.get(Calendar.MONTH) + 1; case MONTH_RELATIVE_TO_EPOCH: return (calendar.get(Calendar.MONTH) + 1) + (calendar.get(Calendar.YEAR) - 1970) * 12; } break; case QUARTER: switch (relativeTo) { case QUARTER_RELATIVE_TO_YEAR: return calendar.get(Calendar.MONTH) / 3 + 1; case QUARTER_RELATIVE_TO_EPOCH: return (calendar.get(Calendar.MONTH) / 3 + 1) + (calendar.get(Calendar.YEAR) - 1970) * 4; } break; case HALF_YEAR: switch (relativeTo) { case HALF_YEAR_RELATIVE_TO_YEAR: return calendar.get(Calendar.MONTH) / 6 + 1; case HALF_YEAR_RELATIVE_TO_EPOCH: return (calendar.get(Calendar.MONTH) / 6 + 1) + (calendar.get(Calendar.YEAR) - 1970) * 2; } break; case YEAR: switch (relativeTo) { case YEAR_RELATIVE_TO_ERA: return calendar.get(Calendar.YEAR); case YEAR_RELATIVE_TO_EPOCH: return calendar.get(Calendar.YEAR) - 1970; } break; default: break; } return Double.NaN; } @Override public List<ParameterType> getParameterTypes() { List<ParameterType> types = super.getParameterTypes(); types.add(new ParameterTypeAttribute(PARAMETER_ATTRIBUTE_NAME, "The attribute which should be parsed.", getExampleSetInputPort(), false, false, Ontology.DATE_TIME)); types.add(new ParameterTypeCategory(PARAMETER_TIME_UNIT, "The unit in which the time is measured.", TIME_UNITS, 0, false)); ParameterType type = null; for (int i = 0; i < TIME_UNITS.length; i++) { type = new ParameterTypeCategory(PARAMETERS_RELATIVE_TO[i], "The unit the value is extracted relativ to.", RELATIVE_TO_MODES[i], RELATIVE_TO_DEFAULTS[i], false); type.registerDependencyCondition(new EqualTypeCondition(this, PARAMETER_TIME_UNIT, TIME_UNITS, true, i)); types.add(type); } types.add(new ParameterTypeBoolean(PARAMETER_KEEP_OLD_ATTRIBUTE, "Indicates if the original date attribute should be kept.", false)); return types; } @Override public boolean writesIntoExistingData() { return false; } @Override public ResourceConsumptionEstimator getResourceConsumptionEstimator() { return OperatorResourceConsumptionHandler.getResourceConsumptionEstimator(getInputPort(), Date2Numerical.class, null); } }