/*
 *  RapidMiner
 *
 *  Copyright (C) 2001-2011 by Rapid-I and the contributors
 *
 *  Complete list of developers available at our web site:
 *
 *       http://rapid-i.com
 *
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU Affero General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU Affero General Public License for more details.
 *
 *  You should have received a copy of the GNU Affero General Public License
 *  along with this program.  If not, see http://www.gnu.org/licenses/.
 */
package com.rapidminer.operator.preprocessing.filter;

import java.text.DecimalFormat;
import java.text.DecimalFormatSymbols;
import java.text.NumberFormat;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.Locale;

import com.rapidminer.example.Attribute;
import com.rapidminer.example.Example;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.annotation.ResourceConsumptionEstimator;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.parameter.ParameterTypeBoolean;
import com.rapidminer.parameter.ParameterTypeCategory;
import com.rapidminer.parameter.ParameterTypeString;
import com.rapidminer.parameter.conditions.EqualTypeCondition;
import com.rapidminer.tools.Ontology;
import com.rapidminer.tools.OperatorResourceConsumptionHandler;

/**
 * <p>This operator formats numerical values in the specified number format.
 * It supports different kinds of numbers, including integers (123),
 * fixed-point numbers (123.4), scientific notation (1.23E4), percentages (12%),
 * and currency amounts ($123). The format type parameter specifies the basic format.
 * For all format types but "pattern" the format is completely defined by the
 * specified locale. In case of "pattern" the specified pattern defines the format and
 * the locale only provides the symbols used by the pattern (for example the decimal
 * and the grouping separator).</p>
 *
 * <p>Please note that this operator only works on numerical attributes
 * and the result will be in any case a nominal attribute no matter if the resulting
 * format would again be a parsable number.</p>
 *
 * <p>Please note also that the "use grouping" parameter is applied to every format type,
 * i.e. it also overrides a grouping defined within a pattern.</p>
 *
 * <p>In case of the pattern format type, a pattern parameter is used to define the
 * format. If two different formats for positive and negative numbers should be used, those
 * formats can be defined by a separating ';'. The pattern must have the following
 * structure: <br /><br />
 *
 * pattern    := subpattern{;subpattern} <br />
 * subpattern := {prefix}integer{.fraction}{suffix} <br />
 * prefix     := any character combination including white space <br />
 * suffix     := any character combination including white space <br />
 * integer    := '#'* '0'* '0' <br />
 * fraction   := '0'* '#'* <br />
 * </p>
 *
 * <p>The following placeholders can be used within the pattern parameter: <br /><br />
 *
 * 0 &nbsp; a digit <br />
 * # &nbsp; a digit, zero shows as absent <br />
 * . &nbsp; placeholder for decimal separator <br />
 * , &nbsp; placeholder for grouping separator. <br />
 * E &nbsp; separates mantissa and exponent for exponential formats. <br />
 * - &nbsp; default negative prefix. <br />
 * % &nbsp; multiply by 100 and show as percentage <br />
 * X &nbsp; any other characters can be used in the prefix or suffix <br />
 * ' &nbsp; used to quote special characters in a prefix or suffix. <br />
 * </p>
 *
 * @author Mierswa
 */
public class NumericToFormattedNominal extends NumericToNominal {

    /** Key of the parameter selecting one of the {@link #FORMAT_TYPES}. */
    public static final String PARAMETER_FORMAT_TYPE = "format_type";

    /** Key of the parameter holding the pattern used for {@link #FORMAT_TYPE_PATTERN}. */
    public static final String PARAMETER_PATTERN = "pattern";

    /** Key of the parameter selecting an entry of {@link #availableLocales}. */
    public static final String PARAMETER_LOCALE = "locale";

    /** Key of the parameter switching the grouping of the created number format on or off. */
    public static final String PARAMETER_USE_GROUPING = "use_grouping";

    /** Names of the format types; the array index of a name is its FORMAT_TYPE_* constant. */
    public static final String[] FORMAT_TYPES = new String[] {
        "number",
        "integer",
        "currency",
        "percent",
        "pattern"
    };

    public static final int FORMAT_TYPE_NUMBER   = 0;
    public static final int FORMAT_TYPE_INTEGER  = 1;
    public static final int FORMAT_TYPE_CURRENCY = 2;
    public static final int FORMAT_TYPE_PERCENT  = 3;
    public static final int FORMAT_TYPE_PATTERN  = 4;

    /** All locales delivered by {@link Locale#getAvailableLocales()}, sorted by their display name. */
    public static List<Locale> availableLocales = new ArrayList<Locale>();

    /** The display names of {@link #availableLocales}, in the same order. */
    public static String[] availableLocaleNames;

    /** Index of {@link Locale#US} in {@link #availableLocales}, or 0 if this locale is not available. */
    public static int defaultLocale;

    static {
        Collections.addAll(availableLocales, Locale.getAvailableLocales());

        Collections.sort(availableLocales, new Comparator<Locale>() {
            public int compare(Locale o1, Locale o2) {
                return o1.getDisplayName().compareTo(o2.getDisplayName());
            }
        });

        availableLocaleNames = new String[availableLocales.size()];
        defaultLocale = -1;
        for (int i = 0; i < availableLocales.size(); i++) {
            Locale currentLocale = availableLocales.get(i);
            availableLocaleNames[i] = currentLocale.getDisplayName();
            if (currentLocale.equals(Locale.US)) {
                defaultLocale = i;
            }
        }
        if (defaultLocale < 0) {
            defaultLocale = 0;
        }
    }

    /** The format created by {@link #init()} and released again by {@link #cleanUp()}. */
    private NumberFormat numberFormat;

    public NumericToFormattedNominal(OperatorDescription description) {
        super(description);
    }

    /**
     * Creates the number format from the current parameter settings.
     *
     * <p>A locale index outside of {@link #availableLocales} falls back to {@link Locale#US}
     * and an unknown format type falls back to the plain number format, so that a number
     * format is always available after this method returned normally.</p>
     *
     * @throws OperatorException if a parameter cannot be read, or if the pattern format type
     *         is selected and the pattern is missing or not a valid {@link DecimalFormat} pattern
     */
    @Override
    public void init() throws OperatorException {
        int localeIndex = getParameterAsInt(PARAMETER_LOCALE);
        Locale selectedLocale = Locale.US;
        if ((localeIndex >= 0) && (localeIndex < availableLocales.size())) {
            selectedLocale = availableLocales.get(localeIndex);
        }

        int formatType = getParameterAsInt(PARAMETER_FORMAT_TYPE);
        switch (formatType) {
            case FORMAT_TYPE_INTEGER:
                this.numberFormat = NumberFormat.getIntegerInstance(selectedLocale);
                break;
            case FORMAT_TYPE_CURRENCY:
                this.numberFormat = NumberFormat.getCurrencyInstance(selectedLocale);
                break;
            case FORMAT_TYPE_PERCENT:
                this.numberFormat = NumberFormat.getPercentInstance(selectedLocale);
                break;
            case FORMAT_TYPE_PATTERN:
                this.numberFormat = createPatternFormat(selectedLocale);
                break;
            case FORMAT_TYPE_NUMBER:
            default:
                // an unknown type must not leave the format unset, otherwise the
                // grouping call below fails with a NullPointerException
                this.numberFormat = NumberFormat.getNumberInstance(selectedLocale);
                break;
        }

        // applied to all format types, i.e. this also overrides the grouping of a pattern
        this.numberFormat.setGroupingUsed(getParameterAsBoolean(PARAMETER_USE_GROUPING));
    }

    /**
     * Creates the format for the pattern format type from the pattern parameter
     * and the symbols of the given locale.
     *
     * @param locale the locale delivering the symbols used by the pattern
     * @return the format defined by the pattern parameter
     * @throws OperatorException if the pattern is not set or not a valid pattern
     */
    private NumberFormat createPatternFormat(Locale locale) throws OperatorException {
        String formatString = getParameterAsString(PARAMETER_PATTERN);
        if (formatString == null) {
            throw new OperatorException("The parameter '" + PARAMETER_PATTERN
                    + "' must be defined if the format type '" + FORMAT_TYPES[FORMAT_TYPE_PATTERN] + "' is used.");
        }
        try {
            // the constructor is used instead of DecimalFormatSymbols.getInstance(Locale)
            // since the latter is only available for Java versions >= 6
            return new DecimalFormat(formatString, new DecimalFormatSymbols(locale));
        } catch (IllegalArgumentException e) {
            throw new OperatorException("The pattern '" + formatString
                    + "' is not a valid number format pattern: " + e.getMessage(), e);
        }
    }

    /** Releases the number format created by {@link #init()}. */
    @Override
    public void cleanUp() throws OperatorException {
        this.numberFormat = null;
    }

    /**
     * Writes the formatted value into the new attribute of the given example. NaN values
     * are written as NaN; all other values are formatted with the number format
     * created by {@link #init()} and stored as the index which the mapping of the new
     * attribute delivers for the formatted string.
     */
    @Override
    protected void setValue(Example example, Attribute newAttribute, double value) {
        if (Double.isNaN(value)) {
            example.setValue(newAttribute, Double.NaN);
            return;
        }
        String formattedValue = this.numberFormat.format(value);
        example.setValue(newAttribute, newAttribute.getMapping().mapString(formattedValue));
    }

    /**
     * Adds the parameters for the format type, the pattern (depending on the pattern format
     * type), the locale, and the grouping to the parameters of the super class.
     */
    @Override
    public List<ParameterType> getParameterTypes() {
        List<ParameterType> types = super.getParameterTypes();

        ParameterType type = new ParameterTypeCategory(PARAMETER_FORMAT_TYPE, "Number formatting will be performed according to the selected type.", FORMAT_TYPES, FORMAT_TYPE_NUMBER);
        types.add(type);

        type = new ParameterTypeString(PARAMETER_PATTERN, "The format string, e.g. '0.###E0 m/s'.", true);
        type.registerDependencyCondition(new EqualTypeCondition(this, PARAMETER_FORMAT_TYPE, FORMAT_TYPES, true, FORMAT_TYPE_PATTERN));
        types.add(type);

        type = new ParameterTypeCategory(PARAMETER_LOCALE, "The locale used for formatting the numbers, it defines for example the decimal and the grouping separator ('.' and ',' for the US in contrast to ',' and '.' for Germany).", availableLocaleNames, defaultLocale);
        type.setExpert(false);
        types.add(type);

        type = new ParameterTypeBoolean(PARAMETER_USE_GROUPING, "Indicates if a grouping character should be used for larger numbers (e.g. ',' for the US or '.' for Germany).", false);
        type.setExpert(false);
        types.add(type);

        return types;
    }

    /** The generated attributes are always nominal. */
    @Override
    protected int getGeneratedAttributevalueType() {
        return Ontology.NOMINAL;
    }

    /** Delivers the estimator which the resource consumption handler provides for this operator class and its input port. */
    @Override
    public ResourceConsumptionEstimator getResourceConsumptionEstimator() {
        return OperatorResourceConsumptionHandler.getResourceConsumptionEstimator(getInputPort(), NumericToFormattedNominal.class, null);
    }
}