/* * RapidMiner * * Copyright (C) 2001-2011 by Rapid-I and the contributors * * Complete list of developers available at our web site: * * http://rapid-i.com * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see http://www.gnu.org/licenses/. */ package com.rapidminer.operator.visualization; import java.io.File; import java.io.FileWriter; import java.io.IOException; import java.io.PrintWriter; import java.util.Collection; import java.util.Iterator; import java.util.LinkedList; import java.util.List; import com.rapidminer.datatable.DataTable; import com.rapidminer.datatable.DataTableRow; import com.rapidminer.datatable.SimpleDataTable; import com.rapidminer.datatable.SimpleDataTableRow; import com.rapidminer.operator.Operator; import com.rapidminer.operator.OperatorDescription; import com.rapidminer.operator.OperatorException; import com.rapidminer.operator.UserError; import com.rapidminer.operator.Value; import com.rapidminer.operator.ports.DummyPortPairExtender; import com.rapidminer.operator.ports.PortPairExtender; import com.rapidminer.parameter.ParameterType; import com.rapidminer.parameter.ParameterTypeBoolean; import com.rapidminer.parameter.ParameterTypeCategory; import com.rapidminer.parameter.ParameterTypeFile; import com.rapidminer.parameter.ParameterTypeInt; import com.rapidminer.parameter.ParameterTypeList; import com.rapidminer.parameter.ParameterTypeString; import 
com.rapidminer.parameter.ParameterTypeValue; import com.rapidminer.parameter.UndefinedParameterError; import com.rapidminer.parameter.ParameterTypeValue.OperatorValueSelection; import com.rapidminer.parameter.conditions.EqualTypeCondition; /** * This operator records almost arbitrary data. It can be written to a file which can then be read, e.g., by gnuplot. * Alternatively, the collected data can be plotted by the GUI. This is even possible during process runtime (i.e. * online plotting).<br/> * * Parameters in the list <code>log</code> are interpreted as follows: The <var>key</var> gives the name for the column * name (e.g. for use in the plotter). The <var>value</var> specifies where to retrieve the value from. This is best * explained by an example: * <ul> * <li>If the value is <code>operator.Evaluator.value.absolute</code>, the ProcessLogOperator looks up the operator with * the name <code>Evaluator</code>. If this operator is a * {@link com.rapidminer.operator.performance.PerformanceEvaluator}, it has a value named <var>absolute</var> which * gives the absolute error of the last evaluation. This value is queried by the ProcessLogOperator</li> * <li>If the value is <code>operator.SVMLearner.parameter.C</code>, the ProcessLogOperator looks up the parameter * <var>C</var> of the operator named <code>SVMLearner</code>.</li> * </ul> * Each time the ProcessLogOperator is applied, all the values and parameters specified by the list <var>log</var> are * collected and stored in a data row. When the process finishes, the operator writes the collected data rows to a file * (if specified). In GUI mode, 2D or 3D plots are automatically generated and displayed in the result viewer. <br/> * Please refer to section {@rapidminer.ref sec:parameter_optimization|Advanced Processes/Parameter and performance * analysis} for an example application. * * @rapidminer.todo Use IOObjects for logging as well (e.g. 
* {@link com.rapidminer.operator.performance.PerformanceVector})
 * @author Simon Fischer, Ingo Mierswa
 */
public class ProcessLogOperator extends Operator {

    /** Name of the key part (column name) of each entry of the {@link #PARAMETER_LOG} list. */
    public static final String PARAMETER_COLUMN_NAME = "column_name";

    /** The parameter name for "operator.OPERATORNAME.[value|parameter].VALUE_NAME" */
    public static final String PARAMETER_COLUMN_VALUE = "value";

    /** Name of the parameter holding the file the logged data is written to. */
    public static final String PARAMETER_FILENAME = "filename";

    /** Name of the list parameter which maps column names to the values which should be logged. */
    public static final String PARAMETER_LOG = "log";

    /** Name of the boolean parameter which enables writing each row to the file immediately. */
    public static final String PARAMETER_PERSISTENT = "persistent";

    /** Name of the parameter selecting one of {@link #SORTING_TYPES}. */
    public static final String PARAMETER_SORTING_TYPE = "sorting_type";

    /** Name of the parameter holding the column name used for top-k / bottom-k selection. */
    public static final String PARAMETER_SORTING_DIMENSION = "sorting_dimension";

    /** Name of the parameter holding the number of rows kept for top-k / bottom-k selection. */
    public static final String PARAMETER_SORTING_K = "sorting_k";

    /** The display names of the sorting types, indexed by the <code>SORTING_TYPE_*</code> constants. */
    public static final String[] SORTING_TYPES = { "none", "top-k", "bottom-k" };

    public static final int SORTING_TYPE_NONE = 0;

    public static final int SORTING_TYPE_TOP_K = 1;

    public static final int SORTING_TYPE_BOTTOM_K = 2;

    /** Port pairs used to pass the incoming data through unchanged (see {@link #doWork()}). */
    private final PortPairExtender dummyPorts = new DummyPortPairExtender("through", getInputPorts(), getOutputPorts());

    public ProcessLogOperator(OperatorDescription description) {
        super(description);
        dummyPorts.start();
        getTransformer().addRule(dummyPorts.makePassThroughRule());
    }

    /** Returns the data table registered at the process under the name of this operator. */
    private SimpleDataTable getOwnDataTable() {
        return (SimpleDataTable) getProcess().getDataTable(getName());
    }

    /**
     * Resolves a single log entry to a double.
     *
     * @param selection
     *            describes the operator and the value or parameter to read
     * @param column
     *            the column of the data table the result is stored in; needed to map nominal values
     * @return the numerical value, the index delivered by the data table for a nominal value, or
     *         {@link Double#NaN} if the operator, value or parameter cannot be resolved
     */
    private double fetchValue(OperatorValueSelection selection, int column) throws UndefinedParameterError {
        Operator operator = lookupOperator(selection.getOperator());
        if (operator == null) {
            logWarning("Unknown operator '" + selection.getOperator() + "' in '" + selection + "'");
            return Double.NaN;
        }
        if (selection.isValue()) {
            return fetchOperatorValue(operator, selection, column);
        }
        return fetchParameterValue(operator, selection, column);
    }

    /** Reads a logged value (as opposed to a parameter) of the given operator, see {@link #fetchValue}. */
    private double fetchOperatorValue(Operator operator, OperatorValueSelection selection, int column) {
        Value value = operator.getValue(selection.getValueName());
        if (value == null) {
            logWarning("No such value in '" + selection + "'");
            return Double.NaN;
        }
        boolean nominal = value.isNominal();
        Object actualValue = value.getValue();
        if (actualValue == null) {
            // nothing delivered: treated as missing for nominal and numerical values alike
            return Double.NaN;
        }
        if (nominal) {
            return getOwnDataTable().mapString(column, actualValue.toString());
        }
        if (actualValue instanceof Number) {
            // accept every numeric type instead of insisting on Double
            return ((Number) actualValue).doubleValue();
        }
        logWarning("Value of '" + selection + "' is not numerical");
        return Double.NaN;
    }

    /** Reads a parameter of the given operator, see {@link #fetchValue}. */
    private double fetchParameterValue(Operator operator, OperatorValueSelection selection, int column)
            throws UndefinedParameterError {
        String parameterName = selection.getParameterName();
        ParameterType parameterType = operator.getParameterType(parameterName);
        if (parameterType == null) {
            logWarning("No such parameter in '" + selection + "'");
            return Double.NaN;
        }
        if (parameterType.isNumerical()) {
            Object rawValue = operator.getParameter(parameterName);
            if (rawValue == null) {
                logWarning("Parameter '" + selection + "' is not set");
                return Double.NaN;
            }
            try {
                return Double.parseDouble(rawValue.toString());
            } catch (NumberFormatException e) {
                logWarning("Cannot parse parameter value of '" + selection + "'");
                return Double.NaN;
            }
        }
        // nominal parameter: store the index the data table assigns to the string
        String value = parameterType.toString(operator.getParameter(parameterName));
        return getOwnDataTable().mapString(column, value);
    }

    /** Transforms the value part of every entry of the log parameter list into a selection object. */
    private Collection<OperatorValueSelection> getValueDescriptions() throws UndefinedParameterError {
        List<String[]> parameters = getParameterList(PARAMETER_LOG);
        List<OperatorValueSelection> valueSelections = new LinkedList<OperatorValueSelection>();
        for (String[] pair : parameters) {
            valueSelections.add(ParameterTypeValue.transformString2OperatorValueSelection(pair[1]));
        }
        return valueSelections;
    }

    /**
     * Creates a new data table named like this operator and adds it to the process. The column names
     * are the keys of the log parameter list.
     */
    public void createDataTable() throws OperatorException {
        List<String[]> parameters = getParameterList(PARAMETER_LOG);
        String[] columnNames = new String[parameters.size()];
        int index = 0;
        for (String[] parameter : parameters) {
            columnNames[index++] = parameter[0];
        }
        getProcess().addDataTable(new SimpleDataTable(getName(), columnNames));
    }

    /**
     * Creates the data table on first use, logs one row, writes it to the file immediately if no
     * sorting is used and the persistent parameter is set, and finally passes the input through.
     */
    @Override
    public void doWork() throws OperatorException {
        if (getProcess().getDataTable(getName()) == null) {
            createDataTable();
        }

        DataTableRow row = fetchAllValues();
        boolean unsorted = getParameterAsInt(PARAMETER_SORTING_TYPE) == SORTING_TYPE_NONE;
        if (unsorted && getParameterAsBoolean(PARAMETER_PERSISTENT)) {
            writeOnline(row);
        }

        dummyPorts.passDataThrough();
    }

    /**
     * Appends the given row to the output file. A header is written before if the file does not
     * exist yet or if this is the first application of the operator (an existing file is overwritten
     * in this case).
     *
     * @param row
     *            the row to append
     * @throws UserError
     *             if the file cannot be opened for writing
     */
    private void writeOnline(DataTableRow row) throws UserError {
        DataTable table = getProcess().getDataTable(getName());
        File outputFile = getParameterAsFile(PARAMETER_FILENAME, true);
        if (outputFile == null) {
            // no file specified --> nothing to write to (same policy as in processFinished())
            return;
        }
        try {
            // writing header if file does not exist or applyCount is 1 and file exists and has to be overwritten
            if (!outputFile.exists() || getApplyCount() == 1) {
                PrintWriter headerOut = new PrintWriter(new FileWriter(outputFile));
                try {
                    headerOut.println("# Generated by " + getName() + "[" + getClass().getName() + "]");
                    for (int j = 0; j < table.getNumberOfColumns(); j++) {
                        headerOut.print((j != 0 ? "\t" : "# ") + table.getColumnName(j));
                    }
                    headerOut.println();
                } finally {
                    headerOut.close();
                }
            }

            // writing actual data (append mode)
            PrintWriter dataOut = new PrintWriter(new FileWriter(outputFile, true));
            try {
                for (int j = 0; j < row.getNumberOfValues(); j++) {
                    dataOut.print((j != 0 ? "\t" : "") + table.getValueAsString(row, j));
                }
                dataOut.println();
            } finally {
                dataOut.close();
            }
        } catch (IOException e) {
            throw new UserError(this, 303, outputFile, e.getMessage());
        }
    }

    /**
     * Fetches all values specified by the log parameter list and stores them as a new row.
     * <p>
     * Without sorting, or as long as the table holds fewer than <code>sorting_k</code> rows, the row
     * is simply added. Otherwise the worst row with respect to the sorting dimension is determined
     * and replaced by the new row if the new row is strictly better. Nominal dimensions are compared
     * by the natural order of their strings.
     *
     * @return the row built from the fetched values, regardless of whether it was added to the table
     */
    private DataTableRow fetchAllValues() throws UndefinedParameterError {
        Collection<OperatorValueSelection> valueDescriptions = getValueDescriptions();
        double[] values = new double[valueDescriptions.size()];
        int column = 0;
        for (OperatorValueSelection selection : valueDescriptions) {
            values[column] = fetchValue(selection, column);
            column++;
        }

        DataTableRow dataRow = new SimpleDataTableRow(values, null);
        SimpleDataTable dataTable = getOwnDataTable();
        int sortingType = getParameterAsInt(PARAMETER_SORTING_TYPE);

        if ((sortingType == SORTING_TYPE_NONE) || (dataTable.getNumberOfRows() < getParameterAsInt(PARAMETER_SORTING_K))) {
            dataTable.add(dataRow);
            return dataRow;
        }

        String sortingDimension = getParameterAsString(PARAMETER_SORTING_DIMENSION);
        int sortingDimensionIndex = dataTable.getColumnIndex(sortingDimension);
        double candidateValue = dataRow.getValue(sortingDimensionIndex);

        if (dataTable.isNominal(sortingDimensionIndex)) {
            int worstIndex = findWorstNominalRow(dataTable, sortingDimensionIndex, sortingType);
            if (worstIndex >= 0) {
                String worst = dataTable.mapIndex(sortingDimensionIndex,
                        (int) dataTable.getRow(worstIndex).getValue(sortingDimensionIndex));
                String candidate = dataTable.mapIndex(sortingDimensionIndex, (int) candidateValue);
                int comparison = candidate.compareTo(worst);
                // the index guard applies to BOTH sorting types
                if (((sortingType == SORTING_TYPE_TOP_K) && (comparison > 0))
                        || ((sortingType == SORTING_TYPE_BOTTOM_K) && (comparison < 0))) {
                    replaceRow(dataTable, worstIndex, dataRow);
                }
            }
        } else {
            int worstIndex = findWorstNumericalRow(dataTable, sortingDimensionIndex, sortingType);
            if (worstIndex >= 0) {
                double worst = dataTable.getRow(worstIndex).getValue(sortingDimensionIndex);
                // comparisons with NaN are false, hence a NaN candidate never replaces a row
                // and a NaN worst value is never replaced
                if (((sortingType == SORTING_TYPE_TOP_K) && (candidateValue > worst))
                        || ((sortingType == SORTING_TYPE_BOTTOM_K) && (candidateValue < worst))) {
                    replaceRow(dataTable, worstIndex, dataRow);
                }
            }
        }
        return dataRow;
    }

    /**
     * Returns the index of the row with the worst nominal value in the given column (smallest string
     * for top-k, largest string for bottom-k) or -1 if the table has no rows.
     */
    private int findWorstNominalRow(SimpleDataTable dataTable, int columnIndex, int sortingType) {
        String worst = null;
        int worstIndex = -1;
        for (int r = 0; r < dataTable.getNumberOfRows(); r++) {
            double rowValue = dataTable.getRow(r).getValue(columnIndex);
            String nominalValue = dataTable.mapIndex(columnIndex, (int) rowValue);
            if ((worst == null)
                    || ((sortingType == SORTING_TYPE_TOP_K) && (nominalValue.compareTo(worst) < 0))
                    || ((sortingType == SORTING_TYPE_BOTTOM_K) && (nominalValue.compareTo(worst) > 0))) {
                worst = nominalValue;
                worstIndex = r;
            }
        }
        return worstIndex;
    }

    /**
     * Returns the index of the row with the worst numerical value in the given column (smallest for
     * top-k, largest for bottom-k) or -1 if the table has no rows. As long as the current worst value
     * is NaN it is replaced by the value of the next row.
     */
    private int findWorstNumericalRow(SimpleDataTable dataTable, int columnIndex, int sortingType) {
        double worst = Double.NaN;
        int worstIndex = -1;
        for (int r = 0; r < dataTable.getNumberOfRows(); r++) {
            double rowValue = dataTable.getRow(r).getValue(columnIndex);
            if (Double.isNaN(worst)
                    || ((sortingType == SORTING_TYPE_TOP_K) && (rowValue < worst))
                    || ((sortingType == SORTING_TYPE_BOTTOM_K) && (rowValue > worst))) {
                worst = rowValue;
                worstIndex = r;
            }
        }
        return worstIndex;
    }

    /** Removes the row at the given index, adds the new row and cleans the mapping tables. */
    private void replaceRow(SimpleDataTable dataTable, int rowIndex, DataTableRow newRow) {
        dataTable.remove(dataTable.getRow(rowIndex));
        dataTable.add(newRow);
        dataTable.cleanMappingTables();
    }

    /**
     * If the persistent parameter is not set, the complete data table is written to the specified
     * file at the end of the process. Nothing is written if no file was specified.
     */
    @Override
    public void processFinished() throws OperatorException {
        super.processFinished();
        if (getParameterAsBoolean(PARAMETER_PERSISTENT)) {
            // rows were handled by doWork() already
            return;
        }
        DataTable table = getProcess().getDataTable(getName());
        if (table == null) {
            return;
        }

        File file = null;
        try {
            file = getParameterAsFile(PARAMETER_FILENAME, true);
        } catch (UndefinedParameterError e) {
            // tries to determine a file for output writing
            // if no file was specified --> do not write results in file
        }
        if (file == null) {
            return;
        }

        log("Writing data to '" + file.getName() + "'");
        PrintWriter out = null;
        try {
            out = new PrintWriter(new FileWriter(file));
            table.write(out);
        } catch (IOException e) {
            throw new UserError(this, 303, file.getName(), e.getMessage());
        } finally {
            if (out != null) {
                out.close();
            }
        }
    }

    /** Adds the file, log list, sorting and persistence parameters to the inherited parameter types. */
    @Override
    public List<ParameterType> getParameterTypes() {
        List<ParameterType> types = super.getParameterTypes();

        ParameterType type = new ParameterTypeFile(PARAMETER_FILENAME, "File to save the data to.", "log", true);
        type.setExpert(false);
        types.add(type);

        type = new ParameterTypeList(PARAMETER_LOG, "List of key value pairs where the key is the column name and the value specifies the process value to log.",
                new ParameterTypeString(PARAMETER_COLUMN_NAME, "The name of the column in the process log."),
                new ParameterTypeValue(PARAMETER_COLUMN_VALUE, "operator.OPERATORNAME.[value|parameter].VALUE_NAME"));
        type.setExpert(false);
        types.add(type);

        types.add(new ParameterTypeCategory(PARAMETER_SORTING_TYPE, "Indicates if the logged values should be sorted according to the specified dimension.", SORTING_TYPES, SORTING_TYPE_NONE));

        // only relevant for top-k / bottom-k
        type = new ParameterTypeString(PARAMETER_SORTING_DIMENSION, "If the sorting type is set to top-k or bottom-k, this dimension is used for sorting.", true);
        type.registerDependencyCondition(new EqualTypeCondition(this, PARAMETER_SORTING_TYPE, SORTING_TYPES, true, SORTING_TYPE_TOP_K, SORTING_TYPE_BOTTOM_K));
        types.add(type);

        // only relevant for top-k / bottom-k
        type = new ParameterTypeInt(PARAMETER_SORTING_K, "If the sorting type is set to top-k or bottom-k, this number of results will be kept.", 1, Integer.MAX_VALUE, 100);
        type.registerDependencyCondition(new EqualTypeCondition(this, PARAMETER_SORTING_TYPE, SORTING_TYPES, false, SORTING_TYPE_TOP_K, SORTING_TYPE_BOTTOM_K));
        types.add(type);

        // only relevant if no sorting is used
        type = new ParameterTypeBoolean(PARAMETER_PERSISTENT, "Indicates if results should be written to file immediately", false);
        type.registerDependencyCondition(new EqualTypeCondition(this, PARAMETER_SORTING_TYPE, SORTING_TYPES, false, SORTING_TYPE_NONE));
        types.add(type);
        return types;
    }
}