/** * Copyright (C) 2001-2017 by RapidMiner and the contributors * * Complete list of developers available at our web site: * * http://rapidminer.com * * This program is free software: you can redistribute it and/or modify it under the terms of the * GNU Affero General Public License as published by the Free Software Foundation, either version 3 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without * even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License along with this program. * If not, see http://www.gnu.org/licenses/. */ package com.rapidminer.example; import java.util.LinkedList; import java.util.List; import com.rapidminer.tools.Tools; /** * Formats an example as specified by the format string. The dollar sign '$' is an escape character. * Squared brackets '[' and ']' have a special meaning. The following escape sequences are * interpreted: * <dl> * <dt>$a:</dt> * <dd>All attributes separated by the default separator</dd> * <dt>$a[separator]:</dt> * <dd>All attributes separated by separator</dd> * <dt>$s[separator][indexSeparator]:</dt> * <dd>Sparse format. For all non 0 attributes the following strings are concatenated: the column * index, the value of indexSeparator, the attribute value. Attributes are separated by separator.</dd> * <dt>$v[name]:</dt> * <dd>The value of the attribute with the given name (both regular and special attributes)</dd> * <dt>$k[index]:</dt> * <dd>The value of the attribute with the given index in the example set</dd> * <dt>$l:</dt> * <dd>The label</dd> * <dt>$p:</dt> * <dd>The predicted label</dd> * <dt>$d:</dt> * <dd>All prediction confidences for all classes in the form conf(class)=value</dd> * <dt>$d[class]:</dt> * <dd>The prediction confidence for the defined class as a simple number</dd> * <dt>$i:</dt> * <dd>The id</dd> * <dt>$w:</dt> * <dd>The weight</dd> * <dt>$c:</dt> * <dd>The cluster</dd> * <dt>$b:</dt> * <dd>The batch</dd> * <dt>$n:</dt> * <dd>The newline character</dd> * <dt>$t:</dt> * <dd>The tabulator character</dd> * <dt>$$:</dt> * <dd>The dollar sign</dd> * <dt>$[:</dt> * <dd>The '[' character</dd> * <dt>$]:</dt> * <dd>The ']' character</dd> * </dl> * * @author Simon Fischer, Ingo Mierswa Exp $ */ public class ExampleFormatter { /** Represents one piece of formatting. */ public static interface FormatCommand { public String format(Example example); } /** * Implements some simple format commands like 'a' for all attributes or 'l' for the label. */ public static class SimpleCommand implements FormatCommand { private char command; private String[] arguments; private int fractionDigits = -1; private boolean quoteNominal; private SimpleCommand(ExampleSet exampleSet, char command, String[] arguments, int fractionDigits, boolean quoteNominal) throws FormatterException { this.command = command; this.fractionDigits = fractionDigits; this.quoteNominal = quoteNominal; switch (command) { case 'a': if (arguments.length == 0) { arguments = new String[] { Example.SEPARATOR }; } break; case 's': if (arguments.length == 0) { arguments = new String[] { Example.SEPARATOR, Example.SPARSE_SEPARATOR }; } if (arguments.length == 1) { arguments = new String[] { arguments[0], Example.SPARSE_SEPARATOR }; } if (arguments.length == 2) { arguments = new String[] { arguments[0], arguments[1] }; } if (arguments.length > 2) { throw new FormatterException( "For command 's' only up to two arguments (separator and sparse separator) are allowed."); } break; case 'l': if (exampleSet.getAttributes().getLabel() == null) { throw new FormatterException("Example set does not provide 'label' attribute, $l will not work."); } break; case 'p': if (exampleSet.getAttributes().getPredictedLabel() == null) { throw new FormatterException( "Example set does not provide 'predicted label' attribute, $p will not work."); } break; case 'i': if (exampleSet.getAttributes().getId() == null) { throw new FormatterException("Example set does not provide 'id' attribute, $i will not work."); } break; case 'w': if (exampleSet.getAttributes().getWeight() == null) { throw new FormatterException("Example set does not provide 'weight' attribute, $w will not work."); } break; case 'c': if (exampleSet.getAttributes().getCluster() == null) { throw new FormatterException("Example set does not provide 'cluster' attribute, $c will not work."); } break; case 'b': if (exampleSet.getAttributes().getSpecial(Attributes.BATCH_NAME) == null) { throw new FormatterException("Example set does not provide 'batch' attribute, $b will not work."); } break; case 'd': if (exampleSet.getAttributes().getPredictedLabel() == null) { throw new FormatterException( "Example set does not provide 'confidence' attributes, $d will not work."); } break; default: throw new FormatterException("Unknown command: '" + command + "'"); } this.arguments = arguments; } @Override public String format(Example example) { Attributes attributes = example.getAttributes(); Attribute chosenAttribute = null; switch (command) { case 'a': { StringBuffer str = new StringBuffer(); boolean first = true; for (Attribute attribute : attributes) { if (!first) { str.append(arguments[0]); } str.append(example.getValueAsString(attribute, fractionDigits, quoteNominal)); first = false; } return str.toString(); } case 's': return example.getAttributesAsSparseString(arguments[0], arguments[1], fractionDigits, quoteNominal); case 'l': chosenAttribute = attributes.getLabel(); break; case 'p': chosenAttribute = attributes.getPredictedLabel(); break; case 'i': chosenAttribute = attributes.getId(); break; case 'w': chosenAttribute = attributes.getWeight(); break; case 'c': chosenAttribute = attributes.getCluster(); break; case 'b': chosenAttribute = attributes.getSpecial(Attributes.BATCH_NAME); break; case 'd': { if (arguments.length != 0) { return Tools.formatNumber(example.getConfidence(arguments[0]), fractionDigits); } StringBuffer str = new StringBuffer(); boolean first = true; for (String value : attributes.getPredictedLabel().getMapping().getValues()) { if (first) { first = false; } else { str.append(Example.SEPARATOR); } str.append( "conf(" + value + ")=" + Tools.formatNumber(example.getConfidence(value), fractionDigits)); } return str.toString(); } default: return command + ""; } return example.getValueAsString(chosenAttribute, fractionDigits, quoteNominal); } } /** Returns the value of an argument which must be an attribute's name. */ public static class ValueCommand implements FormatCommand { private Attribute attribute; private int fractionDigits = -1; private boolean quoteWhitespace = false; public ValueCommand(char command, String[] arguments, ExampleSet exampleSet, int fractionDigits, boolean quoteWhitespace) throws FormatterException { this.fractionDigits = fractionDigits; this.quoteWhitespace = quoteWhitespace; if (arguments.length < 1) { throw new FormatterException("Command 'v' needs argument!"); } switch (command) { case 'v': attribute = exampleSet.getAttributes().get(arguments[0]); if (attribute == null) { throw new FormatterException("Unknown attribute: '" + arguments[0] + "'!"); } break; case 'k': int column = -1; try { column = Integer.parseInt(arguments[0]); } catch (NumberFormatException e) { throw new FormatterException("Argument for 'k' must be an integer!"); } if ((column < 0) || (column >= exampleSet.getAttributes().size())) { throw new FormatterException("Illegal column: '" + arguments[0] + "'!"); } int counter = 0; for (Attribute attribute : exampleSet.getAttributes()) { if (counter >= column) { this.attribute = attribute; break; } counter++; } if (attribute == null) { throw new FormatterException("Attribute #" + column + " not found."); } break; default: throw new FormatterException("Illegal command for ValueCommand: '" + command + "'"); } } @Override public String format(Example example) { return example.getValueAsString(attribute, fractionDigits, quoteWhitespace); } } /** Returns simply the given text. */ public static class TextCommand implements FormatCommand { private String text; private TextCommand(String text) { this.text = text; } @Override public String format(Example example) { return text; } } /** The commands used subsequently to format the example. */ private FormatCommand[] formatCommands; /** * Constructs a new ExampleFormatter that executes the given array of formatting commands. The * preferred way of creating an instance of ExampleFormatter is to * {@link ExampleFormatter#compile(String, ExampleSet, int, boolean)} a format string. */ public ExampleFormatter(FormatCommand[] formatCommands) { this.formatCommands = formatCommands; } /** * Factory method that compiles a format string and creates an instance of ExampleFormatter. */ public static ExampleFormatter compile(String formatString, ExampleSet exampleSet, int fractionDigits, boolean quoteWhitespace) throws FormatterException { List<FormatCommand> commandList = new LinkedList<FormatCommand>(); compile(formatString, exampleSet, commandList, fractionDigits, quoteWhitespace); FormatCommand[] commands = new FormatCommand[commandList.size()]; commandList.toArray(commands); return new ExampleFormatter(commands); } /** Adds all commands to the <code>commandList</code>. */ private static void compile(String formatString, ExampleSet exampleSet, List<FormatCommand> commandList, int fractionDigits, boolean quoteWhitespace) throws FormatterException { int start = 0; while (true) { int tagStart = formatString.indexOf("$", start); if (tagStart == -1) { commandList.add(new TextCommand(formatString.substring(start))); break; } if (tagStart == formatString.length() - 1) { throw new FormatterException("Format string ends in '$'."); } commandList.add(new TextCommand(formatString.substring(start, tagStart))); char command = formatString.charAt(tagStart + 1); if ((command == '$') || (command == '[') || (command == ']')) { commandList.add(new TextCommand("" + command)); start = tagStart + 2; continue; } else if (command == 'n') { commandList.add(new TextCommand(Tools.getLineSeparator())); start = tagStart + 2; continue; } else if (command == 't') { commandList.add(new TextCommand("\t")); start = tagStart + 2; continue; } start = tagStart + 2; List<String> argumentList = new LinkedList<String>(); while ((start < formatString.length()) && (formatString.charAt(start) == '[')) { int end = formatString.indexOf(']', start); if (end == -1) { throw new FormatterException("Unclosed '['!"); } argumentList.add(formatString.substring(start + 1, end)); start = end + 1; } String[] arguments = new String[argumentList.size()]; argumentList.toArray(arguments); switch (command) { case 'v': case 'k': commandList.add(new ValueCommand(command, arguments, exampleSet, fractionDigits, quoteWhitespace)); break; default: commandList.add(new SimpleCommand(exampleSet, command, arguments, fractionDigits, quoteWhitespace)); break; } } } /** Formats a single example. */ public String format(Example example) { StringBuffer str = new StringBuffer(); for (int i = 0; i < formatCommands.length; i++) { str.append(formatCommands[i].format(example)); } return str.toString(); } }