/*
* RapidMiner
*
* Copyright (C) 2001-2008 by Rapid-I and the contributors
*
* Complete list of developers available at our web site:
*
* http://rapid-i.com
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see http://www.gnu.org/licenses/.
*/
package com.rapidminer.example;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import com.rapidminer.tools.Tools;
/**
* Formats an example as specified by the format string. The dollar sign '$' is
* an escape character. Square brackets '[' and ']' have a special meaning. The
* following escape sequences are interpreted:
* <dl>
* <dt>$a:</dt>
* <dd> All attributes separated by the default separator</dd>
* <dt>$a[separator]:</dt>
* <dd> All attributes separated by separator</dd>
* <dt>$s[separator][indexSeparator]:</dt>
* <dd> Sparse format. For all non 0 attributes the following strings are
* concatenated: the column index, the value of indexSeparator, the attribute
* value. Attributes are separated by separator.</dd>
* <dt>$v[name]:</dt>
* <dd> The value of the attribute with the given name (both regular and special
* attributes)</dd>
* <dt>$k[index]:</dt>
* <dd> The value of the attribute with the given index in the example set</dd>
* <dt>$l:</dt>
* <dd> The label</dd>
* <dt>$p:</dt>
* <dd> The predicted label</dd>
* <dt>$d:</dt>
* <dd> All prediction confidences for all classes in the form conf(class)=value</dd>
* <dt>$d[class]:</dt>
* <dd> The prediction confidence for the defined class as a simple number</dd>
* <dt>$i:</dt>
* <dd> The id</dd>
* <dt>$w:</dt>
* <dd> The weight</dd>
* <dt>$c:</dt>
* <dd> The cluster</dd>
* <dt>$b:</dt>
* <dd> The batch</dd>
* <dt>$n:</dt>
* <dd> The newline character</dd>
* <dt>$t:</dt>
* <dd> The tabulator character</dd>
* <dt>$$:</dt>
* <dd> The dollar sign</dd>
* <dt>$[:</dt>
* <dd> The '[' character</dd>
* <dt>$]:</dt>
* <dd> The ']' character</dd>
* </dl>
*
* @author Simon Fischer, Ingo Mierswa
* @version $Id: ExampleFormatter.java,v 2.15 2006/03/27 13:21:58 ingomierswa
* Exp $
*/
public class ExampleFormatter {
/** Represents one piece of formatting. */
public static interface FormatCommand {
public String format(Example example);
}
/**
 * Format command for the escape sequences identified by a single character:
 * 'a' (all attributes), 's' (sparse format), 'l' (label), 'p' (predicted
 * label), 'd' (prediction confidences), 'i' (id), 'w' (weight), 'c'
 * (cluster) and 'b' (batch).
 */
public static class SimpleCommand implements FormatCommand {

    /** Every command character the constructor accepts. */
    private static final String KNOWN_COMMANDS = "aslpdiwcb";

    /** The command character this instance executes. */
    private final char command;

    /** The (possibly defaulted) arguments of the command. */
    private final String[] arguments;

    /** Number of fraction digits handed on to the value formatting calls. */
    private final int fractionDigits;

    /**
     * Validates the command against the given example set and fills in
     * default arguments where the command allows them to be omitted.
     *
     * @throws FormatterException
     *             if the command character is unknown, if 's' receives more
     *             than two arguments, or if the attribute needed by the
     *             command is not provided by the example set
     */
    private SimpleCommand(ExampleSet exampleSet, char command, String[] arguments, int fractionDigits) throws FormatterException {
        this.command = command;
        this.fractionDigits = fractionDigits;

        if (KNOWN_COMMANDS.indexOf(command) < 0) {
            throw new FormatterException("Unknown command: '" + command + "'");
        }

        switch (command) {
        case 'a':
            // without an explicit separator the default one is used
            if (arguments.length == 0) {
                arguments = new String[] { Example.SEPARATOR };
            }
            break;
        case 's':
            // missing separators are filled up with the defaults
            switch (arguments.length) {
            case 0:
                arguments = new String[] { Example.SEPARATOR, Example.SPARSE_SEPARATOR };
                break;
            case 1:
                arguments = new String[] { arguments[0], Example.SPARSE_SEPARATOR };
                break;
            case 2:
                arguments = new String[] { arguments[0], arguments[1] };
                break;
            default:
                throw new FormatterException("For command 's' only up to two arguments (separator and sparse separator) are allowed.");
            }
            break;
        case 'l':
            requirePresent(exampleSet.getAttributes().getLabel(), "Example set does not provide 'label' attribute, $l will not work.");
            break;
        case 'p':
            requirePresent(exampleSet.getAttributes().getPredictedLabel(), "Example set does not provide 'predicted label' attribute, $p will not work.");
            break;
        case 'i':
            requirePresent(exampleSet.getAttributes().getId(), "Example set does not provide 'id' attribute, $i will not work.");
            break;
        case 'w':
            requirePresent(exampleSet.getAttributes().getWeight(), "Example set does not provide 'weight' attribute, $w will not work.");
            break;
        case 'c':
            requirePresent(exampleSet.getAttributes().getCluster(), "Example set does not provide 'cluster' attribute, $c will not work.");
            break;
        case 'b':
            requirePresent(exampleSet.getAttributes().getSpecial(Attributes.BATCH_NAME), "Example set does not provide 'batch' attribute, $b will not work.");
            break;
        case 'd':
            // confidences are looked up through the predicted label
            requirePresent(exampleSet.getAttributes().getPredictedLabel(), "Example set does not provide 'confidence' attributes, $d will not work.");
            break;
        default:
            break;
        }

        this.arguments = arguments;
    }

    /** Throws a FormatterException with the given message if the attribute is null. */
    private static void requirePresent(Object attribute, String message) throws FormatterException {
        if (attribute == null) {
            throw new FormatterException(message);
        }
    }

    /** Executes the command on the given example and returns the resulting text. */
    public String format(Example example) {
        switch (command) {
        case 'a': {
            // join all attribute values, separated by the first argument
            StringBuilder joined = new StringBuilder();
            String delimiter = "";
            for (Attribute current : example.getAttributes()) {
                joined.append(delimiter);
                joined.append(example.getValueAsString(current, fractionDigits, true));
                delimiter = arguments[0];
            }
            return joined.toString();
        }
        case 's':
            return example.getAttributesAsSparseString(arguments[0], arguments[1], fractionDigits, true);
        case 'l':
            return example.getValueAsString(example.getAttributes().getLabel(), fractionDigits, true);
        case 'p':
            return example.getValueAsString(example.getAttributes().getPredictedLabel(), fractionDigits, true);
        case 'i':
            return example.getValueAsString(example.getAttributes().getId(), fractionDigits, true);
        case 'w':
            return example.getValueAsString(example.getAttributes().getWeight(), fractionDigits, true);
        case 'c':
            return example.getValueAsString(example.getAttributes().getCluster(), fractionDigits, true);
        case 'b':
            return example.getValueAsString(example.getAttributes().getSpecial(Attributes.BATCH_NAME), fractionDigits, true);
        case 'd': {
            if (arguments.length != 0) {
                // a single class was requested: only the plain number is returned
                return Tools.formatNumber(example.getConfidence(arguments[0]), fractionDigits);
            }
            // no class given: list conf(class)=value for every mapped value
            Iterator<?> classValues = example.getAttributes().getPredictedLabel().getMapping().getValues().iterator();
            StringBuilder confidences = new StringBuilder();
            boolean needsSeparator = false;
            while (classValues.hasNext()) {
                String classValue = (String) classValues.next();
                if (needsSeparator) {
                    confidences.append(Example.SEPARATOR);
                }
                confidences.append("conf(" + classValue + ")=" + Tools.formatNumber(example.getConfidence(classValue), fractionDigits));
                needsSeparator = true;
            }
            return confidences.toString();
        }
        default:
            return String.valueOf(command);
        }
    }
}
/**
 * Returns the value of a single attribute. The attribute is selected once at
 * construction time, either by name (command 'v') or by its position in the
 * iteration order of the example set's attributes (command 'k').
 */
public static class ValueCommand implements FormatCommand {

    /** The attribute whose value is written by {@link #format(Example)}. */
    private Attribute attribute;

    /** Number of fraction digits handed on to the value formatting call. */
    private int fractionDigits = -1;

    /** Flag handed on to the value formatting call. */
    private boolean quoteWhitespace = false;

    /**
     * Resolves the attribute addressed by the first argument.
     *
     * @param command
     *            'v' to look the attribute up by name, 'k' to look it up by
     *            index
     * @param arguments
     *            the command arguments; the first one is the attribute name
     *            ('v') or the attribute index ('k')
     * @param exampleSet
     *            the example set providing the attributes
     * @param fractionDigits
     *            passed on when formatting the value
     * @param quoteWhitespace
     *            passed on when formatting the value
     * @throws FormatterException
     *             if no argument is given, the name is unknown, the index is
     *             not an integer or out of range, or the command is neither
     *             'v' nor 'k'
     */
    public ValueCommand(char command, String[] arguments, ExampleSet exampleSet, int fractionDigits, boolean quoteWhitespace) throws FormatterException {
        this.fractionDigits = fractionDigits;
        this.quoteWhitespace = quoteWhitespace;
        if (arguments.length < 1) {
            // name the command actually used; this used to always say 'v'
            throw new FormatterException("Command '" + command + "' needs argument!");
        }
        switch (command) {
        case 'v': {
            attribute = exampleSet.getAttributes().get(arguments[0]);
            if (attribute == null) {
                throw new FormatterException("Unknown attribute: '" + arguments[0] + "'!");
            }
            break;
        }
        case 'k': {
            int column;
            try {
                column = Integer.parseInt(arguments[0]);
            } catch (NumberFormatException e) {
                throw new FormatterException("Argument for 'k' must be an integer!");
            }
            if ((column < 0) || (column >= exampleSet.getAttributes().size())) {
                throw new FormatterException("Illegal column: '" + arguments[0] + "'!");
            }
            // Walk the attributes until the requested position is reached.
            // The counter has to advance on every step; it was never
            // incremented before, so no index other than 0 was ever matched.
            int counter = 0;
            for (Attribute candidate : exampleSet.getAttributes()) {
                if (counter == column) {
                    this.attribute = candidate;
                    break;
                }
                counter++;
            }
            // Guard against an iteration that yields fewer attributes than
            // size() reported, so format() never runs with a null attribute.
            if (this.attribute == null) {
                throw new FormatterException("Illegal column: '" + arguments[0] + "'!");
            }
            break;
        }
        default:
            throw new FormatterException("Illegal command for ValueCommand: '" + command + "'");
        }
    }

    /** Returns the value of the selected attribute for the given example. */
    public String format(Example example) {
        return example.getValueAsString(attribute, fractionDigits, quoteWhitespace);
    }
}
/** Format command that ignores the example and always yields a fixed text. */
public static class TextCommand implements FormatCommand {

    /** The literal text to emit. */
    private final String text;

    private TextCommand(String literal) {
        text = literal;
    }

    /** Returns the stored text, independent of the given example. */
    public String format(Example example) {
        return this.text;
    }
}
/** The format commands that are executed one after another for each example. */
private final FormatCommand[] formatCommands;

/**
 * Creates a formatter that runs the given formatting commands. Instances
 * are preferably obtained by compiling a format string with
 * {@link ExampleFormatter#compile(String, ExampleSet, int, boolean)}.
 * The given array is stored as is, without copying.
 */
public ExampleFormatter(FormatCommand[] formatCommands) {
    this.formatCommands = formatCommands;
}
/**
 * Factory method: parses the format string into a sequence of format
 * commands and wraps them in a new ExampleFormatter.
 *
 * @throws FormatterException
 *             if the format string cannot be compiled
 */
public static ExampleFormatter compile(String formatString, ExampleSet exampleSet, int fractionDigits, boolean quoteWhitespace) throws FormatterException {
    List<FormatCommand> parsed = new LinkedList<FormatCommand>();
    compile(formatString, exampleSet, parsed, fractionDigits, quoteWhitespace);
    return new ExampleFormatter(parsed.toArray(new FormatCommand[parsed.size()]));
}
/**
 * Parses the format string from left to right and appends the resulting
 * commands to <code>commandList</code>. Text between escape sequences is
 * added as a text command, as are the literal escapes for '$', '[', ']',
 * the line separator ('n') and the tabulator ('t'). Every other escape is
 * read together with its directly following bracketed arguments.
 */
private static void compile(String formatString, ExampleSet exampleSet, List<FormatCommand> commandList, int fractionDigits, boolean quoteWhitespace) throws FormatterException {
    final int length = formatString.length();
    int position = 0;
    int dollar;
    while ((dollar = formatString.indexOf('$', position)) != -1) {
        if (dollar == length - 1) {
            throw new FormatterException("Format string ends in '$'.");
        }
        // plain text in front of the escape sequence (may be empty)
        commandList.add(new TextCommand(formatString.substring(position, dollar)));

        char command = formatString.charAt(dollar + 1);
        position = dollar + 2;

        // escapes that simply stand for a piece of text
        switch (command) {
        case '$':
        case '[':
        case ']':
            commandList.add(new TextCommand(String.valueOf(command)));
            continue;
        case 'n':
            commandList.add(new TextCommand(Tools.getLineSeparator()));
            continue;
        case 't':
            commandList.add(new TextCommand("\t"));
            continue;
        default:
            break;
        }

        // collect all bracketed arguments directly following the command
        List<String> argumentList = new LinkedList<String>();
        while ((position < length) && (formatString.charAt(position) == '[')) {
            int closing = formatString.indexOf(']', position);
            if (closing == -1) {
                throw new FormatterException("Unclosed '['!");
            }
            argumentList.add(formatString.substring(position + 1, closing));
            position = closing + 1;
        }
        String[] arguments = argumentList.toArray(new String[argumentList.size()]);

        if ((command == 'v') || (command == 'k')) {
            commandList.add(new ValueCommand(command, arguments, exampleSet, fractionDigits, quoteWhitespace));
        } else {
            commandList.add(new SimpleCommand(exampleSet, command, arguments, fractionDigits));
        }
    }
    // whatever follows the last escape sequence is plain text
    commandList.add(new TextCommand(formatString.substring(position)));
}
/**
 * Formats a single example by concatenating the output of all format
 * commands in their stored order.
 */
public String format(Example example) {
    StringBuilder line = new StringBuilder();
    for (FormatCommand step : formatCommands) {
        line.append(step.format(example));
    }
    return line.toString();
}
}