/*
* RapidMiner
*
* Copyright (C) 2001-2011 by Rapid-I and the contributors
*
* Complete list of developers available at our web site:
*
* http://rapid-i.com
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see http://www.gnu.org/licenses/.
*/
package com.rapidminer.operator.io;
import java.io.File;
import java.io.IOException;
import java.nio.charset.Charset;
import java.util.LinkedList;
import java.util.List;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.example.table.NumericalAttribute;
import com.rapidminer.example.table.SparseFormatDataRowReader;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.UserError;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.parameter.ParameterTypeBoolean;
import com.rapidminer.parameter.ParameterTypeCategory;
import com.rapidminer.parameter.ParameterTypeFile;
import com.rapidminer.tools.io.Encoding;
/**
* Writes values of all examples in an {@link ExampleSet} to a file. Dense,
* sparse, and user defined formats (specified by the parameter 'format') can be
* used. Attribute description files may be generated for dense and sparse
* format as well. These formats can be read using the {@link ExampleSource} and
* {@link SparseFormatExampleSource} operators.
*
* <dl>
* <dt>dense:</dt>
* <dd> Each line of the generated data file is of the form<br/> <center>
*
* <pre>
* regular attributes &lt;special attributes&gt;
* </pre>
*
* </center> For example, each line could have the form <center>
*
* <pre>
* value1 value2 ... valueN &lt;id&gt; &lt;label&gt; &lt;prediction&gt; ... &lt;confidences&gt;
* </pre>
*
* </center> Values in angle brackets are optional and are only printed if they are
* available. The confidences are only given for nominal predictions. Other
* special attributes might be the example weight or the cluster number. </dd>
* <dt>sparse:</dt>
* <dd>Only non-zero values are written to the file, prefixed by a column index.
* See the description of {@link SparseFormatExampleSource} for details. </dd>
* </dl>
*
* @see com.rapidminer.example.ExampleSet
*
* @author Simon Fischer, Ingo Mierswa
*/
public class ExampleSetWriter extends AppendingExampleSetWriter {

    /** The parameter name for "File to save the example set to." */
    public static final String PARAMETER_EXAMPLE_SET_FILE = "example_set_file";

    /** The parameter name for "File to save the attribute descriptions to." */
    public static final String PARAMETER_ATTRIBUTE_DESCRIPTION_FILE = "attribute_description_file";

    /** The parameter name for "Format to use for output." */
    public static final String PARAMETER_FORMAT = "format";

    /** The parameter name for "Indicates if the data file content should be zipped." */
    public static final String PARAMETER_ZIPPED = "zipped";

    /**
     * Display names of the selectable output formats. Index {@link #DENSE_FORMAT} is "dense"; index
     * {@code i + 1} is entry {@code i} of {@code SparseFormatDataRowReader.FORMAT_NAMES} prefixed with
     * "sparse_".
     */
    private static final String[] FORMAT_NAMES = new String[SparseFormatDataRowReader.FORMAT_NAMES.length + 1];

    /** Index of the dense format within {@link #FORMAT_NAMES}. */
    private static final int DENSE_FORMAT = 0;

    static {
        FORMAT_NAMES[DENSE_FORMAT] = "dense";
        for (int i = 0; i < SparseFormatDataRowReader.FORMAT_NAMES.length; i++) {
            FORMAT_NAMES[i + 1] = "sparse_" + SparseFormatDataRowReader.FORMAT_NAMES[i];
        }
    }

    /**
     * Creates the operator and passes the description on to the superclass.
     *
     * @param description the operator description handed to the superclass constructor
     */
    public ExampleSetWriter(OperatorDescription description) {
        super(description);
    }

    /**
     * Writes the given example set to the configured data file in the configured format and, if an
     * attribute description file is set, writes the matching attribute description as well.
     *
     * <p>When the 'zipped' parameter is true, ".gz" is appended to the absolute path of the data file
     * before anything is written.</p>
     *
     * @param eSet the example set to write
     * @return the same example set instance that was passed in
     * @throws OperatorException a {@link UserError} with code 303 wrapping any {@link IOException}
     *             raised while writing; other operator exceptions may come from the parameter lookups
     */
    @Override
    public ExampleSet write(ExampleSet eSet) throws OperatorException {
        boolean zipped = getParameterAsBoolean(PARAMETER_ZIPPED);
        File dataFile = getParameterAsFile(PARAMETER_EXAMPLE_SET_FILE, true);
        if (zipped) {
            dataFile = new File(dataFile.getAbsolutePath() + ".gz");
        }
        // May be null; in that case no attribute description is written.
        File attFile = getParameterAsFile(PARAMETER_ATTRIBUTE_DESCRIPTION_FILE, true);
        // The append decision is made on the final data file name (including a ".gz" suffix, if any).
        boolean append = shouldAppend(dataFile);
        Charset encoding = Encoding.getEncoding(this);

        try {
            int format = getParameterAsInt(PARAMETER_FORMAT);
            getLogger().info("Writing example set in format '" + FORMAT_NAMES[format] + "'.");
            if (format == DENSE_FORMAT) {
                eSet.writeDataFile(dataFile, NumericalAttribute.UNLIMITED_NUMBER_OF_DIGITS, true, zipped, append, encoding);
                if (attFile != null) {
                    eSet.writeAttributeFile(attFile, dataFile, encoding);
                }
            } else {
                // FORMAT_NAMES holds the dense entry at index 0, so the sparse format index is shifted by one.
                int sparseFormat = format - 1;
                eSet.writeSparseDataFile(dataFile, sparseFormat, NumericalAttribute.UNLIMITED_NUMBER_OF_DIGITS, true, zipped, append, encoding);
                if (attFile != null) {
                    eSet.writeSparseAttributeFile(attFile, dataFile, sparseFormat, encoding);
                }
            }
        } catch (IOException e) {
            throw new UserError(this, e, 303, new Object[] { describeTargets(dataFile, attFile), e.getMessage() });
        }
        return eSet;
    }

    /**
     * Builds the file description used in error messages: the data file alone when no attribute
     * description file is set, otherwise both files separated by " / ".
     */
    private static String describeTargets(File dataFile, File attFile) {
        if (attFile == null) {
            // Avoids a trailing " / null" in the user-facing message.
            return String.valueOf(dataFile);
        }
        return dataFile + " / " + attFile;
    }

    /**
     * Reports that this operator supports an encoding setting.
     *
     * @return always {@code true}
     */
    @Override
    protected boolean supportsEncoding() {
        return true;
    }

    /**
     * Returns the parameters of this operator: data file, attribute description file, output format and
     * the zipped flag, followed by all parameter types contributed by the superclass.
     *
     * @return a new list of parameter types
     */
    @Override
    public List<ParameterType> getParameterTypes() {
        List<ParameterType> types = new LinkedList<ParameterType>();
        types.add(new ParameterTypeFile(PARAMETER_EXAMPLE_SET_FILE, "File to save the example set to.", "dat", false));
        types.add(new ParameterTypeFile(PARAMETER_ATTRIBUTE_DESCRIPTION_FILE, "File to save the attribute descriptions to.", "aml", true));
        types.add(new ParameterTypeCategory(PARAMETER_FORMAT, "Format to use for output.", FORMAT_NAMES, DENSE_FORMAT));
        types.add(new ParameterTypeBoolean(PARAMETER_ZIPPED, "Indicates if the data file content should be zipped.", false));
        types.addAll(super.getParameterTypes());
        return types;
    }
}