/**
* Copyright (C) 2001-2017 by RapidMiner and the contributors
*
* Complete list of developers available at our web site:
*
* http://rapidminer.com
*
* This program is free software: you can redistribute it and/or modify it under the terms of the
* GNU Affero General Public License as published by the Free Software Foundation, either version 3
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
* even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License along with this program.
* If not, see http://www.gnu.org/licenses/.
*/
package com.rapidminer.operator.io;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.example.table.NumericalAttribute;
import com.rapidminer.example.table.SparseFormatDataRowReader;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.UserError;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.parameter.ParameterTypeBoolean;
import com.rapidminer.parameter.ParameterTypeCategory;
import com.rapidminer.parameter.ParameterTypeFile;
import com.rapidminer.tools.io.Encoding;
import java.io.File;
import java.io.IOException;
import java.nio.charset.Charset;
import java.util.LinkedList;
import java.util.List;
/**
* Writes values of all examples in an {@link ExampleSet} to a file. Dense, sparse, and user defined
* formats (specified by the parameter 'format') can be used. Attribute description files may be
* generated for dense and sparse format as well. These formats can be read using the
* {@link ExampleSource} and {@link SparseFormatExampleSource} operators.
*
* <dl>
* <dt>dense:</dt>
* <dd>Each line of the generated data file is of the form<br/>
* <center>
*
* <pre>
* regular attributes <special attributes>
* </pre>
*
* </center> For example, each line could have the form <center>
*
* <pre>
* value1 value2 ... valueN <id> <label> <prediction> ... <confidences>
* </pre>
*
* </center> Values in parenthesis are optional and are only printed if they are available. The
* confidences are only given for nominal predictions. Other special attributes might be the example
* weight or the cluster number.</dd>
* <dt>sparse:</dt>
* <dd>Only non 0 values are written to the file, prefixed by a column index. See the description of
* {@link SparseFormatExampleSource} for details.</dd>
* </dl>
*
* @see com.rapidminer.example.ExampleSet
*
* @author Simon Fischer, Ingo Mierswa
*/
public class ExampleSetWriter extends AppendingExampleSetWriter {
/** The parameter name for "File to save the example set to." */
public static final String PARAMETER_EXAMPLE_SET_FILE = "example_set_file";
/** The parameter name for "File to save the attribute descriptions to." */
public static final String PARAMETER_ATTRIBUTE_DESCRIPTION_FILE = "attribute_description_file";
/** The parameter name for "Format to use for output." */
public static final String PARAMETER_FORMAT = "format";
/** The parameter name for "Indicates if the data file content should be zipped." */
public static final String PARAMETER_ZIPPED = "zipped";
private static final String[] FORMAT_NAMES = new String[SparseFormatDataRowReader.FORMAT_NAMES.length + 1];
private static final int DENSE_FORMAT = 0;
static {
FORMAT_NAMES[0] = "dense";
for (int i = 0; i < SparseFormatDataRowReader.FORMAT_NAMES.length; i++) {
FORMAT_NAMES[i + 1] = "sparse_" + SparseFormatDataRowReader.FORMAT_NAMES[i];
}
}
public ExampleSetWriter(OperatorDescription description) {
super(description);
}
@Override
public ExampleSet write(ExampleSet eSet) throws OperatorException {
boolean zipped = getParameterAsBoolean(PARAMETER_ZIPPED);
File dataFile = getParameterAsFile(PARAMETER_EXAMPLE_SET_FILE, true);
if (zipped) {
dataFile = new File(dataFile.getAbsolutePath() + ".gz");
}
File attFile = getParameterAsFile(PARAMETER_ATTRIBUTE_DESCRIPTION_FILE, true);
boolean append = shouldAppend(dataFile);
Charset encoding = Encoding.getEncoding(this);
try {
// write example set
int format = getParameterAsInt(PARAMETER_FORMAT);
getLogger().info("Writing example set in format '" + FORMAT_NAMES[format] + "'.");
if (format == DENSE_FORMAT) { // dense
eSet.writeDataFile(dataFile, NumericalAttribute.UNLIMITED_NUMBER_OF_DIGITS, true, zipped, append, encoding);
if (attFile != null) {
eSet.writeAttributeFile(attFile, dataFile, encoding);
}
} else { // sparse
eSet.writeSparseDataFile(dataFile, format - 1, NumericalAttribute.UNLIMITED_NUMBER_OF_DIGITS, true, zipped,
append, encoding);
if (attFile != null) {
eSet.writeSparseAttributeFile(attFile, dataFile, format - 1, encoding);
}
}
} catch (IOException e) {
throw new UserError(this, e, 303, new Object[] { dataFile + " / " + attFile, e.getMessage() });
}
return eSet;
}
@Override
protected boolean supportsEncoding() {
return true;
}
@Override
public List<ParameterType> getParameterTypes() {
List<ParameterType> types = new LinkedList<ParameterType>();
types.add(new ParameterTypeFile(PARAMETER_EXAMPLE_SET_FILE, "File to save the example set to.", "dat", false));
types.add(new ParameterTypeFile(PARAMETER_ATTRIBUTE_DESCRIPTION_FILE, "File to save the attribute descriptions to.",
"aml", true));
types.add(new ParameterTypeCategory(PARAMETER_FORMAT, "Format to use for output.", FORMAT_NAMES, 0));
types.add(new ParameterTypeBoolean(PARAMETER_ZIPPED, "Indicates if the data file content should be zipped.", false));
types.addAll(super.getParameterTypes());
return types;
}
}