/*
* RapidMiner
*
* Copyright (C) 2001-2011 by Rapid-I and the contributors
*
* Complete list of developers available at our web site:
*
* http://rapid-i.com
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see http://www.gnu.org/licenses/.
*/
package com.rapidminer.operator.io;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.sql.Date;
import java.text.DateFormat;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import com.rapidminer.example.Attribute;
import com.rapidminer.example.Example;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.UserError;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.parameter.ParameterTypeBoolean;
import com.rapidminer.parameter.ParameterTypeFile;
import com.rapidminer.parameter.ParameterTypeString;
import com.rapidminer.tools.Ontology;
import com.rapidminer.tools.io.Encoding;
/**
* <p>
* This operator can be used to write data into CSV files (Comma Separated
* Values). The values and columns are separated by ";". Missing data
* values are indicated by empty cells.
* </p>
*
* @author Ingo Mierswa
*/
public class CSVExampleSetWriter extends AbstractExampleSetWriter {
/** The parameter name for "The CSV file which should be written." */
public static final String PARAMETER_CSV_FILE = "csv_file";
/** The parameter name for the column separator parameter. */
public static final String PARAMETER_COLUMN_SEPARATOR = "column_separator";
/** Indicates if the attribute names should be written as first row. */
public static final String PARAMETER_WRITE_ATTRIBUTE_NAMES = "write_attribute_names";
/**
* Indicates if nominal values should be quoted with double quotes. Quotes
* inside of nominal values will be escaped by a backslash.
*/
public static final String PARAMETER_QUOTE_NOMINAL_VALUES = "quote_nominal_values";
/**
* Indicates if date attributes are written as a formated string or as
* milliseconds past since January 1, 1970, 00:00:00 GMT
*/
// TODO introduce parameter which allows to determine the written format see
// Nominal2Date operator
public static final String PARAMETER_FORMAT_DATE = "format_date_attributes";
public CSVExampleSetWriter(OperatorDescription description) {
super(description);
}
@Override
public ExampleSet write(ExampleSet exampleSet) throws OperatorException {
String columnSeparator = getParameterAsString(PARAMETER_COLUMN_SEPARATOR);
File file = getParameterAsFile(PARAMETER_CSV_FILE, true);
boolean quoteNominalValues = getParameterAsBoolean(PARAMETER_QUOTE_NOMINAL_VALUES);
PrintWriter out = null;
try {
out = new PrintWriter(new OutputStreamWriter(new FileOutputStream(file), Encoding.getEncoding(this)));
// write column names
if (getParameterAsBoolean(PARAMETER_WRITE_ATTRIBUTE_NAMES)) {
Iterator<Attribute> a = exampleSet.getAttributes().allAttributes();
boolean first = true;
while (a.hasNext()) {
if (!first)
out.print(columnSeparator);
Attribute attribute = a.next();
String name = attribute.getName();
if (quoteNominalValues) {
name = name.replaceAll("\"", "'");
name = "\"" + name + "\"";
}
out.print(name);
first = false;
}
out.println();
}
// write data
for (Example example : exampleSet) {
Iterator<Attribute> a = exampleSet.getAttributes().allAttributes();
boolean first = true;
while (a.hasNext()) {
Attribute attribute = a.next();
if (!first)
out.print(columnSeparator);
if (!Double.isNaN(example.getValue(attribute))) {
if (attribute.isNominal()) {
String stringValue = example.getValueAsString(attribute);
if (quoteNominalValues) {
stringValue = stringValue.replaceAll("\"", "'");
stringValue = "\"" + stringValue + "\"";
}
out.print(stringValue);
} else {
Double value = example.getValue(attribute);
if (Ontology.ATTRIBUTE_VALUE_TYPE.isA(attribute.getValueType(), Ontology.DATE_TIME)) {
if (getParameterAsBoolean(PARAMETER_FORMAT_DATE)) {
Date date = new Date(value.longValue());
String s = DateFormat.getInstance().format(date);
out.print(s);
} else {
out.print(value);
}
} else {
out.print(value);
}
}
}
first = false;
}
out.println();
}
} catch (FileNotFoundException e) {
throw new UserError(this, 301, file.getName());
} finally {
if (out != null) {
out.close();
}
}
return exampleSet;
}
@Override
protected boolean supportsEncoding() {
return true;
}
@Override
public List<ParameterType> getParameterTypes() {
List<ParameterType> types = new LinkedList<ParameterType>();
types.add(new ParameterTypeFile(PARAMETER_CSV_FILE, "The CSV file which should be written.", "csv", false));
types.add(new ParameterTypeString(PARAMETER_COLUMN_SEPARATOR, "The column separator.", ";", false));
types.add(new ParameterTypeBoolean(PARAMETER_WRITE_ATTRIBUTE_NAMES,
"Indicates if the attribute names should be written as first row.", true, false));
types.add(new ParameterTypeBoolean(PARAMETER_QUOTE_NOMINAL_VALUES,
"Indicates if nominal values should be quoted with double quotes.", true, false));
types
.add(new ParameterTypeBoolean(
PARAMETER_FORMAT_DATE,
"Indicates if date attributes are written as a formated string or as milliseconds past since January 1, 1970, 00:00:00 GMT",
true, true));
types.addAll(super.getParameterTypes());
return types;
}
}