/*
* RapidMiner
*
* Copyright (C) 2001-2008 by Rapid-I and the contributors
*
* Complete list of developers available at our web site:
*
* http://rapid-i.com
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see http://www.gnu.org/licenses/.
*/
package com.rapidminer.operator.io;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.util.Iterator;
import java.util.List;
import com.rapidminer.example.Attribute;
import com.rapidminer.example.AttributeRole;
import com.rapidminer.example.Attributes;
import com.rapidminer.example.Example;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.operator.IOObject;
import com.rapidminer.operator.Operator;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.UserError;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.parameter.ParameterTypeFile;
import com.rapidminer.tools.Tools;
/**
* <p>Writes values of all examples into an XRFF file which can be used
* by the machine learning library Weka. The XRFF format is described in the
* {@link XrffExampleSource} operator which is able to read XRFF files to
* make them usable with RapidMiner.</p>
*
* <p>Please note that writing attribute weights is not supported, please use
* the other RapidMiner operators for attribute weight loading and writing for this
* purpose.</p>
*
* @rapidminer.index xrff
* @author Ingo Mierswa
* @version $Id: XrffExampleSetWriter.java,v 1.7 2008/07/07 07:06:38 ingomierswa Exp $
*/
public class XrffExampleSetWriter extends Operator {
/** The parameter name for "File to save the example set to." */
public static final String PARAMETER_EXAMPLE_SET_FILE = "example_set_file";
/** The parameter name for "Indicates if the data file should be compressed." */
public static final String PARAMETER_COMPRESS = "compress";
private static final Class[] INPUT_CLASSES = { ExampleSet.class };
private static final Class[] OUTPUT_CLASSES = { ExampleSet.class };
public XrffExampleSetWriter(OperatorDescription description) {
super(description);
}
public IOObject[] apply() throws OperatorException {
ExampleSet exampleSet = getInput(ExampleSet.class);
try {
File xrffFile = getParameterAsFile(PARAMETER_EXAMPLE_SET_FILE);
PrintWriter out = new PrintWriter(new OutputStreamWriter(new FileOutputStream(xrffFile), getEncoding()));
out.println("<?xml version=\"1.0\" encoding=\"" + getEncoding() + "\"?>");
out.println("<dataset name=\"RapidMinerData\" version=\"3.5.4\">");
out.println(" <header>");
out.println(" <attributes>");
Iterator<AttributeRole> a = exampleSet.getAttributes().allAttributeRoles();
while (a.hasNext()) {
AttributeRole role = a.next();
// ignore weight attribute in order to use instance weights directly later
if ((role.getSpecialName() != null) && (role.getSpecialName().equals(Attributes.WEIGHT_NAME)))
continue;
Attribute attribute = role.getAttribute();
boolean label = (role.getSpecialName() != null) && (role.getSpecialName().equals(Attributes.LABEL_NAME));
printAttribute(attribute, out, label);
}
out.println(" </attributes>");
out.println(" </header>");
out.println(" <body>");
out.println(" <instances>");
Attribute weightAttribute = exampleSet.getAttributes().getWeight();
for (Example example : exampleSet) {
String weightString = "";
if (weightAttribute != null) {
weightString = " weight=\""+example.getValue(weightAttribute)+"\"";
}
out.println(" <instance"+weightString+">");
a = exampleSet.getAttributes().allAttributeRoles();
while (a.hasNext()) {
AttributeRole role = a.next();
// ignore weight attribute in order to use instance weights directly later
if ((role.getSpecialName() != null) && (role.getSpecialName().equals(Attributes.WEIGHT_NAME)))
continue;
Attribute attribute = role.getAttribute();
out.println(" <value>" + Tools.escapeXML(example.getValueAsString(attribute)) + "</value>");
}
out.println(" </instance>");
}
out.println(" </instances>");
out.println(" </body>");
out.println("</dataset>");
out.close();
} catch (IOException e) {
throw new UserError(this, e, 303, new Object[] { getParameterAsString(PARAMETER_EXAMPLE_SET_FILE), e.getMessage() });
}
return new IOObject[] { exampleSet };
}
private void printAttribute(Attribute attribute, PrintWriter out, boolean isClass) {
String classString = isClass ? "class=\"yes\" " : "";
if (attribute.isNominal()) {
out.println(" <attribute name=\"" + Tools.escapeXML(attribute.getName()) + "\" " + classString + "type=\"nominal\">");
out.println(" <labels>");
for (String s : attribute.getMapping().getValues()) {
out.println(" <label>" + Tools.escapeXML(s) + "</label>");
}
out.println(" </labels>");
out.println(" </attribute>");
} else {
out.println(" <attribute name=\"" + Tools.escapeXML(attribute.getName()) + "\" " + classString + "type=\"numeric\"/>");
}
}
public Class<?>[] getInputClasses() {
return INPUT_CLASSES;
}
public Class<?>[] getOutputClasses() {
return OUTPUT_CLASSES;
}
public List<ParameterType> getParameterTypes() {
List<ParameterType> types = super.getParameterTypes();
types.add(new ParameterTypeFile(PARAMETER_EXAMPLE_SET_FILE, "File to save the example set to.", "xrff", false));
//types.add(new ParameterTypeBoolean(PARAMETER_COMPRESS, "Indicates if the data file should be compressed.", false));
return types;
}
}