/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ /* * ArffSaver.java * Copyright (C) 2004 University of Waikato, Hamilton, New Zealand * */ package weka.core.converters; import weka.core.Capabilities; import weka.core.Instance; import weka.core.Instances; import weka.core.Option; import weka.core.RevisionUtils; import weka.core.Utils; import weka.core.Capabilities.Capability; import java.io.File; import java.io.IOException; import java.io.OutputStream; import java.io.PrintWriter; import java.util.Enumeration; import java.util.Vector; import java.util.zip.GZIPOutputStream; /** * Writes to a destination in arff text format. <p/> * <!-- options-start --> * Valid options are: <p/> * * <pre> -i <the input file> * The input file</pre> * * <pre> -o <the output file> * The output file</pre> * <!-- options-end --> * * @author Stefan Mutter (mutter@cs.waikato.ac.nz) * @version $Revision: 7763 $ * @see Saver */ public class ArffSaver extends AbstractFileSaver implements BatchConverter, IncrementalConverter { /** for serialization */ static final long serialVersionUID = 2223634248900042228L; /** whether to compress the output */ protected boolean m_CompressOutput = false; /** Constructor */ public ArffSaver(){ resetOptions(); } /** * Returns an enumeration describing the available options. * * @return an enumeration of all the available options. */ public Enumeration listOptions() { Vector<Option> result; result = new Vector<Option>(); Enumeration en = super.listOptions(); while (en.hasMoreElements()) result.addElement((Option)en.nextElement()); result.addElement( new Option( "\tCompresses the data (uses '" + ArffLoader.FILE_EXTENSION_COMPRESSED + "' as extension instead of '" + ArffLoader.FILE_EXTENSION + "')\n" + "\t(default: off)", "compress", 0, "-compress")); return result.elements(); } /** * returns the options of the current setup * * @return the current options */ public String[] getOptions(){ int i; Vector<String> result; String[] options; result = new Vector<String>(); if (getCompressOutput()) result.add("-compress"); options = super.getOptions(); for (i = 0; i < options.length; i++) result.add(options[i]); return (String[]) result.toArray(new String[result.size()]); } /** * Parses the options for this object. <p/> * <!-- options-start --> * Valid options are: <p/> * * <pre> -i <the input file> * The input file</pre> * * <pre> -o <the output file> * The output file</pre> * * <pre> -compress * Compresses the data (uses '.xrff.gz' as extension instead of '.xrff') * (default: off)</pre> * <!-- options-end --> * * @param options the options to use * @throws Exception if setting of options fails */ public void setOptions(String[] options) throws Exception { setCompressOutput(Utils.getFlag("compress", options)); super.setOptions(options); } /** * Returns the tip text for this property * * @return tip text for this property suitable for * displaying in the explorer/experimenter gui */ public String compressOutputTipText() { return "Optional compression of the output data"; } /** * Gets whether the output data is compressed. * * @return true if the output data is compressed */ public boolean getCompressOutput() { return m_CompressOutput; } /** * Sets whether to compress the output. * * @param value if truee the output will be compressed */ public void setCompressOutput(boolean value) { m_CompressOutput = value; } /** * Returns a string describing this Saver * @return a description of the Saver suitable for * displaying in the explorer/experimenter gui */ public String globalInfo() { return "Writes to a destination that is in arff (attribute relation file format) " +"format. The data can be compressed with gzip in order to save space."; } /** * Returns a description of the file type. * * @return a short file description */ public String getFileDescription() { return "Arff data files"; } /** * Gets all the file extensions used for this type of file * * @return the file extensions */ public String[] getFileExtensions() { return new String[]{ArffLoader.FILE_EXTENSION, ArffLoader.FILE_EXTENSION_COMPRESSED}; } /** * Sets the destination file. * * @param outputFile the destination file. * @throws IOException throws an IOException if file cannot be set */ public void setFile(File outputFile) throws IOException { if (outputFile.getAbsolutePath().endsWith(ArffLoader.FILE_EXTENSION_COMPRESSED)) setCompressOutput(true); super.setFile(outputFile); } /** * Sets the destination output stream. * * @param output the output stream. * @throws IOException throws an IOException if destination cannot be set */ public void setDestination(OutputStream output) throws IOException { if (getCompressOutput()) super.setDestination(new GZIPOutputStream(output)); else super.setDestination(output); } /** * Resets the Saver */ public void resetOptions() { super.resetOptions(); setFileExtension(".arff"); } /** * Returns the Capabilities of this saver. * * @return the capabilities of this object * @see Capabilities */ public Capabilities getCapabilities() { Capabilities result = super.getCapabilities(); // attributes result.enableAllAttributes(); result.enable(Capability.MISSING_VALUES); // class result.enableAllClasses(); result.enable(Capability.MISSING_CLASS_VALUES); result.enable(Capability.NO_CLASS); return result; } /** Saves an instances incrementally. Structure has to be set by using the * setStructure() method or setInstances() method. * @param inst the instance to save * @throws IOException throws IOEXception if an instance cannot be saved incrementally. */ public void writeIncremental(Instance inst) throws IOException{ int writeMode = getWriteMode(); Instances structure = getInstances(); PrintWriter outW = null; if(getRetrieval() == BATCH || getRetrieval() == NONE) throw new IOException("Batch and incremental saving cannot be mixed."); if(getWriter() != null) outW = new PrintWriter(getWriter()); if(writeMode == WAIT){ if(structure == null){ setWriteMode(CANCEL); if(inst != null) System.err.println("Structure(Header Information) has to be set in advance"); } else setWriteMode(STRUCTURE_READY); writeMode = getWriteMode(); } if(writeMode == CANCEL){ if(outW != null) outW.close(); cancel(); } if(writeMode == STRUCTURE_READY){ setWriteMode(WRITE); //write header Instances header = new Instances(structure,0); if(retrieveFile() == null || outW == null) System.out.println(header.toString()); else{ outW.print(header.toString()); outW.print("\n"); outW.flush(); } writeMode = getWriteMode(); } if(writeMode == WRITE){ if(structure == null) throw new IOException("No instances information available."); if(inst != null){ //write instance if(retrieveFile() == null || outW == null) System.out.println(inst); else{ outW.println(inst); m_incrementalCounter++; //flush every 100 instances if(m_incrementalCounter > 100){ m_incrementalCounter = 0; outW.flush(); } } } else{ //close if(outW != null){ outW.flush(); outW.close(); } m_incrementalCounter = 0; resetStructure(); outW = null; resetWriter(); } } } /** Writes a Batch of instances * @throws IOException throws IOException if saving in batch mode is not possible */ public void writeBatch() throws IOException { if(getInstances() == null) throw new IOException("No instances to save"); if(getRetrieval() == INCREMENTAL) throw new IOException("Batch and incremental saving cannot be mixed."); setRetrieval(BATCH); setWriteMode(WRITE); if(retrieveFile() == null && getWriter() == null){ System.out.println((getInstances()).toString()); setWriteMode(WAIT); return; } PrintWriter outW = new PrintWriter(getWriter()); Instances data = getInstances(); // header Instances header = new Instances(data, 0); outW.print(header.toString()); // data for (int i = 0; i < data.numInstances(); i++) { if (i % 1000 == 0) outW.flush(); outW.println(data.instance(i)); } outW.flush(); outW.close(); setWriteMode(WAIT); outW = null; resetWriter(); setWriteMode(CANCEL); } /** * Returns the revision string. * * @return the revision */ public String getRevision() { return RevisionUtils.extract("$Revision: 7763 $"); } /** * Main method. * * @param args should contain the options of a Saver. */ public static void main(String[] args) { runFileSaver(new ArffSaver(), args); } }