/* * RapidMiner * * Copyright (C) 2001-2008 by Rapid-I and the contributors * * Complete list of developers available at our web site: * * http://rapid-i.com * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see http://www.gnu.org/licenses/. */ package com.rapidminer.example.set; import java.awt.BorderLayout; import java.awt.CardLayout; import java.awt.Component; import java.awt.FlowLayout; import java.awt.event.ActionEvent; import java.awt.event.ActionListener; import java.io.File; import java.io.FileOutputStream; import java.io.IOException; import java.io.OutputStream; import java.io.OutputStreamWriter; import java.io.PrintWriter; import java.nio.charset.Charset; import java.util.ArrayList; import java.util.HashMap; import java.util.Iterator; import java.util.LinkedList; import java.util.List; import java.util.Map; import java.util.zip.GZIPOutputStream; import javax.swing.AbstractAction; import javax.swing.Action; import javax.swing.ButtonGroup; import javax.swing.Icon; import javax.swing.JOptionPane; import javax.swing.JPanel; import javax.swing.JRadioButton; import com.rapidminer.datatable.DataTable; import com.rapidminer.datatable.DataTableExampleSetAdapter; import com.rapidminer.example.Attribute; import com.rapidminer.example.AttributeRole; import com.rapidminer.example.AttributeWeights; import com.rapidminer.example.Attributes; import com.rapidminer.example.Example; import com.rapidminer.example.ExampleSet; import com.rapidminer.example.Statistics; import com.rapidminer.example.table.NumericalAttribute; import com.rapidminer.example.table.SparseFormatDataRowReader; import com.rapidminer.gui.RapidMinerGUI; import com.rapidminer.gui.plotter.PlotterPanel; import com.rapidminer.gui.tools.SwingTools; import com.rapidminer.gui.viewer.DataViewer; import com.rapidminer.gui.viewer.MetaDataViewer; import com.rapidminer.operator.IOContainer; import com.rapidminer.operator.IOObject; import com.rapidminer.operator.MissingIOObjectException; import com.rapidminer.operator.ResultObjectAdapter; import com.rapidminer.tools.Ontology; import com.rapidminer.tools.Tools; /** * Implements wrapper methods of abstract example set. Implements all * ResultObject methods.<br> * * Apart from the interface methods the implementing classes must have a public * single argument clone constructor. This constructor is invoked by reflection * from the clone method. Do not forget to call the superclass method. * * @author Ingo Mierswa, Simon Fischer * @version $Id: AbstractExampleSet.java,v 2.74 2006/03/27 13:21:58 ingomierswa * Exp $ */ public abstract class AbstractExampleSet extends ResultObjectAdapter implements ExampleSet { /** * */ private static final long serialVersionUID = 8596141056047402798L; private static final String RESULT_ICON_NAME = "data.png"; private static Icon resultIcon = null; static { resultIcon = SwingTools.createIcon("16/" + RESULT_ICON_NAME); } /** Maps attribute names to list of statistics objects. */ private Map<String, List<Statistics>> statisticsMap = new HashMap<String, List<Statistics>>(); /** Maps the id values on the line index in the example table. */ private Map<Double, Integer> idMap = new HashMap<Double, Integer>(); /** This method overrides the implementation of ResultObjectAdapter and returns "ExampleSet". */ public String getName() { return "ExampleSet"; } public Example getExampleFromId(double id) { Integer indexObject = idMap.get(id); if (indexObject == null) return null; else return getExample(indexObject); } // --- Visualisation and toString() methods --- public String toString() { StringBuffer str = new StringBuffer(Tools.classNameWOPackage(this.getClass()) + ":" + Tools.getLineSeparator()); str.append(size() + " examples," + Tools.getLineSeparator()); str.append(getAttributes().size() + " regular attributes," + Tools.getLineSeparator()); boolean first = true; Iterator<AttributeRole> s = getAttributes().specialAttributes(); while (s.hasNext()) { if (first) { str.append("special attributes = {" + Tools.getLineSeparator()); first = false; } AttributeRole special = s.next(); str.append(" " + special.getSpecialName() + " = " + special.getAttribute() + Tools.getLineSeparator()); } if (!first) { str.append("}"); } else { str.append("no special attributes" + Tools.getLineSeparator()); } return str.toString(); } /** This method is used to create a {@link DataTable} from this example set. The default implementation * returns an instance of {@link DataTableExampleSetAdapter}. The given IOContainer is used to check if * there are compatible attribute weights which would used as column weights of the returned table. * Subclasses might want to override this method in order to allow for other data tables. */ public DataTable createDataTable(IOContainer container) { AttributeWeights weights = null; if (container != null) { try { weights = container.get(AttributeWeights.class); for (Attribute attribute : getAttributes()) { double weight = weights.getWeight(attribute.getName()); if (Double.isNaN(weight)) { // not compatible weights = null; break; } } } catch (MissingIOObjectException e) {} } return new DataTableExampleSetAdapter(this, weights); } /** * Returns component with several views controlled by radio buttons. The first view is a meta data viewer, the * second a data viewer and the last one a plotter panel. For this plotter the data table created by {@link #createDataTable(IOContainer)} * is used. */ public Component getVisualizationComponent(IOContainer container) { return getVisualizationComponent(container, true); } /** * Returns component with several views controlled by radio buttons. The first view is a meta data viewer, the * second a data viewer and the last one a plotter panel. For this plotter the data table created by {@link #createDataTable(IOContainer)} * is used. */ public Component getVisualizationComponent(final IOContainer container, boolean showOptions) { final JPanel borderPanel = new JPanel(new BorderLayout()); // meta data html table view final MetaDataViewer metaDataViewer = new MetaDataViewer(this, showOptions); // data html table view final DataViewer dataViewer = new DataViewer(this, showOptions); // statistics plotter view DataTable dataTable = createDataTable(container); final PlotterPanel plotterComponent = new PlotterPanel(dataTable); final CardLayout cardLayout = new CardLayout(); final JPanel mainPanel = new JPanel(cardLayout); final String metaString = "META"; final String dataString = "DATA"; final String plotString = "PLOT"; mainPanel.add(metaDataViewer, metaString); mainPanel.add(dataViewer, dataString); mainPanel.add(plotterComponent, plotString); // toggle radio button for views final JRadioButton metaDataButton = new JRadioButton("Meta Data View", true); metaDataButton.setToolTipText("Changes to a table showing information about all attributes."); metaDataButton.addActionListener(new ActionListener() { public void actionPerformed(ActionEvent e) { if (metaDataButton.isSelected()) { cardLayout.show(mainPanel, metaString); } } }); final JRadioButton dataButton = new JRadioButton("Data View", true); dataButton.setToolTipText("Changes to a table showing the complete example set."); dataButton.addActionListener(new ActionListener() { public void actionPerformed(ActionEvent e) { if (dataButton.isSelected()) { cardLayout.show(mainPanel, dataString); } } }); final JRadioButton plotButton = new JRadioButton("Plot View", false); plotButton.setToolTipText("Changes to a plot view of the example data."); plotButton.addActionListener(new ActionListener() { public void actionPerformed(ActionEvent e) { if (plotButton.isSelected()) { cardLayout.show(mainPanel, plotString); } } }); ButtonGroup group = new ButtonGroup(); group.add(metaDataButton); group.add(dataButton); group.add(plotButton); JPanel togglePanel = new JPanel(new FlowLayout(FlowLayout.LEFT)); togglePanel.add(metaDataButton); togglePanel.add(dataButton); togglePanel.add(plotButton); borderPanel.add(togglePanel, BorderLayout.NORTH); borderPanel.add(mainPanel, BorderLayout.CENTER); return borderPanel; } public Icon getResultIcon() { return resultIcon; } public List<Action> getActions() { List<Action> result = new LinkedList<Action>(); result.add(new AbstractAction("Save...") { private static final long serialVersionUID = 763183727596275786L; public void actionPerformed(ActionEvent e) { JOptionPane.showMessageDialog(null, "In the following, you can save both a data file and an attribute description file.", "Save data and meta data", JOptionPane.INFORMATION_MESSAGE); File dataFile = SwingTools.chooseFile(null, null, false, "dat", "example set data file"); try { if (dataFile != null) { Charset encoding = RapidMinerGUI.getMainFrame().getProcess().getRootOperator().getEncoding(); writeDataFile(dataFile, NumericalAttribute.UNLIMITED_NUMBER_OF_DIGITS, true, false, false, encoding); File attFile = SwingTools.chooseFile(null, dataFile, false, "aml", "attribute description file"); if (attFile != null) { writeAttributeFile(attFile, dataFile, encoding); } } } catch (Exception ex) { SwingTools.showSimpleErrorMessage("Cannot write example set to file '" + dataFile + "'", ex); } } }); return result; } // -------------------- File Writing -------------------- public void writeDataFile(File dataFile, int fractionDigits, boolean quoteWhitespace, boolean zipped, boolean append, Charset encoding) throws IOException { PrintWriter out = null; OutputStream outStream = null; try { if (zipped) { outStream = new GZIPOutputStream(new FileOutputStream(dataFile, append)); } else { outStream = new FileOutputStream(dataFile, append); } out = new PrintWriter(new OutputStreamWriter(outStream, encoding)); Iterator<Example> reader = iterator(); while (reader.hasNext()) { out.println(reader.next().toDenseString(fractionDigits, quoteWhitespace)); } } catch (IOException e) { throw e; } finally { if (out != null) { out.close(); } if (outStream != null) { outStream.close(); } } } /** Writes the data into a sparse file format. */ public void writeSparseDataFile(File dataFile, int format, int fractionDigits, boolean quoteWhitespace, boolean zipped, boolean append, Charset encoding) throws IOException { PrintWriter out = null; OutputStream outStream = null; try { if (zipped) { outStream = new GZIPOutputStream(new FileOutputStream(dataFile, append)); } else { outStream = new FileOutputStream(dataFile, append); } out = new PrintWriter(new OutputStreamWriter(outStream, encoding)); Iterator<Example> reader = iterator(); while (reader.hasNext()) { out.println(reader.next().toSparseString(format, fractionDigits, quoteWhitespace)); } } catch (IOException e) { throw e; } finally { if (out != null) { out.close(); } if (outStream != null) { outStream.close(); } } } /** * Writes the attribute descriptions for all examples. Writes first all * regular attributes and then the special attributes (just like the data * write format of {@link Example#toString()}. Please note that the given * data file will only be used to determine the relative position. */ public void writeAttributeFile(File attFile, File dataFile, Charset encoding) throws IOException { // determine relative path if (dataFile == null) throw new IOException("ExampleSet writing: cannot determine path to data file: data file was not given!"); String relativePath = Tools.getRelativePath(dataFile, attFile); PrintWriter aout = new PrintWriter(new OutputStreamWriter(new FileOutputStream(attFile), encoding)); aout.println("<?xml version=\"1.0\" encoding=\""+encoding+"\"?>"); aout.println("<attributeset default_source=\"" + relativePath + "\">" + Tools.getLineSeparator()); int sourcecol = 1; Iterator<AttributeRole> i = getAttributes().allAttributeRoles(); while (i.hasNext()) { if (sourcecol != 1) { aout.println(); } writeAttributeMetaData(i.next(), sourcecol, aout, false); sourcecol++; } aout.println(Tools.getLineSeparator() + "</attributeset>"); aout.close(); } /** * Writes the attribute descriptions for all examples. Writes only the * special attributes which are supported by the sparse format of the method * {@link Example#toSparseString(int, int, boolean)}. Please note that the given data * file is only be used to determine the relative position. */ public void writeSparseAttributeFile(File attFile, File dataFile, int format, Charset encoding) throws IOException { if (dataFile == null) throw new IOException("ExampleSet sparse writing: cannot determine path to data file: data file was not given!"); PrintWriter aout = new PrintWriter(new OutputStreamWriter(new FileOutputStream(attFile), encoding)); String relativePath = Tools.getRelativePath(dataFile, attFile); aout.println("<?xml version=\"1.0\" encoding=\""+encoding+"\"?>"); aout.println("<attributeset default_source=\"" + relativePath + "\">"); // some of special attributes AttributeRole labelRole = getAttributes().getRole(Attributes.LABEL_NAME); if ((labelRole != null) && (format != SparseFormatDataRowReader.FORMAT_NO_LABEL)) writeAttributeMetaData(labelRole, 0, aout, true); AttributeRole idRole = getAttributes().getRole(Attributes.ID_NAME); if (idRole != null) writeAttributeMetaData(idRole, 0, aout, true); AttributeRole weightRole = getAttributes().getRole(Attributes.WEIGHT_NAME); if (weightRole != null) writeAttributeMetaData(weightRole, 0, aout, true); // regular attributes int sourcecol = 1; for (Attribute attribute : getAttributes()) { writeAttributeMetaData("attribute", attribute, sourcecol, aout, true); sourcecol++; } aout.println("</attributeset>"); aout.close(); } /** Writes the data of this attribute in the given stream. */ private void writeAttributeMetaData(AttributeRole attributeRole, int sourcecol, PrintWriter aout, boolean sparse) { String tag = "attribute"; if (attributeRole.isSpecial()) tag = attributeRole.getSpecialName(); Attribute attribute = attributeRole.getAttribute(); writeAttributeMetaData(tag, attribute, sourcecol, aout, sparse); } /** Writes the data of this attribute in the given stream. */ private void writeAttributeMetaData(String tag, Attribute attribute, int sourcecol, PrintWriter aout, boolean sparse) { aout.println(" <" + Tools.escapeXML(tag)); aout.println(" name = \"" + Tools.escapeXML(attribute.getName()) + "\""); if (!sparse || tag.equals("attribute")) { aout.println(" sourcecol = \"" + sourcecol + "\""); } aout.print(" valuetype = \"" + Ontology.ATTRIBUTE_VALUE_TYPE.mapIndex(attribute.getValueType()) + "\""); if (!Ontology.ATTRIBUTE_BLOCK_TYPE.isA(attribute.getBlockType(), Ontology.SINGLE_VALUE)) aout.print(Tools.getLineSeparator() + " blocktype = \"" + Ontology.ATTRIBUTE_BLOCK_TYPE.mapIndex(attribute.getBlockType()) + "\""); if ((Ontology.ATTRIBUTE_VALUE_TYPE.isA(attribute.getValueType(), Ontology.NOMINAL)) && (!tag.equals(Attributes.KNOWN_ATTRIBUTE_TYPES[Attributes.TYPE_ID]))) { aout.println(">"); Iterator<String> i = attribute.getMapping().getValues().iterator(); while (i.hasNext()) { aout.println(" <value>" + Tools.escapeXML(i.next()) + "</value>"); } aout.println(" </" + Tools.escapeXML(tag) + ">"); } else { // no values, simply end this attribute aout.println("/>"); } } public String getExtension() { return "aml"; } public String getFileDescription() { return "attribute description file"; } /** * Returns true, if all attributes including labels and other special * attributes are equal. */ public boolean equals(Object o) { if (!(o instanceof ExampleSet)) { return false; } ExampleSet es = (ExampleSet) o; return getAttributes().equals(es.getAttributes()); } /** Returns the hash code of all attributes. */ public int hashCode() { return getAttributes().hashCode(); } public IOObject copy() { return (IOObject)clone(); } /** Clones the example set by invoking a single argument clone constructor. Please note that a cloned * example set has no information about the attribute statistics. That means, that attribute statistics * must be (re-)calculated after the clone was created. */ public Object clone() { try { Class<? extends AbstractExampleSet> clazz = getClass(); java.lang.reflect.Constructor cloneConstructor = clazz.getConstructor(new Class[] { clazz }); AbstractExampleSet result = (AbstractExampleSet)cloneConstructor.newInstance(new Object[] { this }); result.idMap = this.idMap; return result; } catch (IllegalAccessException e) { throw new RuntimeException("Cannot clone ExampleSet: " + e.getMessage()); } catch (NoSuchMethodException e) { throw new RuntimeException("'" + getClass().getName() + "' does not implement clone constructor!"); } catch (java.lang.reflect.InvocationTargetException e) { throw new RuntimeException("Cannot clone " + getClass().getName() + ": " + e + ". Target: " + e.getTargetException() + ". Cause: " + e.getCause() + "."); } catch (InstantiationException e) { throw new RuntimeException("Cannot clone " + getClass().getName() + ": " + e); } } // ============================================================================= public void remapIds() { idMap = new HashMap<Double, Integer>(); Attribute idAttribute = getAttributes().getSpecial(Attributes.ID_NAME); if (idAttribute != null) { int index = 0; for (Example example : this) { idMap.put(example.getValue(idAttribute), index); index++; } } } // ============================================================================= /** * Recalculates the attribute statistics for all attributes. They are * average value, variance, minimum, and maximum. For nominal attributes the * occurences for all values are counted. This method collects all * attributes (regular and special) in a list and invokes * <code>recalculateAttributeStatistics(List attributes)</code> and * performs only one data scan. */ public void recalculateAllAttributeStatistics() { List<Attribute> allAttributes = new ArrayList<Attribute>(); Iterator<Attribute> a = getAttributes().allAttributes(); while (a.hasNext()) { allAttributes.add(a.next()); } recalculateAttributeStatistics(allAttributes); } /** Recalculate the attribute statistics of the given attribute. */ public void recalculateAttributeStatistics(Attribute attribute) { List<Attribute> allAttributes = new ArrayList<Attribute>(); allAttributes.add(attribute); recalculateAttributeStatistics(allAttributes); } /** * Here the Example Set is parsed only once, all the information is retained * for each example set. */ private void recalculateAttributeStatistics(List<Attribute> attributeList) { // do nothing if not desired if (attributeList.size() == 0) { return; } else { // init statistics for (Attribute attribute : attributeList) { Iterator<Statistics> stats = attribute.getAllStatistics(); while (stats.hasNext()) { Statistics statistics = stats.next(); statistics.startCounting(attribute); } } // calculate statistics for (Example example : this) { for (Attribute attribute : attributeList) { double value = example.getValue(attribute); Iterator<Statistics> stats = attribute.getAllStatistics(); while (stats.hasNext()) { Statistics statistics = stats.next(); statistics.count(value); } } } // store cloned statistics for (Attribute attribute : attributeList) { List<Statistics> statisticsList = statisticsMap.get(attribute.getName()); // no stats known for this attribute at all --> new list if (statisticsList == null) { statisticsList = new LinkedList<Statistics>(); statisticsMap.put(attribute.getName(), statisticsList); } // in all cases: clear the list before adding new stats (clone of the calculations) statisticsList.clear(); Iterator<Statistics> stats = attribute.getAllStatistics(); while (stats.hasNext()) { Statistics statistics = (Statistics)stats.next().clone(); statisticsList.add(statistics); } } } } /** Returns the desired statistic for the given attribute. This method should be * preferred over the deprecated method Attribute#getStatistics(String) * since it correctly calculates and keep the statistics for the current example * set and does not overwrite the statistics in the attribute. * Invokes the method {@link #getStatistics(Attribute, String, String)} with a null * statistics parameter. */ public double getStatistics(Attribute attribute, String statisticsName) { return getStatistics(attribute, statisticsName, null); } /** Returns the desired statistic for the given attribute. This method should be * preferred over the deprecated method Attribute#getStatistics(String) * since it correctly calculates and keep the statistics for the current example * set and does not overwrite the statistics in the attribute. If the statistics * were not calculated before (via one of the recalculate methods) this method * will return NaN. If no statistics is available for the given name, also NaN * is returned. */ public double getStatistics(Attribute attribute, String statisticsName, String statisticsParameter) { List<Statistics> statisticsList = statisticsMap.get(attribute.getName()); if (statisticsList == null) return Double.NaN; for (Statistics statistics : statisticsList) { if (statistics.handleStatistics(statisticsName)) { return statistics.getStatistics(attribute, statisticsName, statisticsParameter); } } return Double.NaN; } }