/*
 *   This program is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, either version 3 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

/*
 * CheckSource.java
 * Copyright (C) 2007-2012 University of Waikato, Hamilton, New Zealand
 */

package weka.classifiers;

import java.io.File;
import java.util.Enumeration;
import java.util.Vector;

import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.RevisionHandler;
import weka.core.RevisionUtils;
import weka.core.Utils;
import weka.core.converters.ConverterUtils.DataSource;

/**
 * A simple class for checking the source generated from Classifiers
 * implementing the <code>weka.classifiers.Sourcable</code> interface.
 * It takes a classifier, the classname of the generated source
 * and the dataset the source was generated with as parameters and tests
 * the output of the built classifier against the output of the generated
 * source. Use option '-h' to display all available commandline options.
 *
 * <!-- options-start -->
 * Valid options are: <p/>
 *
 * <pre> -W &lt;classname and options&gt;
 *  The classifier (incl. options) that was used to generate
 *  the source code.</pre>
 *
 * <pre> -S &lt;classname&gt;
 *  The classname of the generated source code.</pre>
 *
 * <pre> -t &lt;file&gt;
 *  The training set with which the source code was generated.</pre>
 *
 * <pre> -c &lt;index&gt;
 *  The class index of the training set. 'first' and 'last' are
 *  valid indices.
 *  (default: last)</pre>
 *
 * <!-- options-end -->
 *
 * Options for the classifier are supplied as part of the -W specification
 * string (classname followed by its options).
 *
 * @author  fracpete (fracpete at waikato dot ac dot nz)
 * @version $Revision: 8034 $
 * @see     weka.classifiers.Sourcable
 */
public class CheckSource
  implements OptionHandler, RevisionHandler {

  /** the classifier used for generating the source code */
  protected Classifier m_Classifier = null;

  /** the generated source code */
  protected Classifier m_SourceCode = null;

  /** the dataset to use for testing */
  protected File m_Dataset = null;

  /** the class index (0-based; -1 stands for the last attribute) */
  protected int m_ClassIndex = -1;

  /**
   * Returns an enumeration describing the available options.
   *
   * @return an enumeration of all the available options.
   */
  public Enumeration listOptions() {
    Vector<Option> result = new Vector<Option>();

    result.addElement(new Option(
        "\tThe classifier (incl. options) that was used to generate\n"
        + "\tthe source code.",
        "W", 1, "-W <classname and options>"));

    result.addElement(new Option(
        "\tThe classname of the generated source code.",
        "S", 1, "-S <classname>"));

    result.addElement(new Option(
        "\tThe training set with which the source code was generated.",
        "t", 1, "-t <file>"));

    result.addElement(new Option(
        "\tThe class index of the training set. 'first' and 'last' are\n"
        + "\tvalid indices.\n"
        + "\t(default: last)",
        "c", 1, "-c <index>"));

    return result.elements();
  }

  /**
   * Parses a given list of options. <p/>
   *
   * <!-- options-start -->
   * Valid options are: <p/>
   *
   * <pre> -W &lt;classname and options&gt;
   *  The classifier (incl. options) that was used to generate
   *  the source code.</pre>
   *
   * <pre> -S &lt;classname&gt;
   *  The classname of the generated source code.</pre>
   *
   * <pre> -t &lt;file&gt;
   *  The training set with which the source code was generated.</pre>
   *
   * <pre> -c &lt;index&gt;
   *  The class index of the training set. 'first' and 'last' are
   *  valid indices.
   *  (default: last)</pre>
   *
   * <!-- options-end -->
   *
   * Options for the classifier are supplied as part of the -W specification
   * string; the -S specification must consist of the classname only.
   * The -W, -S and -t options are mandatory.
   *
   * @param options	the list of options as an array of strings
   * @throws Exception	if a mandatory option is missing or an option value
   * 			cannot be processed
   */
  public void setOptions(String[] options) throws Exception {
    String	tmpStr;
    String[]	spec;
    String	classname;

    // classifier: "<classname> <options...>"
    tmpStr = Utils.getOption('W', options);
    if (tmpStr.length() == 0) {
      throw new Exception("No classifier (classname + options) provided!");
    }
    spec = Utils.splitOptions(tmpStr);
    if (spec.length == 0) {
      throw new IllegalArgumentException("Invalid classifier specification string");
    }
    classname = spec[0];
    spec[0]   = "";  // blank out the classname, the remainder are the options
    setClassifier((Classifier) Utils.forName(Classifier.class, classname, spec));

    // generated source code: classname only, no options allowed
    tmpStr = Utils.getOption('S', options);
    if (tmpStr.length() == 0) {
      throw new Exception("No source code (classname) provided!");
    }
    spec = Utils.splitOptions(tmpStr);
    if (spec.length != 1) {
      throw new IllegalArgumentException("Invalid source code specification string");
    }
    classname = spec[0];
    spec[0]   = "";
    setSourceCode((Classifier) Utils.forName(Classifier.class, classname, spec));

    // dataset
    tmpStr = Utils.getOption('t', options);
    if (tmpStr.length() == 0) {
      throw new Exception("No dataset provided!");
    }
    setDataset(new File(tmpStr));

    // class index: 1-based on the commandline, 0-based internally, -1 = last
    tmpStr = Utils.getOption('c', options);
    if (tmpStr.length() == 0 || tmpStr.equals("last")) {
      setClassIndex(-1);
    }
    else if (tmpStr.equals("first")) {
      setClassIndex(0);
    }
    else {
      setClassIndex(Integer.parseInt(tmpStr) - 1);
    }
  }

  /**
   * Gets the current settings of the Classifier.
   *
   * @return an array of strings suitable for passing to setOptions
   */
  public String[] getOptions() {
    Vector<String> result = new Vector<String>();

    Classifier classifier = getClassifier();
    if (classifier != null) {
      result.add("-W");
      // only classifiers that are OptionHandlers can report their options;
      // an unconditional cast would fail for any other classifier
      if (classifier instanceof OptionHandler) {
	result.add(classifier.getClass().getName() + " "
	    + Utils.joinOptions(((OptionHandler) classifier).getOptions()));
      }
      else {
	result.add(classifier.getClass().getName());
      }
    }

    if (getSourceCode() != null) {
      result.add("-S");
      result.add(getSourceCode().getClass().getName());
    }

    if (getDataset() != null) {
      result.add("-t");
      result.add(getDataset().getAbsolutePath());
    }

    // the internal index is 0-based, the commandline one 1-based
    result.add("-c");
    if (getClassIndex() == -1) {
      result.add("last");
    }
    else if (getClassIndex() == 0) {
      result.add("first");
    }
    else {
      result.add("" + (getClassIndex() + 1));
    }

    return result.toArray(new String[result.size()]);
  }

  /**
   * Sets the classifier to use for the comparison.
   *
   * @param value	the classifier to use
   */
  public void setClassifier(Classifier value) {
    m_Classifier = value;
  }

  /**
   * Gets the classifier being used for the tests, can be null.
   *
   * @return		the currently set classifier
   */
  public Classifier getClassifier() {
    return m_Classifier;
  }

  /**
   * Sets the class to test.
   *
   * @param value	the class to test
   */
  public void setSourceCode(Classifier value) {
    m_SourceCode = value;
  }

  /**
   * Gets the class to test.
   *
   * @return		the currently set class, can be null.
   */
  public Classifier getSourceCode() {
    return m_SourceCode;
  }

  /**
   * Sets the dataset to use for testing. The file must exist, otherwise
   * the current dataset is left untouched and an exception is thrown.
   *
   * @param value	the dataset to use, must not be null
   * @throws IllegalArgumentException	if the file does not exist
   */
  public void setDataset(File value) {
    if (!value.exists()) {
      throw new IllegalArgumentException(
	  "Dataset '" + value.getAbsolutePath() + "' does not exist!");
    }
    m_Dataset = value;
  }

  /**
   * Gets the dataset to use for testing, can be null.
   *
   * @return		the dataset to use.
   */
  public File getDataset() {
    return m_Dataset;
  }

  /**
   * Sets the class index of the dataset.
   *
   * @param value	the 0-based class index of the dataset, -1 for the
   * 			last attribute
   */
  public void setClassIndex(int value) {
    m_ClassIndex = value;
  }

  /**
   * Gets the class index of the dataset.
   *
   * @return		the current class index (0-based, -1 for the last
   * 			attribute).
   */
  public int getClassIndex() {
    return m_ClassIndex;
  }

  /**
   * Performs the comparison test: a copy of the classifier is built on the
   * dataset and its prediction for every instance is compared against the
   * prediction of the generated source code. Each disagreement is printed
   * to stdout.
   *
   * @return		true if all predictions agree
   * @throws Exception	if the setup is incomplete, the dataset cannot be
   * 			loaded or building/predicting fails
   */
  public boolean execute() throws Exception {
    boolean	result;
    Classifier	cls;
    Classifier	code;
    int		i;
    Instances	data;
    DataSource	source;
    boolean	numeric;
    double	predClassifier;
    double	predSource;

    result = true;

    // a few checks
    if (getClassifier() == null) {
      throw new Exception("No classifier set!");
    }
    if (getSourceCode() == null) {
      throw new Exception("No source code set!");
    }
    if (getDataset() == null) {
      throw new Exception("No dataset set!");
    }
    if (!getDataset().exists()) {
      throw new Exception(
	  "Dataset '" + getDataset().getAbsolutePath() + "' does not exist!");
    }

    // load data
    source = new DataSource(getDataset().getAbsolutePath());
    data   = source.getDataSet();
    if (getClassIndex() == -1) {
      data.setClassIndex(data.numAttributes() - 1);
    }
    else {
      data.setClassIndex(getClassIndex());
    }
    numeric = data.classAttribute().isNumeric();

    // build classifier (on a copy, the configured instance stays untouched)
    cls = AbstractClassifier.makeCopy(getClassifier());
    cls.buildClassifier(data);

    code = getSourceCode();

    // compare predictions
    for (i = 0; i < data.numInstances(); i++) {
      predClassifier = cls.classifyInstance(data.instance(i));
      predSource     = code.classifyInstance(data.instance(i));

      if (!predictionsDiffer(numeric, predClassifier, predSource)) {
	continue;
      }

      result = false;
      if (numeric) {
	System.out.println(
	    (i+1) + ". instance (Classifier/Source code): "
	    + predClassifier + " != " + predSource);
      }
      else {
	System.out.println(
	    (i+1) + ". instance (Classifier/Source code): "
	    + nominalLabel(data, predClassifier)
	    + " != " + nominalLabel(data, predSource));
      }
    }

    return result;
  }

  /**
   * Decides whether two predictions disagree.
   * <p/>
   * NaN predictions are handled explicitly: two NaNs agree, a NaN and a
   * regular value disagree. Casting NaN to int yields 0, so without this
   * check a NaN prediction would be treated as equal to class index 0.
   *
   * @param numeric		whether the class attribute is numeric
   * @param predClassifier	the prediction of the classifier
   * @param predSource		the prediction of the generated source code
   * @return			true if the predictions differ
   */
  private static boolean predictionsDiffer(boolean numeric, double predClassifier, double predSource) {
    boolean nanClassifier = Double.isNaN(predClassifier);
    boolean nanSource     = Double.isNaN(predSource);

    if (nanClassifier || nanSource) {
      return nanClassifier != nanSource;
    }
    if (numeric) {
      return !Utils.eq(predClassifier, predSource);
    }
    return ((int) predClassifier != (int) predSource);
  }

  /**
   * Turns a nominal prediction into the label to display.
   *
   * @param data	the dataset providing the class attribute
   * @param pred	the predicted class index, may be NaN
   * @return		the class label, or "?" for a NaN prediction
   */
  private static String nominalLabel(Instances data, double pred) {
    if (Double.isNaN(pred)) {
      return "?";
    }
    return data.classAttribute().value((int) pred);
  }

  /**
   * Returns the revision string.
   *
   * @return		the revision
   */
  public String getRevision() {
    return RevisionUtils.extract("$Revision: 8034 $");
  }

  /**
   * Executes the tests, use "-h" to list the commandline options.
   *
   * @param args	the commandline parameters
   * @throws Exception	if something goes wrong
   */
  public static void main(String[] args) throws Exception{
    CheckSource check = new CheckSource();

    if (Utils.getFlag('h', args)) {
      StringBuilder text = new StringBuilder();
      text.append("\nHelp requested:\n\n");
      Enumeration<?> enm = check.listOptions();
      while (enm.hasMoreElements()) {
	Option option = (Option) enm.nextElement();
	text.append(option.synopsis()).append("\n");
	text.append(option.description()).append("\n");
      }
      System.out.println("\n" + text + "\n");
    }
    else {
      check.setOptions(args);
      if (check.execute()) {
	System.out.println("Tests OK!");
      }
      else {
	System.out.println("Tests failed!");
      }
    }
  }
}