/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ /* * Copyright (C) 2002 University of Waikato */ package weka.filters; import weka.classifiers.Classifier; import weka.classifiers.meta.FilteredClassifier; import weka.core.CheckGOE; import weka.core.CheckOptionHandler; import weka.core.Instance; import weka.core.Instances; import weka.core.OptionHandler; import weka.core.SerializationHelper; import weka.core.TestInstances; import weka.core.Capabilities.Capability; import weka.test.Regression; import java.io.BufferedReader; import java.io.InputStreamReader; import java.io.StringWriter; import junit.framework.TestCase; /** * Abstract Test class for Filters. * * @author <a href="mailto:len@reeltwo.com">Len Trigg</a> * @authro FracPete (fracpete at waikato dot ac dot nz) * @version $Revision: 6439 $ */ public abstract class AbstractFilterTest extends TestCase { // TODO: // * Check that results between incremental and batch use are // the same // * Check batch operation is OK // * Check memory use between subsequent runs // * Check memory use when multiplying data? /** Set to true to print out extra info during testing */ protected static boolean VERBOSE = false; /** The filter to be tested */ protected Filter m_Filter; /** A set of instances to test with */ protected Instances m_Instances; /** the OptionHandler tester */ protected CheckOptionHandler m_OptionTester; /** the FilteredClassifier instance used for tests */ protected FilteredClassifier m_FilteredClassifier; /** for testing GOE stuff */ protected CheckGOE m_GOETester; /** * Constructs the <code>AbstractFilterTest</code>. Called by subclasses. * * @param name the name of the test class */ public AbstractFilterTest(String name) { super(name); } /** * Called by JUnit before each test method. This implementation creates * the default filter to test and loads a test set of Instances. * * @throws Exception if an error occurs reading the example instances. */ protected void setUp() throws Exception { m_Filter = getFilter(); m_Instances = new Instances(new BufferedReader(new InputStreamReader(ClassLoader.getSystemResourceAsStream("weka/filters/data/FilterTest.arff")))); m_OptionTester = getOptionTester(); m_GOETester = getGOETester(); m_FilteredClassifier = getFilteredClassifier(); } /** Called by JUnit after each test method */ protected void tearDown() { m_Filter = null; m_Instances = null; m_OptionTester = null; m_GOETester = null; m_FilteredClassifier = null; } /** * Configures the CheckOptionHandler uses for testing the optionhandling. * Sets the scheme to test. * * @return the fully configured CheckOptionHandler */ protected CheckOptionHandler getOptionTester() { CheckOptionHandler result; result = new CheckOptionHandler(); if (getFilter() instanceof OptionHandler) result.setOptionHandler((OptionHandler) getFilter()); else result.setOptionHandler(null); result.setUserOptions(new String[0]); result.setSilent(true); return result; } /** * Configures the CheckGOE used for testing GOE stuff. * Sets the Filter returned from the getFilter() method. * * @return the fully configured CheckGOE * @see #getFilter() */ protected CheckGOE getGOETester() { CheckGOE result; result = new CheckGOE(); result.setObject(getFilter()); result.setSilent(true); return result; } /** * returns the configured FilteredClassifier. Since the base classifier is * determined heuristically, derived tests might need to adjust it. * * @return the configured FilteredClassifier */ protected FilteredClassifier getFilteredClassifier() { FilteredClassifier result; Filter filter; Classifier cls; result = new FilteredClassifier(); // set filter filter = getFilter(); result.setFilter(filter); // set classifier if (filter.getCapabilities().handles(Capability.NOMINAL_CLASS)) cls = new weka.classifiers.trees.J48(); else if (filter.getCapabilities().handles(Capability.BINARY_CLASS)) cls = new weka.classifiers.trees.J48(); else if (filter.getCapabilities().handles(Capability.UNARY_CLASS)) cls = new weka.classifiers.trees.J48(); else if (filter.getCapabilities().handles(Capability.NUMERIC_CLASS)) cls = new weka.classifiers.trees.M5P(); else if (filter.getCapabilities().handles(Capability.DATE_CLASS)) cls = new weka.classifiers.trees.M5P(); else throw new IllegalStateException("Cannot determine base classifier for FilteredClassifier!"); result.setClassifier(cls); return result; } /** * returns data generated for the FilteredClassifier test * * @return the dataset for the FilteredClassifier * @throws Exception if generation of data fails */ protected Instances getFilteredClassifierData() throws Exception { TestInstances test; Instances result; // NB: in order to make sure that the classifier can handle the data, // we're using the classifier's capabilities to generate the data. test = TestInstances.forCapabilities( m_FilteredClassifier.getClassifier().getCapabilities()); test.setClassIndex(TestInstances.CLASS_IS_LAST); result = test.generate(); return result; } /** * Used to create an instance of a specific filter. The filter * should be configured to operate on a dataset that contains * attributes in this order:<p> * * String, Nominal, Numeric, String, Nominal, Numeric<p> * * Where the first three attributes do not contain any missing values, * but the last three attributes do. If the filter is for some reason * incapable of accepting a dataset of this type, override setUp() to * either manipulate the default dataset to be compatible, or load another * test dataset. <p> * * The configured filter should preferrably do something * meaningful, since the results of filtering are used as the default * regression output (and it would hardly be interesting if the filtered * data was the same as the input data). * * @return a suitably configured <code>Filter</code> value */ public abstract Filter getFilter(); /** * Simple method to return the filtered set of test instances after * passing through the test filter. m_Filter contains the filter and * m_Instances contains the test instances. * * @return the Instances after filtering through the filter we have set * up to test. */ protected Instances useFilter() { Instances result = null; Instances icopy = new Instances(m_Instances); try { m_Filter.setInputFormat(icopy); } catch (Exception ex) { ex.printStackTrace(); fail("Exception thrown on setInputFormat(): \n" + ex.getMessage()); } try { result = Filter.useFilter(icopy, m_Filter); assertNotNull(result); } catch (Exception ex) { ex.printStackTrace(); fail("Exception thrown on useFilter(): \n" + ex.getMessage()); } return result; } /** * tests whether the scheme declares a serialVersionUID. */ public void testSerialVersionUID() { if (SerializationHelper.needsUID(m_Filter.getClass())) fail("Doesn't declare serialVersionUID!"); } /** * Test buffered operation. Output instances are only collected after * all instances are passed through */ public void testBuffered() { Instances icopy = new Instances(m_Instances); Instances result = null; try { m_Filter.setInputFormat(icopy); } catch (Exception ex) { ex.printStackTrace(); fail("Exception thrown on setInputFormat(): \n" + ex.getMessage()); } try { result = Filter.useFilter(icopy, m_Filter); assertNotNull(result); } catch (Exception ex) { ex.printStackTrace(); fail("Exception thrown on useFilter(): \n" + ex.getMessage()); } // Check the output is valid for printing by trying to write out to // a stringbuffer StringWriter sw = new StringWriter(2000); sw.write(result.toString()); // Check the input hasn't been modified // We just check the headers are the same and that the instance // count is the same. assertTrue(icopy.equalHeaders(m_Instances)); assertEquals(icopy.numInstances(), m_Instances.numInstances()); // Try repeating the filtering and check we get the same results Instances result2 = null; try { m_Filter.setInputFormat(icopy); } catch (Exception ex) { ex.printStackTrace(); fail("Exception thrown on setInputFormat(): \n" + ex.getMessage()); } try { result2 = Filter.useFilter(icopy, m_Filter); assertNotNull(result2); } catch (Exception ex) { ex.printStackTrace(); fail("Exception thrown on useFilter(): \n" + ex.getMessage()); } // Again check the input hasn't been modified // We just check the headers are the same and that the instance // count is the same. assertTrue(icopy.equalHeaders(m_Instances)); assertEquals(icopy.numInstances(), m_Instances.numInstances()); // Check the same results for both runs assertTrue(result.equalHeaders(result2)); assertEquals(result.numInstances(), result2.numInstances()); } /** * Test incremental operation. Each instance is removed as soon as it * is made available */ public void testIncremental() { Instances icopy = new Instances(m_Instances); Instances result = null; boolean headerImmediate = false; try { headerImmediate = m_Filter.setInputFormat(icopy); } catch (Exception ex) { ex.printStackTrace(); fail("Exception thrown on setInputFormat(): \n" + ex.getMessage()); } if (headerImmediate) { if (VERBOSE) System.err.println("Filter makes header immediately available."); result = m_Filter.getOutputFormat(); } // Pass all the instances to the filter for (int i = 0; i < icopy.numInstances(); i++) { if (VERBOSE) System.err.println("Input instance to filter"); boolean collectNow = false; try { collectNow = m_Filter.input(icopy.instance(i)); } catch (Exception ex) { ex.printStackTrace(); fail("Exception thrown on input(): \n" + ex.getMessage()); } if (collectNow) { if (VERBOSE) System.err.println("Filter said collect immediately"); if (!headerImmediate) { fail("Filter didn't return true from setInputFormat() earlier!"); } if (VERBOSE) System.err.println("Getting output instance"); result.add(m_Filter.output()); } } // Say that input has finished, and print any pending output instances if (VERBOSE) System.err.println("Setting end of batch"); boolean toCollect = false; try { toCollect = m_Filter.batchFinished(); } catch (Exception ex) { ex.printStackTrace(); fail("Exception thrown on batchFinished(): \n" + ex.getMessage()); } if (toCollect) { if (VERBOSE) System.err.println("Filter said collect output"); if (!headerImmediate) { if (VERBOSE) System.err.println("Getting output format"); result = m_Filter.getOutputFormat(); } if (VERBOSE) System.err.println("Getting output instance"); while (m_Filter.numPendingOutput() > 0) { result.add(m_Filter.output()); if (VERBOSE) System.err.println("Getting output instance"); } } assertNotNull(result); // Check the output iss valid for printing by trying to write out to // a stringbuffer StringWriter sw = new StringWriter(2000); sw.write(result.toString()); } /** * Describe <code>testRegression</code> method here. * */ public void testRegression() { Regression reg = new Regression(this.getClass()); Instances result = useFilter(); reg.println(result.toString()); try { String diff = reg.diff(); if (diff == null) { System.err.println("Warning: No reference available, creating."); } else if (!diff.equals("")) { fail("Regression test failed. Difference:\n" + diff); } } catch (java.io.IOException ex) { fail("Problem during regression testing.\n" + ex); } reg = new Regression(this.getClass()); // Run the filter using deprecated calls to check it still works the same Instances icopy = new Instances(m_Instances); try { m_Filter.setInputFormat(icopy); } catch (Exception ex) { ex.printStackTrace(); fail("Exception thrown on setInputFormat(): \n" + ex.getMessage()); } try { for (int i = 0; i < icopy.numInstances(); i++) { m_Filter.input(icopy.instance(i)); } m_Filter.batchFinished(); result = m_Filter.getOutputFormat(); weka.core.Instance processed; while ((processed = m_Filter.output()) != null) { result.add(processed); } assertNotNull(result); } catch (Exception ex) { ex.printStackTrace(); fail("Exception thrown on useFilter(): \n" + ex.getMessage()); } reg.println(result.toString()); try { String diff = reg.diff(); if (diff == null) { System.err.println("Warning: No reference available, creating."); } else if (!diff.equals("")) { fail("Regression test failed when using deprecated methods. Difference:\n" + diff); } } catch (java.io.IOException ex) { fail("Problem during regression testing.\n" + ex); } } public void testThroughput() { if (VERBOSE) { Instances icopy = new Instances(m_Instances); // Make a bigger dataset Instances result = null; for (int i = 0; i < 20000; i++) { icopy.add(m_Instances.instance(i%m_Instances.numInstances())); } long starttime, endtime; double secs, rate; // Time incremental usage starttime = System.currentTimeMillis(); boolean headerImmediate = false; try { headerImmediate = m_Filter.setInputFormat(icopy); if (headerImmediate) { result = m_Filter.getOutputFormat(); } for (int i = 0; i < icopy.numInstances(); i++) { boolean collectNow = false; collectNow = m_Filter.input(icopy.instance(i)); if (collectNow) { if (!headerImmediate) { fail("Filter didn't return true from setInputFormat() earlier!"); } result.add(m_Filter.output()); } } // Say that input has finished, and print any pending output instances boolean toCollect = false; toCollect = m_Filter.batchFinished(); if (toCollect) { if (!headerImmediate) { result = m_Filter.getOutputFormat(); } while (m_Filter.numPendingOutput() > 0) { result.add(m_Filter.output()); } } } catch (Exception ex) { ex.printStackTrace(); fail("Exception thrown during incremental filtering: \n" + ex.getMessage()); } endtime = System.currentTimeMillis(); secs = (double)(endtime - starttime) / 1000; rate = (double)icopy.numInstances() / secs; System.err.println("\n" + m_Filter.getClass().getName() + " incrementally processed " + rate + " instances per sec"); // Time batch usage starttime = System.currentTimeMillis(); try { m_Filter.setInputFormat(icopy); result = Filter.useFilter(icopy, m_Filter); assertNotNull(result); } catch (Exception ex) { ex.printStackTrace(); fail("Exception thrown during batch filtering: \n" + ex.getMessage()); } endtime = System.currentTimeMillis(); secs = (double)(endtime - starttime) / 1000; rate = (double)icopy.numInstances() / secs; System.err.println("\n" + m_Filter.getClass().getName() + " batch processed " + rate + " instances per sec"); } } /** * tests the listing of the options */ public void testListOptions() { if (m_OptionTester.getOptionHandler() != null) { if (!m_OptionTester.checkListOptions()) fail("Options cannot be listed via listOptions."); } } /** * tests the setting of the options */ public void testSetOptions() { if (m_OptionTester.getOptionHandler() != null) { if (!m_OptionTester.checkSetOptions()) fail("setOptions method failed."); } } /** * tests whether the default settings are processed correctly */ public void testDefaultOptions() { if (m_OptionTester.getOptionHandler() != null) { if (!m_OptionTester.checkDefaultOptions()) fail("Default options were not processed correctly."); } } /** * tests whether there are any remaining options */ public void testRemainingOptions() { if (m_OptionTester.getOptionHandler() != null) { if (!m_OptionTester.checkRemainingOptions()) fail("There were 'left-over' options."); } } /** * tests the whether the user-supplied options stay the same after setting. * getting, and re-setting again. * * @see #getOptionTester() */ public void testCanonicalUserOptions() { if (m_OptionTester.getOptionHandler() != null) { if (!m_OptionTester.checkCanonicalUserOptions()) fail("setOptions method failed"); } } /** * tests the resetting of the options to the default ones */ public void testResettingOptions() { if (m_OptionTester.getOptionHandler() != null) { if (!m_OptionTester.checkSetOptions()) fail("Resetting of options failed"); } } /** * tests the filter in conjunction with the FilteredClassifier */ public void testFilteredClassifier() { Instances data; int i; // skip this test if a subclass has set the // filtered classifier to null if (m_FilteredClassifier == null) { return; } try { // generate data data = getFilteredClassifierData(); // build classifier m_FilteredClassifier.buildClassifier(data); // test classifier for (i = 0; i < data.numInstances(); i++) { m_FilteredClassifier.classifyInstance(data.instance(i)); } } catch (Exception e) { fail("Problem with FilteredClassifier: " + e.toString()); } } /** * simulates batch filtering */ public void testBatchFiltering() { Instances result = null; Instances icopy = new Instances(m_Instances); // setup filter try { if (m_Filter.setInputFormat(icopy)) { result = m_Filter.getOutputFormat(); assertNotNull("Output format defined (setup)", result); } } catch (Exception ex) { ex.printStackTrace(); fail("Exception thrown on setInputFormat(): \n" + ex); } // first batch try { for (int i = 0; i < icopy.numInstances(); i++) { if (m_Filter.input(icopy.instance(i))) { Instance out = m_Filter.output(); assertNotNull("Instance not made available immediately (1. batch)", out); result.add(out); } } m_Filter.batchFinished(); if (result == null) { result = m_Filter.getOutputFormat(); assertNotNull("Output format defined (1. batch)", result); assertTrue("Pending output instances (1. batch)", m_Filter.numPendingOutput() > 0); } while (m_Filter.numPendingOutput() > 0) result.add(m_Filter.output()); } catch (Exception ex) { ex.printStackTrace(); fail("Exception thrown during 1. batch: \n" + ex); } // second batch try { result = null; if (m_Filter.isOutputFormatDefined()) result = m_Filter.getOutputFormat(); for (int i = 0; i < icopy.numInstances(); i++) { if (m_Filter.input(icopy.instance(i))) { if (result == null) { fail("Filter didn't return true from isOutputFormatDefined() (2. batch)"); } else { Instance out = m_Filter.output(); assertNotNull("Instance not made available immediately (2. batch)", out); result.add(out); } } } m_Filter.batchFinished(); if (result == null) { result = m_Filter.getOutputFormat(); assertNotNull("Output format defined (2. batch)", result); assertTrue("Pending output instances (2. batch)", m_Filter.numPendingOutput() > 0); } while (m_Filter.numPendingOutput() > 0) result.add(m_Filter.output()); } catch (Exception ex) { ex.printStackTrace(); fail("Exception thrown during 2. batch: \n" + ex); } } /** * simulates batch filtering (with the second dataset being smaller) */ public void testBatchFilteringSmaller() { Instances result = null; Instances icopy = new Instances(m_Instances); // setup filter try { if (m_Filter.setInputFormat(icopy)) { result = m_Filter.getOutputFormat(); assertNotNull("Output format defined (setup)", result); } } catch (Exception ex) { ex.printStackTrace(); fail("Exception thrown on setInputFormat(): \n" + ex); } // first batch try { for (int i = 0; i < icopy.numInstances(); i++) { if (m_Filter.input(icopy.instance(i))) { Instance out = m_Filter.output(); assertNotNull("Instance not made available immediately (1. batch)", out); result.add(out); } } m_Filter.batchFinished(); if (result == null) { result = m_Filter.getOutputFormat(); assertNotNull("Output format defined (1. batch)", result); assertTrue("Pending output instances (1. batch)", m_Filter.numPendingOutput() > 0); } while (m_Filter.numPendingOutput() > 0) result.add(m_Filter.output()); } catch (Exception ex) { ex.printStackTrace(); fail("Exception thrown during 1. batch: \n" + ex); } // second batch try { result = null; if (m_Filter.isOutputFormatDefined()) result = m_Filter.getOutputFormat(); // delete some instances int num = (int) ((double) icopy.numInstances() * 0.3); for (int i = 0; i < num; i++) icopy.delete(0); for (int i = 0; i < icopy.numInstances(); i++) { if (m_Filter.input(icopy.instance(i))) { if (result == null) { fail("Filter didn't return true from isOutputFormatDefined() (2. batch)"); } else { Instance out = m_Filter.output(); assertNotNull("Instance not made available immediately (2. batch)", out); result.add(out); } } } m_Filter.batchFinished(); if (result == null) { result = m_Filter.getOutputFormat(); assertNotNull("Output format defined (2. batch)", result); assertTrue("Pending output instances (2. batch)", m_Filter.numPendingOutput() > 0); } while (m_Filter.numPendingOutput() > 0) result.add(m_Filter.output()); } catch (Exception ex) { ex.printStackTrace(); fail("Exception thrown during 2. batch: \n" + ex); } } /** * simulates batch filtering (with the second dataset being bigger) */ public void testBatchFilteringLarger() { Instances result = null; Instances icopy = new Instances(m_Instances); // setup filter try { if (m_Filter.setInputFormat(icopy)) { result = m_Filter.getOutputFormat(); assertNotNull("Output format defined (setup)", result); } } catch (Exception ex) { ex.printStackTrace(); fail("Exception thrown on setInputFormat(): \n" + ex); } // first batch try { for (int i = 0; i < icopy.numInstances(); i++) { if (m_Filter.input(icopy.instance(i))) { Instance out = m_Filter.output(); assertNotNull("Instance not made available immediately (1. batch)", out); result.add(out); } } m_Filter.batchFinished(); if (result == null) { result = m_Filter.getOutputFormat(); assertNotNull("Output format defined (1. batch)", result); assertTrue("Pending output instances (1. batch)", m_Filter.numPendingOutput() > 0); } while (m_Filter.numPendingOutput() > 0) result.add(m_Filter.output()); } catch (Exception ex) { ex.printStackTrace(); fail("Exception thrown during 1. batch: \n" + ex); } // second batch try { result = null; if (m_Filter.isOutputFormatDefined()) result = m_Filter.getOutputFormat(); // add some instances int num = (int) ((double) icopy.numInstances() * 0.3); for (int i = 0; i < num; i++) icopy.add(icopy.instance(i)); for (int i = 0; i < icopy.numInstances(); i++) { if (m_Filter.input(icopy.instance(i))) { if (result == null) { fail("Filter didn't return true from isOutputFormatDefined() (2. batch)"); } else { Instance out = m_Filter.output(); assertNotNull("Instance not made available immediately (2. batch)", out); result.add(out); } } } m_Filter.batchFinished(); if (result == null) { result = m_Filter.getOutputFormat(); assertNotNull("Output format defined (2. batch)", result); assertTrue("Pending output instances (2. batch)", m_Filter.numPendingOutput() > 0); } while (m_Filter.numPendingOutput() > 0) result.add(m_Filter.output()); } catch (Exception ex) { ex.printStackTrace(); fail("Exception thrown during 2. batch: \n" + ex); } } /** * tests for a globalInfo method */ public void testGlobalInfo() { if (!m_GOETester.checkGlobalInfo()) fail("No globalInfo method"); } /** * tests the tool tips */ public void testToolTips() { if (!m_GOETester.checkToolTips()) fail("Tool tips inconsistent"); } }