// TestThreadedTagger.java example
//
// Explorer
// CoreNLP-master
// TestThreadedTagger -- StanfordMaxEnt, A Maximum Entropy Toolkit
// Copyright (c) 2002-2011 Leland Stanford Junior University
//
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
//
// For more information, bug reports, fixes, contact:
//    Christopher Manning
//    Dept of Computer Science, Gates 1A
//    Stanford CA 94305-9010
//    USA
//    Support/Questions: java-nlp-user@lists.stanford.edu
//    Licensing: java-nlp-support@lists.stanford.edu
//    http://www-nlp.stanford.edu/software/tagger.shtml
package edu.stanford.nlp.tagger.maxent;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Properties;

import edu.stanford.nlp.util.StringUtils;
import edu.stanford.nlp.util.Timing;

/**
 * First, this runs a tagger once to see what results it comes up with.
 * Then it runs the same tagger in two separate threads to make sure the results are the same.
 * The results are printed to stdout, and the results of each threaded run are compared
 * with the baseline; a mismatch causes a RuntimeException to be thrown.
 *
 * Normally you would run MaxentTagger with command line arguments such as:
 *
 * -model ../data/tagger/my-left3words-distsim-wsj-0-18.tagger
 * -testFile ../data/tagger/test-wsj-19-21 -verboseResults false
 *
 * If you provide the same arguments to this program, it will first
 * run the given tagger on the given test file once to establish the
 * "baseline" results.  It will then run the same tagger in more than
 * one thread at the same time; the output of every thread should match
 * the baseline if the MaxentTagger is re-entrant.  The number of threads
 * to be run can be specified with -numThreads; the default is
 * DEFAULT_NUM_THREADS.
 *
 * You can also provide multiple models.  After performing that test
 * on model1, it will then run the same test file on model2, model3,
 * etc to establish baseline results for that tagger.  After that, it
 * runs both taggers at the same time.  The taggers should be
 * completely separate structures.  In other words, the second tagger
 * should not have clobbered any static state initialized by the first
 * tagger.  Thus, the results of the two simultaneous taggers should
 * be the same as the two taggers' baselines.
 *
 * Example arguments for the more complicated test:
 *
 * -model1 ../data/pos-tagger/newmodels/left3words-distsim-wsj-0-18.tagger
 * -model2 ../data/pos-tagger/newmodels/left3words-wsj-0-18.tagger
 * -testFile ../data/pos-tagger/training/english/test-wsj-19-21
 * -verboseResults false
 *
 * @author John Bauer
 */
class TestThreadedTagger {
  /**
   * Default number of threads to launch in the first test.
   * Can be specified with -numThreads.
   */
  static final int DEFAULT_NUM_THREADS = 2;

  /** Name of the property that overrides {@link #DEFAULT_NUM_THREADS}. */
  static final String THREAD_FLAG = "numThreads";


  private TestThreadedTagger() {} // static methods


  /**
   * This internal class takes a tagger and a thread name.
   * The "run" method builds a {@code TestClassifier} for the tagger,
   * times that construction, and stores the classifier's results string.
   * <p>
   * Any failure inside {@link #run()} is recorded rather than lost, so
   * that {@link #awaitResults()} can report it on the calling thread.
   * Without this, a failed run would look like a run that produced an
   * empty results string, and two identically failing runs would
   * compare as equal.
   */
  private static class TaggerThread extends Thread {

    private final MaxentTagger tagger;
    private final String threadName;

    // Both fields are written by this thread and only read by the
    // launching thread after join(), which orders the write before the read.
    private String resultsString = "";
    private Throwable failure; // stays null unless run() failed

    /**
     * Returns the results string produced by {@link #run()}, or the
     * empty string if the run has not completed successfully.
     */
    public String getResultsString() { return resultsString; }

    /**
     * @param tagger the tagger to exercise
     * @param name label used in the progress message; also used as the
     *             Java thread name so stack traces are easy to attribute
     */
    TaggerThread(MaxentTagger tagger, String name) {
      super(name);
      this.tagger = tagger;
      this.threadName = name;
    }

    @Override
    public void run() {
      try {
        Timing t = new Timing();
        // Only the construction is timed; presumably the constructor
        // performs the actual tagging of the test file.
        TestClassifier testClassifier = new TestClassifier(tagger);
        long millis = t.stop();
        resultsString = testClassifier.resultsString(tagger);
        System.out.println("Thread " + threadName + " took " + millis +
                           " milliseconds to tag " + testClassifier.getNumWords() +
                           " words.\n" + resultsString);
      } catch (Throwable e) {
        // Thread boundary: nothing is swallowed here, the failure is
        // handed to whoever calls awaitResults().
        failure = e;
      }
    }

    /**
     * Waits for this thread to finish and returns its results string.
     *
     * @return the results string produced by the run
     * @throws InterruptedException if the wait is interrupted
     * @throws RuntimeException if the run failed; the original failure
     *         is attached as the cause
     */
    String awaitResults() throws InterruptedException {
      join();
      if (failure != null) {
        throw new RuntimeException("Thread " + threadName +
                                   " failed while tagging", failure);
      }
      return resultsString;
    }
  } // end class TaggerThread

  /**
   * Checks that a results string is identical to the expected baseline.
   *
   * @param results the results produced by a threaded run
   * @param baseline the results produced by the single threaded run
   * @throws RuntimeException if the two strings are not equal
   */
  public static void compareResults(String results, String baseline) {
    if (!results.equals(baseline)) {
      throw new RuntimeException("Results different from expected baseline");
    }
  }

  /**
   * Command line entry point: converts the arguments to Properties and
   * hands them to {@link #runThreadedTest(Properties)}.
   *
   * @param args command line flags such as -model, -model1, -testFile, -numThreads
   */
  public static void main(final String[] args)
    throws ClassNotFoundException, IOException, InterruptedException
  {
    Properties props = StringUtils.argsToProperties(args);
    runThreadedTest(props);
  }

  /**
   * Runs the threading test described in the class comment.
   * <ol>
   * <li> Loads the tagger named by "model", or else the taggers named by
   *      "model1", "model2", ...
   * <li> Runs the first tagger alone to get its baseline results.
   * <li> Runs the first tagger in several threads at once and compares
   *      each thread's results with that baseline.
   * <li> If there is more than one tagger, gets a baseline for each of
   *      the others, then runs all taggers at once, one thread each, and
   *      compares each with its own baseline.
   * </ol>
   *
   * @param props the configuration; must contain "model" or "model1"
   * @throws IllegalArgumentException if no model is specified
   * @throws NumberFormatException if "numThreads" is set but is not an integer
   * @throws RuntimeException if a tagging thread fails or if any threaded
   *         results differ from the corresponding baseline
   * @throws InterruptedException if interrupted while waiting for a thread
   */
  public static void runThreadedTest(Properties props)
    throws ClassNotFoundException, IOException, InterruptedException
  {
    // let the user specify how many threads to run in the first test case
    int numThreads = DEFAULT_NUM_THREADS;
    String threadSetting = props.getProperty(THREAD_FLAG);
    if (threadSetting != null) {
      numThreads = Integer.parseInt(threadSetting);
    }

    // read in each of the taggers specified on the command line
    System.out.println();
    System.out.println("Loading taggers...");
    System.out.println();

    List<MaxentTagger> taggers = loadTaggers(props);

    // no models at all => bad
    if (taggers.isEmpty()) {
      throw new IllegalArgumentException("Please specify at least one of " +
                                         "-model or -model1");
    }

    // run baseline results for the first tagger model
    List<String> baselineResults = new ArrayList<>();
    baselineResults.add(runBaseline(taggers.get(0), 1));

    System.out.println();
    System.out.println("Running " + numThreads + " threads of tagger 1");
    System.out.println();

    // run the first tagger in X separate threads at the same time
    // at the end of this test, those X threads should produce the same results
    List<TaggerThread> threads = new ArrayList<>();
    List<String> expected = new ArrayList<>();
    for (int i = 0; i < numThreads; ++i) {
      threads.add(new TaggerThread(taggers.get(0),
                                   "Simultaneous-" + (i + 1)));
      expected.add(baselineResults.get(0));
    }
    runAndCompare(threads, expected);

    // if we have more than one model...
    if (taggers.size() > 1) {
      // first, produce baseline results for the other models
      // do this one thread at a time so we know there are no
      // thread-related screwups
      for (int i = 1; i < taggers.size(); ++i) {
        baselineResults.add(runBaseline(taggers.get(i), i + 1));
      }

      System.out.println();
      System.out.println("Running " + taggers.size() +
                         " threads of different taggers");
      System.out.println();

      // now, run the X models at the same time.  there used to be a
      // whole bunch of static state in the tagger, which used to mean
      // such a thing was not possible to do.  now that should not
      // be a problem any more
      threads = new ArrayList<>();
      for (int i = 0; i < taggers.size(); ++i) {
        threads.add(new TaggerThread(taggers.get(i),
                                     "DifferentTaggers-" + (i + 1)));
      }
      runAndCompare(threads, baselineResults);
    }

    System.out.println("Done!");
  }

  /**
   * Loads the taggers named in the given properties.  If "model" is set,
   * that single tagger is loaded with props as its config.  Otherwise
   * "model1", "model2", ... are loaded in order until a number is
   * missing, each with its own copy of props in which "model" is set to
   * that tagger's model.
   *
   * @return the loaded taggers; empty if neither "model" nor "model1" is set
   */
  private static List<MaxentTagger> loadTaggers(Properties props)
    throws ClassNotFoundException, IOException
  {
    List<MaxentTagger> taggers = new ArrayList<>();

    String singleModel = props.getProperty("model");
    if (singleModel != null) {
      taggers.add(new MaxentTagger(singleModel, props));
      return taggers;
    }

    for (int taggerNum = 1; ; ++taggerNum) {
      String modelPath = props.getProperty("model" + taggerNum);
      if (modelPath == null) {
        break;
      }
      // each tagger gets a private config so that setting "model"
      // does not affect the caller's props or the other taggers
      Properties newProps = new Properties();
      newProps.putAll(props);
      newProps.setProperty("model", modelPath);
      taggers.add(new MaxentTagger(modelPath, newProps));
    }
    return taggers;
  }

  /**
   * Runs one tagger by itself in a single thread and returns its results.
   *
   * @param tagger the tagger to run
   * @param taggerNumber 1-based number of the tagger, used in messages
   */
  private static String runBaseline(MaxentTagger tagger, int taggerNumber)
    throws InterruptedException
  {
    System.out.println();
    System.out.println("Running the baseline results for tagger " + taggerNumber);
    System.out.println();

    TaggerThread baselineThread =
      new TaggerThread(tagger, "BaseResults-" + taggerNumber);
    baselineThread.start();
    return baselineThread.awaitResults();
  }

  /**
   * Starts all of the given threads, then waits for each in turn and
   * compares its results with the expected string at the same index.
   */
  private static void runAndCompare(List<TaggerThread> threads,
                                    List<String> expected)
    throws InterruptedException
  {
    for (TaggerThread thread : threads) {
      thread.start();
    }
    for (int i = 0; i < threads.size(); ++i) {
      compareResults(threads.get(i).awaitResults(), expected.get(i));
    }
  }
}