// TestThreadedTagger -- StanfordMaxEnt, A Maximum Entropy Toolkit
// Copyright (c) 2002-2011 Leland Stanford Junior University
//
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
//
// For more information, bug reports, fixes, contact:
// Christopher Manning
// Dept of Computer Science, Gates 1A
// Stanford CA 94305-9010
// USA
// Support/Questions: java-nlp-user@lists.stanford.edu
// Licensing: java-nlp-support@lists.stanford.edu
// http://www-nlp.stanford.edu/software/tagger.shtml
package edu.stanford.nlp.tagger.maxent;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Properties;
import edu.stanford.nlp.util.Timing;
import edu.stanford.nlp.util.StringUtils;
/**
* First, this runs a tagger once to see what results it comes up with.
* Then it runs the same tagger in two separate threads to make sure the results are the same.
* The results are printed to stdout; the user is expected to verify they are as expected.
*
* Normally you would run MaxentTagger with command line arguments such as:
*
* -model ../data/tagger/my-left3words-distsim-wsj-0-18.tagger
* -testFile ../data/tagger/test-wsj-19-21 -verboseResults false
*
* If you provide the same arguments to this program, it will first
* run the given tagger on the given test file once to establish the
* "baseline" results. It will then run the same tagger in more than
* one thread at the same time; the output of every thread should be
* the same if the MaxentTagger is re-entrant. The number of threads
* to be run can be specified with -numThreads; the default is
* DEFAULT_NUM_THREADS.
*
* You can also provide multiple models. After performing that test
* on model1, it will then run the same test file on model2, model3,
* etc to establish baseline results for that tagger. After that, it
* runs both taggers at the same time. The taggers should be
* completely separate structures. In other words, the second tagger
* should not have clobbered any static state initialized by the first
* tagger. Thus, the results of the two simultaneous taggers should
* be the same as the two taggers' baselines.
*
* Example arguments for the more complicated test:
*
* -model1 ../data/pos-tagger/newmodels/left3words-distsim-wsj-0-18.tagger
* -model2 ../data/pos-tagger/newmodels/left3words-wsj-0-18.tagger
* -testFile ../data/pos-tagger/training/english/test-wsj-19-21
* -verboseResults false
*
* @author John Bauer
*/
class TestThreadedTagger {

  /**
   * Default number of threads to launch in the first test.
   * Can be specified with -numThreads.
   */
  static final int DEFAULT_NUM_THREADS = 2;

  /** Name of the property (command line flag) that overrides the thread count. */
  static final String THREAD_FLAG = "numThreads";

  private TestThreadedTagger() {} // static methods only

  /**
   * A thread that evaluates one tagger by building a {@code TestClassifier}
   * for it, then keeps the resulting results string so the launching thread
   * can compare it against a baseline.
   * <p>
   * Any exception raised while testing is recorded instead of being thrown
   * on the worker thread, where nobody would see it.  The launching thread
   * must call {@code join()} before reading {@link #getResultsString()} or
   * {@link #getFailure()}; the join is what makes the worker's writes
   * visible, so the fields need no further synchronization.
   */
  private static class TaggerThread extends Thread {
    private final MaxentTagger tagger;
    private final String threadName;
    private String resultsString = "";
    /** The exception that aborted {@link #run()}, or null if it completed. */
    private Exception failure;

    /** @return the results string of the run; empty if the run has not completed */
    public String getResultsString() { return resultsString; }

    /** @return the exception that aborted the run, or null if there was none */
    Exception getFailure() { return failure; }

    /**
     * @param tagger the tagger to test
     * @param name label used in this thread's console output
     */
    TaggerThread(MaxentTagger tagger, String name) {
      this.tagger = tagger;
      this.threadName = name;
    }

    @Override
    public void run() {
      try {
        // Only the construction of the TestClassifier is timed; presumably
        // that is where the test file is actually tagged.
        Timing t = new Timing();
        TestClassifier testClassifier = new TestClassifier(tagger);
        long millis = t.stop();
        resultsString = testClassifier.resultsString(tagger);
        System.out.println("Thread " + threadName + " took " + millis +
                           " milliseconds to tag " + testClassifier.getNumWords() +
                           " words.\n" + resultsString);
      } catch (IOException | RuntimeException e) {
        // Throwing from here would only reach the default uncaught-exception
        // handler and leave resultsString empty, which could make a broken
        // run compare equal to an equally broken baseline.  Keep the
        // exception so the driver can rethrow it after join().
        failure = e;
      }
    }
  } // end class TaggerThread

  /**
   * Checks that a run produced exactly the baseline output.
   *
   * @param results the results string of the run being checked
   * @param baseline the results string it is expected to equal
   * @throws RuntimeException if the two strings differ
   */
  public static void compareResults(String results, String baseline) {
    if (!results.equals(baseline)) {
      throw new RuntimeException("Results different from expected baseline");
    }
  }

  /**
   * Command line entry point: turns the arguments into properties and runs
   * the threaded test on them.
   */
  public static void main(final String[] args)
    throws ClassNotFoundException, IOException, InterruptedException
  {
    Properties props = StringUtils.argsToProperties(args);
    runThreadedTest(props);
  }

  /**
   * Runs the whole test.  The first tagger is run once for a baseline and
   * then in several simultaneous threads, each of which must reproduce the
   * baseline.  If more than one model was given, a baseline is then produced
   * for each remaining tagger, and finally all taggers are run at the same
   * time, each of which must reproduce its own baseline.
   *
   * @param props the test configuration; must contain either "model" or
   *   "model1", "model2", ..., and may contain {@link #THREAD_FLAG}
   * @throws IllegalArgumentException if no model is specified or the
   *   requested number of threads is not a positive integer
   * @throws RuntimeException if a tagger thread fails or its results differ
   *   from the baseline
   */
  public static void runThreadedTest(Properties props)
    throws ClassNotFoundException, IOException, InterruptedException
  {
    // let the user specify how many threads to run in the first test case
    int numThreads = parseNumThreads(props);

    // read in each of the taggers specified on the command line
    System.out.println();
    System.out.println("Loading taggers...");
    System.out.println();
    ArrayList<MaxentTagger> taggers = loadTaggers(props);

    // no models at all => bad
    if (taggers.isEmpty()) {
      throw new IllegalArgumentException("Please specify at least one of " +
                                         "-model or -model1");
    }

    // run baseline results for the first tagger model
    ArrayList<String> baselineResults = new ArrayList<>();
    baselineResults.add(runBaseline(taggers.get(0), 1));

    System.out.println();
    System.out.println("Running " + numThreads + " threads of tagger 1");
    System.out.println();

    // run the first tagger in X separate threads at the same time
    // at the end of this test, those X threads should produce the same results
    ArrayList<TaggerThread> threads = new ArrayList<>();
    for (int i = 0; i < numThreads; ++i) {
      threads.add(new TaggerThread(taggers.get(0),
                                   "Simultaneous-" + (i + 1)));
    }
    for (TaggerThread thread : threads) {
      thread.start();
    }
    for (TaggerThread thread : threads) {
      compareResults(awaitResults(thread), baselineResults.get(0));
    }

    // if we have more than one model...
    if (taggers.size() > 1) {
      // first, produce baseline results for the other models
      // do this one thread at a time so we know there are no
      // thread-related screwups
      for (int i = 1; i < taggers.size(); ++i) {
        baselineResults.add(runBaseline(taggers.get(i), i + 1));
      }

      System.out.println();
      System.out.println("Running " + taggers.size() +
                         " threads of different taggers");
      System.out.println();

      // now, run the X models at the same time.  there used to be a
      // whole bunch of static state in the tagger, which used to mean
      // such a thing was not possible to do.  now that should not
      // be a problem any more
      threads.clear();
      for (int i = 0; i < taggers.size(); ++i) {
        threads.add(new TaggerThread(taggers.get(i),
                                     "DifferentTaggers-" + (i + 1)));
      }
      for (TaggerThread thread : threads) {
        thread.start();
      }
      for (int i = 0; i < taggers.size(); ++i) {
        compareResults(awaitResults(threads.get(i)), baselineResults.get(i));
      }
    }
    System.out.println("Done!");
  }

  /**
   * Reads the thread count from {@link #THREAD_FLAG}, falling back to
   * {@link #DEFAULT_NUM_THREADS} when the property is absent.
   *
   * @throws IllegalArgumentException if the value is not an integer (as a
   *   NumberFormatException) or is smaller than 1
   */
  private static int parseNumThreads(Properties props) {
    String requested = props.getProperty(THREAD_FLAG);
    if (requested == null) {
      return DEFAULT_NUM_THREADS;
    }
    int numThreads = Integer.parseInt(requested.trim());
    if (numThreads < 1) {
      // zero threads would make the concurrent test pass without testing anything
      throw new IllegalArgumentException("-" + THREAD_FLAG +
                                         " must be at least 1, but was " + numThreads);
    }
    return numThreads;
  }

  /**
   * Loads the tagger named by "model" if that property is set; otherwise
   * loads the taggers named by "model1", "model2", ... until the first
   * missing number.  Each numbered tagger gets its own copy of the
   * properties in which "model" is set to that tagger's model.
   *
   * @return the loaded taggers in the order specified; empty if no model
   *   property is present
   */
  private static ArrayList<MaxentTagger> loadTaggers(Properties props)
    throws ClassNotFoundException, IOException
  {
    ArrayList<MaxentTagger> taggers = new ArrayList<>();
    String singleModel = props.getProperty("model");
    if (singleModel != null) {
      taggers.add(new MaxentTagger(singleModel, props));
      return taggers;
    }
    for (int taggerNum = 1; ; ++taggerNum) {
      String model = props.getProperty("model" + taggerNum);
      if (model == null) {
        break;
      }
      Properties newProps = new Properties();
      newProps.putAll(props);
      newProps.setProperty("model", model);
      taggers.add(new MaxentTagger(model, newProps));
    }
    return taggers;
  }

  /**
   * Runs one tagger alone in its own thread and returns its results string,
   * which later concurrent runs of the same tagger are compared against.
   *
   * @param tagger the tagger to run
   * @param taggerNumber 1-based number of the tagger, used only for output
   * @throws RuntimeException if the baseline run itself fails
   */
  private static String runBaseline(MaxentTagger tagger, int taggerNumber)
    throws InterruptedException
  {
    System.out.println();
    System.out.println("Running the baseline results for tagger " + taggerNumber);
    System.out.println();
    TaggerThread baselineThread =
      new TaggerThread(tagger, "BaseResults-" + taggerNumber);
    baselineThread.start();
    return awaitResults(baselineThread);
  }

  /**
   * Waits for a started tagger thread to finish and returns its results.
   *
   * @throws RuntimeException wrapping the original exception if the thread
   *   did not complete its run
   */
  private static String awaitResults(TaggerThread thread)
    throws InterruptedException
  {
    thread.join();
    Exception failure = thread.getFailure();
    if (failure != null) {
      throw new RuntimeException("Tagger thread " + thread.threadName + " failed",
                                 failure);
    }
    return thread.getResultsString();
  }
}