package cc.mallet.topics.tui;
import cc.mallet.util.*;
import cc.mallet.types.*;
import cc.mallet.topics.*;
import java.io.*;
/**
 * Command-line entry point that loads a serialized {@link MarginalProbEstimator}
 * and a serialized {@link InstanceList}, runs left-to-right evaluation over the
 * instances, and prints the value returned by the estimator.
 *
 * <p>All command-line options are declared as static {@code CommandOption}
 * fields registered against this class and are parsed by
 * {@code CommandOption.process} at the start of {@link #main(String[])}.
 */
public class EvaluateTopics {

    /** Path of the serialized topic evaluator; required (checked in main). */
    static CommandOption.String evaluatorFilename = new CommandOption.String
        (EvaluateTopics.class, "evaluator", "FILENAME", true, null,
         "A serialized topic evaluator from a trained topic model.\n" +
         "By default this is null, indicating that no file will be read.", null);

    /** Path of the serialized instance list to evaluate; required (checked in main). */
    static CommandOption.String inputFile = new CommandOption.String
        (EvaluateTopics.class, "input", "FILENAME", true, null,
         "The filename from which to read the list of instances\n" +
         "for which topics should be inferred.  Use - for stdin.  " +
         "The instances must be FeatureSequence or FeatureSequenceWithBigrams, not FeatureVector", null);

    /** Optional file that receives per-document output; null means no file is opened. */
    static CommandOption.String docProbabilityFile = new CommandOption.String
        (EvaluateTopics.class, "output-doc-probs", "FILENAME", true, null,
         "The filename in which to write the inferred log probabilities\n" +
         "per document.  " +
         "By default this is null, indicating that no file will be written.", null);

    /** Destination for the overall result; "-" (the default) or null means System.out. */
    static CommandOption.String probabilityFile = new CommandOption.String
        (EvaluateTopics.class, "output-prob", "FILENAME", true, "-",
         "The filename in which to write the inferred log probability of the testing set\n" +
         "Use - for stdout, which is the default.", null);

    /** Number of particles passed to evaluateLeftToRight. */
    static CommandOption.Integer numParticles = new CommandOption.Integer
        (EvaluateTopics.class, "num-particles", "INTEGER", true, 10,
         "The number of particles to use in left-to-right evaluation.", null);

    /** Resampling flag passed to evaluateLeftToRight. */
    static CommandOption.Boolean usingResampling = new CommandOption.Boolean
        (EvaluateTopics.class, "use-resampling", "TRUE|FALSE", false, false,
         "Whether to resample topics in left-to-right evaluation. Resampling is more accurate, but leads to quadratic scaling in the length of documents.", null);

    // NOTE(review): the four options below are declared (and therefore accepted
    // on the command line) but their values are never read in this file. They
    // are kept so existing invocations that pass them keep working.

    static CommandOption.Integer numIterations = new CommandOption.Integer
        (EvaluateTopics.class, "num-iterations", "INTEGER", true, 100,
         "The number of iterations of Gibbs sampling.", null);

    static CommandOption.Integer sampleInterval = new CommandOption.Integer
        (EvaluateTopics.class, "sample-interval", "INTEGER", true, 10,
         "The number of iterations between saved samples.", null);

    static CommandOption.Integer burnInIterations = new CommandOption.Integer
        (EvaluateTopics.class, "burn-in", "INTEGER", true, 10,
         "The number of iterations before the first sample is saved.", null);

    static CommandOption.Integer randomSeed = new CommandOption.Integer
        (EvaluateTopics.class, "random-seed", "INTEGER", true, 0,
         "The random seed for the Gibbs sampler.  Default is 0, which will use the clock.", null);

    /**
     * Parses the command line, loads the evaluator and the instances, runs
     * left-to-right evaluation and prints the result.
     *
     * <p>The process exits with status 1 when a required option is missing or
     * when any exception is raised while opening files, loading data or
     * evaluating. On success the method returns normally.
     *
     * @param args command-line arguments, handed to {@code CommandOption.process}
     */
    public static void main (String[] args) {

        // Process the command-line options
        CommandOption.setSummary (EvaluateTopics.class,
                                  "Estimate the marginal probability of new documents under a trained topic model.");
        CommandOption.process (EvaluateTopics.class, args);

        // Both inputs are mandatory; a missing one is a usage error, so report
        // failure to the calling shell with a non-zero status.
        if (evaluatorFilename.value == null) {
            System.err.println("You must specify a serialized topic evaluator. Use --help to list options.");
            System.exit(1);
        }

        if (inputFile.value == null) {
            System.err.println("You must specify a serialized instance list. Use --help to list options.");
            System.exit(1);
        }

        // Declared outside the try so the finally clause can release them.
        PrintStream docProbabilityStream = null;
        PrintStream outputStream = System.out;
        boolean failed = false;

        try {
            if (docProbabilityFile.value != null) {
                docProbabilityStream = new PrintStream(docProbabilityFile.value);
            }

            if (probabilityFile.value != null &&
                ! probabilityFile.value.equals("-")) {
                outputStream = new PrintStream(probabilityFile.value);
            }

            MarginalProbEstimator evaluator =
                MarginalProbEstimator.read(new File(evaluatorFilename.value));

            InstanceList instances = InstanceList.load (new File(inputFile.value));

            outputStream.println(evaluator.evaluateLeftToRight(instances, numParticles.value,
                                                               usingResampling.value,
                                                               docProbabilityStream));
        } catch (Exception e) {
            e.printStackTrace();
            System.err.println(e.getMessage());
            failed = true;
        } finally {
            if (docProbabilityStream != null) {
                docProbabilityStream.close();
            }
            // Only close streams this method opened; System.out is just flushed.
            if (outputStream != System.out) {
                outputStream.close();
            } else {
                outputStream.flush();
            }
        }

        // Exit only after the streams are released so output is not lost.
        if (failed) {
            System.exit(1);
        }
    }

}