package edu.cmu.minorthird.ui; import java.io.IOException; import java.io.Serializable; import org.apache.log4j.Logger; import edu.cmu.minorthird.classify.experiments.FixedTestSetSplitter; import edu.cmu.minorthird.text.Span; import edu.cmu.minorthird.text.learn.experiments.ExtractionEvaluation; import edu.cmu.minorthird.text.learn.experiments.TextLabelsExperiment; import edu.cmu.minorthird.util.CommandLineProcessor; import edu.cmu.minorthird.util.IOUtil; import edu.cmu.minorthird.util.JointCommandLineProcessor; import edu.cmu.minorthird.util.gui.SmartVanillaViewer; import edu.cmu.minorthird.util.gui.ViewerFrame; /** * Do a train/test experiment for named-entity extractors. * * @author William Cohen */ public class TrainTestExtractor extends UIMain { static Logger log = Logger.getLogger(TrainTestExtractor.class); // private data needed to train a extractor private CommandLineUtil.SaveParams save = new CommandLineUtil.SaveParams(); private CommandLineUtil.ExtractionSignalParams signal = new CommandLineUtil.ExtractionSignalParams(base); private CommandLineUtil.TrainExtractorParams train = new CommandLineUtil.TrainExtractorParams(); private CommandLineUtil.SplitterParams trainTest = new CommandLineUtil.SplitterParams(); private Object result = null; public String getTrainTestExtractorHelp() { return "<A HREF=\"http://minorthird.sourceforge.net/tutorials/TrainTestExtractor%20Tutorial.htm\">TrainTestExtractor Tutorial</A></html>"; } // for command-line ui @Override public CommandLineProcessor getCLP() { return new JointCommandLineProcessor(new CommandLineProcessor[]{gui,base,save,signal,train,trainTest}); } // for GUI public CommandLineUtil.SaveParams getSaveParameters() { return save; } public void setSaveParameters(CommandLineUtil.SaveParams save) { this.save=save; } public CommandLineUtil.ExtractionSignalParams getSignalParameters() { return signal; } public void setSignalParameters(CommandLineUtil.ExtractionSignalParams signal) { this.signal=signal; } public CommandLineUtil.TrainExtractorParams getTrainingParameters() { return train; } public void setTrainingParameters(CommandLineUtil.TrainExtractorParams train) { this.train=train; } public CommandLineUtil.SplitterParams getSplitterParameters() { return trainTest; } public void setSplitterParameters(CommandLineUtil.SplitterParams trainTest) { this.trainTest=trainTest; } // // do the experiment // @Override public void doMain() { // check that inputs are valid if (train.learner==null) throw new IllegalArgumentException("-learner must be specified"); if (signal.spanProp==null && signal.spanType==null) throw new IllegalArgumentException("one of -spanProp or -spanType must be specified"); if (signal.spanProp!=null && signal.spanType!=null) throw new IllegalArgumentException("only one of -spanProp or -spanType can be specified"); //no longer needed //if (train.fe != null) { //System.out.println("setting fe to "+train.fe); //train.learner.setSpanFeatureExtractor(train.fe); //} // set up the splitter if(trainTest.labels!=null){ if(signal.spanPropString!=null){ CommandLineUtil.createSpanProp(signal.spanPropString, trainTest.labels); } trainTest.splitter = new FixedTestSetSplitter<Span>( trainTest.labels.getTextBase().documentSpanIterator() ); System.out.println("splitter for test size "+trainTest.labels.getTextBase().size()+" is "+trainTest.splitter); } TextLabelsExperiment expt = new TextLabelsExperiment( base.labels, trainTest.splitter, trainTest.labels, train.learner, signal.spanType, signal.spanProp, train.output ); expt.doExperiment(); ExtractionEvaluation evaluation = expt.getEvaluation(); if (trainTest.showTestDetails) result = expt; else result = evaluation; if (base.showResult) new ViewerFrame("Experimental Result",new SmartVanillaViewer(result)); if (save.saveAs!=null) { try { IOUtil.saveSerialized(evaluation,save.saveAs); } catch (IOException e) { throw new IllegalArgumentException("can't save to "+save.saveAs+": "+e); } } } @Override public Object getMainResult() { return result; } public static void main(String args[]) { new TrainTestExtractor().callMain(args); } }