package chipmunk.test.segmenter;

import static org.junit.Assert.assertTrue;

import java.util.logging.Logger;

import marmot.util.Copy;
import marmot.util.Numerics;

import org.junit.Test;

import chipmunk.segmenter.Scorer;
import chipmunk.segmenter.SegmentationDataReader;
import chipmunk.segmenter.Segmenter;
import chipmunk.segmenter.SegmenterOptions;
import chipmunk.segmenter.SegmenterTrainer;

/**
 * Checks that a segmenter trained on the bundled English training resource
 * reaches a high F-score when evaluated on that same data.
 *
 * <p>Both tests run the same pipeline (train, clone, score, assert) and differ
 * only in the {@link SegmenterOptions} handed to the trainer.
 */
public class SegmenterTest {

	private static final Logger LOGGER = Logger.getLogger(SegmenterTest.class.getName());

	/** Resource path of the English training data; also used as the evaluation data. */
	private static final String TRAIN_FILE = "res:///chipmunk/test/segmenter/data/eng/trn";

	/** Language identifier passed to both the data reader and the options. */
	private static final String LANG = "eng";

	/** F-score that a trained model must (approximately) reach on its own training data. */
	private static final double MIN_FSCORE = 99.0;

	/** Trains with the default options (only the language is set). */
	@Test
	public void trainAccuracyTest() {
		assertTrainingFscore(englishOptions());
	}

	/** Trains with {@link SegmenterOptions#CRF_MODE} switched on. */
	@Test
	public void crfTrainAccuracyTest() {
		SegmenterOptions options = englishOptions();
		options.setOption(SegmenterOptions.CRF_MODE, true);
		assertTrainingFscore(options);
	}

	/** Builds a fresh option set with only {@link SegmenterOptions#LANG} set to English. */
	private static SegmenterOptions englishOptions() {
		SegmenterOptions options = new SegmenterOptions();
		options.setOption(SegmenterOptions.LANG, LANG);
		return options;
	}

	/**
	 * Trains a segmenter with the given options, scores it on the training data
	 * and asserts that the F-score reaches {@link #MIN_FSCORE}.
	 *
	 * @param options trainer configuration; the caller sets every option it needs
	 */
	private static void assertTrainingFscore(SegmenterOptions options) {
		// NOTE(review): the meaning of the third constructor argument (0) is not
		// visible from this file -- confirm against SegmentationDataReader.
		SegmentationDataReader reader = new SegmentationDataReader(TRAIN_FILE, LANG, 0);

		SegmenterTrainer trainer = new SegmenterTrainer(options);
		Segmenter segmenter = trainer.train(reader.getData());

		// Evaluate a clone rather than the trained instance itself; presumably this
		// also checks that the model survives copying -- verify against Copy.clone.
		segmenter = Copy.clone(segmenter);

		Scorer scorer = new Scorer();
		scorer.eval(reader.getData(), segmenter);
		LOGGER.info(scorer.report());

		double fscore = scorer.getFscore();
		assertTrue("Expected training F-score of at least " + MIN_FSCORE + " but was " + fscore,
				Numerics.approximatelyGreaterEqual(fscore, MIN_FSCORE));
	}
}