package chipmunk.segmenter.cmd;
import java.util.Iterator;
import java.util.List;
import marmot.util.FileUtils;
import chipmunk.segmenter.SegmentationDataReader;
import chipmunk.segmenter.Segmenter;
import chipmunk.segmenter.SegmenterOptions;
import chipmunk.segmenter.SegmenterTrainer;
import chipmunk.segmenter.Word;
import com.martiansoftware.jsap.FlaggedOption;
import com.martiansoftware.jsap.JSAP;
import com.martiansoftware.jsap.JSAPException;
import com.martiansoftware.jsap.JSAPResult;
public class Train {
private static String TRAIN_FILE = "train-file";
private static String MODEL_FILE = "model-file";
public static void main(String[] args) throws JSAPException {
FlaggedOption opt;
JSAP jsap = new JSAP();
opt = new FlaggedOption(TRAIN_FILE).setRequired(true).setLongFlag(TRAIN_FILE);
jsap.registerParameter(opt);
opt = new FlaggedOption(MODEL_FILE).setRequired(true).setLongFlag(MODEL_FILE);
jsap.registerParameter(opt);
SegmenterOptions options = new SegmenterOptions();
options.registerOptions(jsap);
JSAPResult config = jsap.parse(args);
if (!config.success()) {
for (Iterator<?> errs = config.getErrorMessageIterator(); errs
.hasNext();) {
System.err.println("Error: " + errs.next());
}
System.err.println("Usage: ");
System.err.println(jsap.getUsage());
System.err.println(jsap.getHelp());
System.err.println();
System.exit(1);
}
options.setOptions(config);
SegmenterTrainer trainer = new SegmenterTrainer(options);
List<Word> words = new SegmentationDataReader(config.getString(TRAIN_FILE), options.getString(SegmenterOptions.LANG), options.getInt(SegmenterOptions.TAG_LEVEL)).getData();
Segmenter segmenter = trainer.train(words);
FileUtils.saveToFile(segmenter, config.getString(MODEL_FILE));
}
}