// Copyright 2014 Thomas Müller
// This file is part of MarMoT, which is licensed under GPLv3.
package marmot.morph.cmd;
import java.io.IOException;
import java.io.Writer;
import java.util.LinkedList;
import java.util.List;
import marmot.core.Sequence;
import marmot.morph.MorphEvaluator;
import marmot.morph.MorphModel;
import marmot.morph.MorphOptions;
import marmot.morph.MorphResult;
import marmot.morph.MorphTagger;
import marmot.morph.io.SentenceReader;
import marmot.util.FakeWriter;
import marmot.util.FileUtils;
import marmot.util.ListUtils;
public class CrossAnnotator {
public static void main(String[] args) throws IOException {
MorphOptions options = new MorphOptions();
options.setPropertiesFromStrings(args);
options.dieIfPropertyIsEmpty(MorphOptions.TRAIN_FILE);
annotate(options, options.getTrainFile(), options.getPredFile() , options.getNumChunks());
}
public static void annotate(MorphOptions options, String infile,
String outfile, int num_chunks) throws IOException {
List<Sequence> sequences = new LinkedList<Sequence>();
for (Sequence sequence : new SentenceReader(infile)) {
sequences.add(sequence);
}
Writer writer = null;
if (outfile == null || outfile.isEmpty()) {
writer = new FakeWriter();
} else {
writer = FileUtils.openFileWriter(outfile);
}
annotate(options, sequences, num_chunks, writer);
writer.close();
}
public static void annotate(MorphOptions options, List<Sequence> sequences,
int num_chunks, Writer writer) throws IOException {
List<List<Sequence>> chunks = ListUtils.chunk(sequences, num_chunks);
MorphResult result = null;
for (int i = 0; i < num_chunks; i++) {
if (options.getVerbose()) {
System.err.format("Processing chunk %d\n", i);
}
List<Sequence> chunk = chunks.get(i);
List<Sequence> complement = ListUtils.complement(chunks, i);
MorphTagger tagger = (MorphTagger) MorphModel.train(options, complement, chunk);
for (Sequence sequence : chunk) {
Annotator.annotate(tagger, sequence, writer);
}
if (options.getVerbose()) {
MorphEvaluator eval = new MorphEvaluator(chunk);
MorphResult chunk_result = eval.eval(tagger);
if (result == null) {
result = chunk_result;
} else {
result.increment(chunk_result);
}
}
if (result != null) {
System.err.println();
System.err.println();
System.err.println("Overall results:");
System.err.println(result);
}
}
}
}