// PersonNameTagger.java example
//
// Explorer
// MinorThird-master
import edu.cmu.minorthird.util.*;
import edu.cmu.minorthird.util.gui.*;
import edu.cmu.minorthird.text.*;
import edu.cmu.minorthird.text.gui.*;
import edu.cmu.minorthird.text.mixup.*;
import edu.cmu.minorthird.text.learn.*;
import edu.cmu.minorthird.classify.*;
import edu.cmu.minorthird.classify.algorithms.linear.*;
import edu.cmu.minorthird.classify.algorithms.trees.*;
import edu.cmu.minorthird.classify.algorithms.svm.*;
import edu.cmu.minorthird.classify.experiments.*;
import edu.cmu.minorthird.classify.sequential.*;
import java.util.*;
import java.io.*;

/**
 * Example annotator that applies a previously-learned, serialized annotator
 * to a text base after first running a mixup feature program over the labels.
 *
 * <p>Run from the command line as
 * <code>PersonNameTagger annotatorFile mailDirectory tags</code>; see
 * {@link #main(String[])}.
 */
public class PersonNameTagger extends AbstractAnnotator
{
	/** Mixup program loaded by the constructor; a relative path, so it is resolved against the working directory. */
	private static final String FEATURE_PROGRAM_FILE = "nameFeatures.mixup";

	/** Span type that {@link #main(String[])} writes out after annotation. */
	private static final String PREDICTED_TYPE = "predicted_name";

	/** Usage line printed by {@link #main(String[])}. */
	private static final String USAGE = "usage: annotatorFile mailDirectory tags";

	/** Annotator deserialized from the file given to the constructor. */
	private Annotator learnedAnnotator;

	/** Mixup program evaluated on the labels before the learned annotator runs. */
	private MixupProgram featureProgram;

	/**
	 * Loads the serialized annotator and the feature-extraction mixup program.
	 *
	 * @param learnedAnnotatorFile path of a file containing a serialized {@link Annotator}
	 * @throws IOException if either file cannot be read
	 * @throws Mixup.ParseException if the mixup program cannot be parsed
	 */
	public PersonNameTagger(String learnedAnnotatorFile) throws IOException,Mixup.ParseException
	{
		learnedAnnotator = (Annotator)IOUtil.loadSerialized(new File(learnedAnnotatorFile));
		featureProgram = new MixupProgram(new File(FEATURE_PROGRAM_FILE));
	}

	/**
	 * Evaluates the feature program on the labels, then applies the learned
	 * annotator to the same labels.
	 *
	 * @param labels the labels to annotate; modified in place
	 */
	public void doAnnotate(MonotonicTextLabels labels)
	{
		// The feature program runs first; presumably the learned annotator
		// relies on the annotations it adds -- confirm against the training setup.
		MixupInterpreter interp = new MixupInterpreter(featureProgram);
		interp.eval(labels);
		learnedAnnotator.annotate( labels );
	}

	/**
	 * Returns a fixed placeholder explanation; no real explanation is computed.
	 *
	 * @param labels ignored
	 * @param span ignored
	 * @return the constant string "just because"
	 */
	public String explainAnnotation(TextLabels labels,Span span)
	{
		return "just because";
	}

	/**
	 * Command-line entry point: loads the annotator, loads a text base,
	 * annotates it, and writes the spans of type "predicted_name" to a file.
	 *
	 * @param args <code>args[0]</code> serialized annotator file,
	 *             <code>args[1]</code> file or directory handed to the text base loader,
	 *             <code>args[2]</code> output file for the predicted spans
	 */
	public static void main(String[] args)
	{
		// Check the argument count up front so a missing argument produces the
		// usage line rather than an ArrayIndexOutOfBoundsException stack trace.
		if (args.length < 3) {
			System.out.println(USAGE);
			return;
		}
		try {
			PersonNameTagger tagger = new PersonNameTagger(args[0]);
			TextBaseLoader baseLoader = new TextBaseLoader(TextBaseLoader.DOC_PER_FILE, TextBaseLoader.FILE_NAME);
			TextBase base = baseLoader.load(new File(args[1]));

			MonotonicTextLabels labels = new BasicTextLabels( base );
			tagger.annotate( labels );
			saveType(labels, PREDICTED_TYPE, new File(args[2]));
		} catch (Exception e) {
			// Kept from the original: any failure prints the trace and the usage line.
			e.printStackTrace();
			System.out.println(USAGE);
		}
	}

	/**
	 * Writes one "addToType" line per non-empty span of the given type.
	 *
	 * <p>Each line has the form
	 * <code>addToType documentId lo length type</code>, where <code>lo</code> is
	 * <code>getLo()</code> of the span's first token and <code>length</code> is
	 * <code>getHi()</code> of its last token minus <code>lo</code>
	 * (presumably character offsets within the document -- confirm against the
	 * token API).
	 *
	 * @param labels labels to read spans from
	 * @param type span type to export
	 * @param file destination file; created or overwritten
	 * @throws FileNotFoundException if the file cannot be opened for writing
	 */
	private static void saveType(TextLabels labels, String type, File file) throws FileNotFoundException
	{
		PrintStream out = new PrintStream(new FileOutputStream(file));
		try {
			for (Span.Looper j=labels.instanceIterator(type); j.hasNext(); ) {
				Span s = j.nextSpan();
				// Empty spans have no first/last token to take offsets from.
				if (s.size()>0) {
					int lo = s.getTextToken(0).getLo();
					int hi = s.getTextToken(s.size()-1).getHi();
					out.println("addToType "+s.getDocumentId()+" "+lo+" "+(hi-lo)+" "+type);
				}
			}
		} finally {
			// Close the stream even if iteration or span access throws.
			out.close();
		}
	}
}