NameFE.java example

Explorer
MinorThird-master
import edu.cmu.minorthird.util.*;
import edu.cmu.minorthird.util.gui.*;
import edu.cmu.minorthird.text.*;
import edu.cmu.minorthird.text.gui.*;
import edu.cmu.minorthird.text.mixup.*;
import edu.cmu.minorthird.text.learn.*;
import edu.cmu.minorthird.classify.experiments.*;
import edu.cmu.minorthird.classify.*;
import edu.cmu.minorthird.classify.sequential.*;

import java.util.*;
import java.io.*;

/**
 * A simple extraction-oriented feature extractor to apply to one-token spans,
 * for extraction tasks.
 *
 * <p>For a span, features are emitted for the span's own tokens and for each of
 * the {@code windowSize} token positions to its left and to its right. Which
 * feature families are emitted is controlled by the boolean switches and by the
 * list of token properties configured through the setters below.
 *
 * <p>Before extracting, the labels are checked for the annotation named by
 * {@code requiredAnnotation}; if it is missing, the dependency
 * {@code requiredAnnotation + ".mixup"} is run on the labels.
 */

public class NameFE implements SpanFeatureExtractor,Serializable
{
	/** Number of token positions examined on each side of the span. */
	private int windowSize=5;
	/** Whether charTypes() features are emitted. */
	private boolean useCharType=true;
	/** Whether charTypePattern() features are emitted. */
	private boolean useCompressedCharType=true;
	/** If false, token-identity features are restricted to tokens having the "anchor" property. */
	private boolean useEqOnNonAnchors=false;
	/** Names of token properties to emit as features; never null. */
	private String[] tokenPropertyFeatures=new String[0];
	/** Annotation that must be present on the labels before extraction. */
	private String requiredAnnotation="nameFeatures_v2";
	
	//
	// getters and setters
	//
	
	public int getWindowSize() { return windowSize; }
	public void setWindowSize(int n) { windowSize=n; }
	
	public boolean getUseCharType() { return useCharType; } 
	public void setUseCharType(boolean flag) { useCharType=flag; } 
	
	public boolean getUseEqOnNonAnchors() { return useEqOnNonAnchors; }
	public void setUseEqOnNonAnchors(boolean flag) { useEqOnNonAnchors=flag; }

	public boolean getUseCompressedCharType() { return useCompressedCharType; } 
	public void setUseCompressedCharType(boolean flag) { useCompressedCharType=flag; } 
	
	public void setRequiredAnnotation(String s) { requiredAnnotation=s; }
	public String getRequiredAnnotation() { return requiredAnnotation; }

	/**
	 * Returns the configured token properties rendered by StringUtil.toString.
	 * NOTE(review): the exact format is whatever StringUtil.toString produces;
	 * confirm that it round-trips through setTokenPropertyFeatures(String).
	 */
	public String getTokenPropertyFeatures() { return StringUtil.toString(tokenPropertyFeatures); }

	/**
	 * Sets the token properties to emit from a comma-separated list of names.
	 * A null or blank list clears the properties.
	 *
	 * @param commaSeparatedTokenPropertyList property names separated by commas,
	 *        optionally followed by whitespace
	 */
	public void setTokenPropertyFeatures(String commaSeparatedTokenPropertyList) { 
		String list = commaSeparatedTokenPropertyList==null ? "" : commaSeparatedTokenPropertyList.trim();
		if (list.length()==0) {
			// "".split(...) would yield one empty property name rather than none
			tokenPropertyFeatures = new String[0];
		} else {
			tokenPropertyFeatures = list.split(",\\s*");
		}
	}

	/**
	 * Sets the token properties to emit from a set of property names.
	 *
	 * @param propertySet set whose elements are the property names (Strings)
	 */
	public void setTokenPropertyFeatures(Set propertySet) { 
		tokenPropertyFeatures = (String[])propertySet.toArray(new String[propertySet.size()]);
	}
	
	/**
	 * Extracts an instance for the span using an EmptyLabels object as the labels.
	 * NOTE(review): the two-argument overload casts the labels to
	 * MonotonicTextLabels when the required annotation is missing; confirm that
	 * EmptyLabels is handled as intended on this path.
	 */
	public Instance extractInstance(Span s)	
	{
		return extractInstance(new EmptyLabels(), s);
	}

	/**
	 * Extracts an instance for the span, reading token properties from the labels.
	 *
	 * @param labels labels for the span's document; if they lack the required
	 *        annotation they are cast to MonotonicTextLabels and the matching
	 *        .mixup dependency is run on them
	 * @param s the span to extract features for
	 * @return the instance accumulated in the feature buffer
	 */
	public Instance extractInstance(TextLabels labels, Span s)
	{
		// need to run the nameFeatures.mixup file
		if (!labels.isAnnotatedBy(requiredAnnotation)) {
			System.out.println("labels need "+requiredAnnotation);
			Dependencies.runDependency((MonotonicTextLabels)labels, requiredAnnotation, requiredAnnotation+".mixup");
		}

		FeatureBuffer buf = new FeatureBuffer(labels,s);

		// features of the span's own tokens
		if (useEqOnNonAnchors) {
			SpanFE.from(s,buf).tokens().eq().lc().emit();
		} else {
			SpanFE.from(s,buf).tokens().hasProp("anchor").eq().lc().emit();
		}
		if (useCompressedCharType) {
			SpanFE.from(s,buf).tokens().eq().charTypePattern().emit();
		}
		if (useCharType) {
			SpanFE.from(s,buf).tokens().eq().charTypes().emit();
		}
		for (int j=0; j<tokenPropertyFeatures.length; j++) {
			SpanFE.from(s,buf).tokens().prop(tokenPropertyFeatures[j]).emit();
		}

		// features of the tokens in the window around the span
		for (int i=0; i<windowSize; i++) {
			// Left-context positions use negative indices (-1, -2, ...); right-context
			// positions use 0, 1, .... Every left-side lookup must share this index
			// (the property lookup previously used i-1).
			int leftIndex = -i-1;
			int rightIndex = i;

			if (useEqOnNonAnchors) {
				SpanFE.from(s,buf).left().token(leftIndex).eq().lc().emit();
				SpanFE.from(s,buf).right().token(rightIndex).eq().lc().emit();
			} else {
				SpanFE.from(s,buf).left().token(leftIndex).hasProp("anchor").eq().lc().emit();
				SpanFE.from(s,buf).right().token(rightIndex).hasProp("anchor").eq().lc().emit();
			}
			for (int j=0; j<tokenPropertyFeatures.length; j++) {
				SpanFE.from(s,buf).left().token(leftIndex).prop(tokenPropertyFeatures[j]).emit();
			}
			for (int j=0; j<tokenPropertyFeatures.length; j++) {
				SpanFE.from(s,buf).right().token(rightIndex).prop(tokenPropertyFeatures[j]).emit();
			}
			// NOTE(review): the compressed pattern is emitted only for left tokens and
			// the full char types only for right tokens. This asymmetry is kept as-is
			// because changing it would alter the feature set; confirm it is intended.
			if (useCompressedCharType) {
				SpanFE.from(s,buf).left().token(leftIndex).eq().charTypePattern().emit();
			}
			if (useCharType) {
				SpanFE.from(s,buf).right().token(rightIndex).eq().charTypes().emit();
			}
		}
		return buf.getInstance();
	}
}