import edu.cmu.minorthird.util.*;
import edu.cmu.minorthird.util.gui.*;
import edu.cmu.minorthird.text.*;
import edu.cmu.minorthird.text.gui.*;
import edu.cmu.minorthird.text.mixup.*;
import edu.cmu.minorthird.text.learn.*;
import edu.cmu.minorthird.classify.experiments.*;
import edu.cmu.minorthird.classify.*;
import edu.cmu.minorthird.classify.sequential.*;
import java.util.*;
import java.io.*;
/**
 * A simple extraction-oriented feature extractor to apply to one-token spans,
 * for extraction tasks.
 *
 * <p>For the span itself and for up to {@code windowSize} tokens on each side
 * of it, the extractor emits lower-cased token-identity features, optional
 * character-type features, and one feature per configured token property.
 * Before extracting, it makes sure the labels carry the annotation named by
 * {@code requiredAnnotation}, running the matching {@code .mixup} file if they
 * do not.
 */
public class NameFE implements SpanFeatureExtractor,Serializable
{
    /** Number of context tokens examined on each side of the span. */
    private int windowSize=5;
    /** Whether to emit features built from {@code charTypes()}. */
    private boolean useCharType=true;
    /** Whether to emit features built from {@code charTypePattern()}. */
    private boolean useCompressedCharType=true;
    /** If false, token-identity features are restricted to tokens with the "anchor" property. */
    private boolean useEqOnNonAnchors=false;
    /** Names of the token properties that are turned into features. */
    private String[] tokenPropertyFeatures=new String[0];
    /** Annotation the labels must have before features are extracted. */
    private String requiredAnnotation="nameFeatures_v2";

    //
    // getters and setters
    //

    public int getWindowSize() { return windowSize; }
    public void setWindowSize(int n) { windowSize=n; }

    public boolean getUseCharType() { return useCharType; }
    public void setUseCharType(boolean flag) { useCharType=flag; }

    public boolean getUseEqOnNonAnchors() { return useEqOnNonAnchors; }
    public void setUseEqOnNonAnchors(boolean flag) { useEqOnNonAnchors=flag; }

    public boolean getUseCompressedCharType() { return useCompressedCharType; }
    public void setUseCompressedCharType(boolean flag) { useCompressedCharType=flag; }

    public void setRequiredAnnotation(String s) { requiredAnnotation=s; }
    public String getRequiredAnnotation() { return requiredAnnotation; }

    /** Returns the configured token properties, rendered by {@code StringUtil.toString}. */
    public String getTokenPropertyFeatures() { return StringUtil.toString(tokenPropertyFeatures); }

    /**
     * Sets the token properties to use as features.
     *
     * @param commaSeparatedTokenPropertyList property names separated by commas
     *        (optionally followed by whitespace); a null, empty or blank string
     *        clears the list
     */
    public void setTokenPropertyFeatures(String commaSeparatedTokenPropertyList) {
        // "".split(...) yields a one-element array containing "", which would
        // produce features for a property with an empty name; treat it as "none".
        if (commaSeparatedTokenPropertyList==null
            || commaSeparatedTokenPropertyList.trim().length()==0) {
            tokenPropertyFeatures = new String[0];
            return;
        }
        tokenPropertyFeatures = commaSeparatedTokenPropertyList.split(",\\s*");
    }

    /**
     * Sets the token properties to use as features.
     *
     * @param propertySet set whose elements are the property names (Strings)
     */
    public void setTokenPropertyFeatures(Set propertySet) {
        tokenPropertyFeatures = (String[])propertySet.toArray(new String[propertySet.size()]);
    }

    /**
     * Extracts an instance from a span using a fresh {@code EmptyLabels}.
     *
     * @param s the span to extract features from
     * @return the instance built from the emitted features
     */
    public Instance extractInstance(Span s)
    {
        return extractInstance(new EmptyLabels(), s);
    }

    /**
     * Extracts an instance from a span, given the labels for its document.
     *
     * @param labels labels consulted for token properties; annotated with
     *        {@code requiredAnnotation} on demand
     * @param s the span to extract features from
     * @return the instance built from the emitted features
     */
    public Instance extractInstance(TextLabels labels, Span s)
    {
        // need to run the nameFeatures.mixup file
        if (!labels.isAnnotatedBy(requiredAnnotation)) {
            System.out.println("labels need "+requiredAnnotation);
            // NOTE(review): this cast assumes labels is a MonotonicTextLabels; it
            // presumably fails for other implementations (possibly including the
            // EmptyLabels supplied by the one-argument overload) - confirm.
            Dependencies.runDependency((MonotonicTextLabels)labels, requiredAnnotation, requiredAnnotation+".mixup");
        }

        FeatureBuffer buf = new FeatureBuffer(labels,s);

        // features of the tokens inside the span
        if (useEqOnNonAnchors) {
            SpanFE.from(s,buf).tokens().eq().lc().emit();
        } else {
            SpanFE.from(s,buf).tokens().hasProp("anchor").eq().lc().emit();
        }
        if (useCompressedCharType) {
            SpanFE.from(s,buf).tokens().eq().charTypePattern().emit();
        }
        if (useCharType) {
            SpanFE.from(s,buf).tokens().eq().charTypes().emit();
        }
        for (int j=0; j<tokenPropertyFeatures.length; j++) {
            SpanFE.from(s,buf).tokens().prop(tokenPropertyFeatures[j]).emit();
        }

        // features of the context window: the i-th step looks at index -i-1 of
        // the left context and index i of the right context
        for (int i=0; i<windowSize; i++) {
            final int leftIndex = -i-1;
            final int rightIndex = i;

            if (useEqOnNonAnchors) {
                SpanFE.from(s,buf).left().token(leftIndex).eq().lc().emit();
                SpanFE.from(s,buf).right().token(rightIndex).eq().lc().emit();
            } else {
                SpanFE.from(s,buf).left().token(leftIndex).hasProp("anchor").eq().lc().emit();
                SpanFE.from(s,buf).right().token(rightIndex).hasProp("anchor").eq().lc().emit();
            }

            // The left-context property features previously used index i-1, which
            // disagrees with every other left-context feature in this loop for
            // i>0; they now use the same index as their siblings.
            for (int j=0; j<tokenPropertyFeatures.length; j++) {
                SpanFE.from(s,buf).left().token(leftIndex).prop(tokenPropertyFeatures[j]).emit();
            }
            for (int j=0; j<tokenPropertyFeatures.length; j++) {
                SpanFE.from(s,buf).right().token(rightIndex).prop(tokenPropertyFeatures[j]).emit();
            }

            // NOTE(review): the compressed char type is emitted only for the left
            // context and the full char types only for the right context. This
            // asymmetry is kept as-is; confirm whether it is intentional.
            if (useCompressedCharType) {
                SpanFE.from(s,buf).left().token(leftIndex).eq().charTypePattern().emit();
            }
            if (useCharType) {
                SpanFE.from(s,buf).right().token(rightIndex).eq().charTypes().emit();
            }
        }
        return buf.getInstance();
    }
}