package edu.cmu.minorthird.text.learn;
import java.io.Serializable;
import java.util.Iterator;
import edu.cmu.minorthird.classify.multi.MultiClassLabel;
import edu.cmu.minorthird.classify.multi.MultiClassifier;
import edu.cmu.minorthird.text.AbstractAnnotator;
import edu.cmu.minorthird.text.MonotonicTextLabels;
import edu.cmu.minorthird.text.Span;
import edu.cmu.minorthird.text.TextLabels;
/**
* An annotator that uses a learned Classifier to mark up document spans.
*/
public class MultiClassifierAnnotator extends AbstractAnnotator implements
Serializable{
static private final long serialVersionUID=20080306L;
private SpanFeatureExtractor fe;
private MultiClassifier mc;
private String[] multiSpanProp=null;
public MultiClassifierAnnotator(SpanFeatureExtractor fe,MultiClassifier mc,
String[] multiSpanProp){
this.fe=fe;
this.mc=mc;
this.multiSpanProp=multiSpanProp;
}
/** The feature extractor applied to candidate spans. */
public SpanFeatureExtractor getFE(){
return fe;
}
/*
* The classifier used on Instances extracted from candidate spans by the
* SpanFeatureExtractor getFE()
*/
public MultiClassifier getMultiClassifier(){
return mc;
}
/** If non-null, the property used to encode the output of the classifier. */
public String[] getMultiSpanProperty(){
return multiSpanProp;
}
@Override
public void doAnnotate(MonotonicTextLabels labels){
Iterator<Span> candidateLooper=labels.getTextBase().documentSpanIterator();
for(Iterator<Span> i=candidateLooper;i.hasNext();){
Span s=i.next();
MultiClassLabel classOfS=
mc.multiLabelClassification(fe.extractInstance(labels,s));
String[] bestClassNames=classOfS.bestClassName();
for(int j=0;j<bestClassNames.length;j++){
labels.setProperty(s,multiSpanProp[j],"_predicted_"+bestClassNames[j]);
}
}
}
@Override
public String explainAnnotation(TextLabels labels,Span documentSpan){
return mc.explain(fe.extractInstance(labels,documentSpan));
}
}