package edu.cmu.minorthird.text.learn;
import java.io.Serializable;
import java.util.Iterator;
import edu.cmu.minorthird.classify.ClassLabel;
import edu.cmu.minorthird.classify.Classifier;
import edu.cmu.minorthird.classify.Example;
import edu.cmu.minorthird.classify.Instance;
import edu.cmu.minorthird.classify.OnlineClassifierLearner;
import edu.cmu.minorthird.text.AbstractAnnotator;
import edu.cmu.minorthird.text.BasicTextBase;
import edu.cmu.minorthird.text.BasicTextLabels;
import edu.cmu.minorthird.text.MonotonicTextLabels;
import edu.cmu.minorthird.text.MutableTextLabels;
import edu.cmu.minorthird.text.Span;
import edu.cmu.minorthird.text.TextBase;
import edu.cmu.minorthird.text.TextLabels;
import edu.cmu.minorthird.ui.Recommended;
/**
 * Online learner for a binary, document-level text classifier.
 *
 * <p>Each document span is converted to an {@link Instance} by a
 * {@link SpanFeatureExtractor} and handed to the wrapped
 * {@link OnlineClassifierLearner} as a positive example when the span carries
 * the type {@code spanType}, and as a negative example otherwise. Used as an
 * annotator, it adds the type {@code "_predicted_" + spanType} to every
 * document span that the learner's current classifier labels positive.
 *
 * @author Cameron Williams
 */
public class OnlineBinaryTextClassifierLearner extends AbstractAnnotator
    implements OnlineTextClassifierLearner, Serializable {

  static final long serialVersionUID = 20080306L;

  /** Prefix of the ids generated for documents passed to {@link #addDocument}. */
  private final static String DOC = "OnlineDocument_";

  // internal state
  /** Turns a document span into the instance given to the learner/classifier. */
  private SpanFeatureExtractor fe = null;
  /** The wrapped online learner that receives every example. */
  public OnlineClassifierLearner learner;
  /** {@code spanType} marks positive documents; {@code outputType} is the type added to predicted positives. */
  public String spanType, outputType;
  /** Count of documents added through {@link #addDocument}; used to build document ids. */
  private int docNum;

  /**
   * Creates a learner with no initial labeled data and the default feature
   * extractor.
   *
   * @param learner the online learner to feed
   * @param spanType the type that marks a document as positive
   */
  public OnlineBinaryTextClassifierLearner(OnlineClassifierLearner learner,
      String spanType) {
    this(learner, spanType, null, null);
  }

  /**
   * Creates a learner with the default feature extractor, optionally seeded
   * with already labeled documents.
   *
   * @param learner the online learner to feed
   * @param spanType the type that marks a document as positive
   * @param labeledData labeled documents to train on immediately, or
   *        {@code null} for none
   */
  public OnlineBinaryTextClassifierLearner(OnlineClassifierLearner learner,
      String spanType, TextLabels labeledData) {
    this(learner, spanType, labeledData, null);
  }

  /**
   * Creates a learner, optionally seeded with already labeled documents.
   *
   * @param learner the online learner to feed
   * @param spanType the type that marks a document as positive
   * @param labeledData labeled documents to train on immediately, or
   *        {@code null} for none
   * @param fe the feature extractor to use, or {@code null} to fall back to
   *        {@code Recommended.DocumentFE}
   */
  public OnlineBinaryTextClassifierLearner(OnlineClassifierLearner learner,
      String spanType, TextLabels labeledData, SpanFeatureExtractor fe) {
    this.learner = learner;
    this.spanType = spanType;
    this.outputType = "_predicted_" + spanType;
    this.docNum = 0;
    this.fe = (fe != null) ? fe : new Recommended.DocumentFE();
    if (labeledData != null) {
      addLabeledData(labeledData);
    }
  }

  /**
   * Wraps an instance in a binary-labeled example (+1 when positive, -1
   * otherwise) and passes it to the learner.
   */
  private void learnFrom(Instance instance, boolean positive) {
    int sign;
    if (positive) {
      sign = +1;
    } else {
      sign = -1;
    }
    learner.addExample(new Example(instance, ClassLabel.binaryLabel(sign)));
  }

  /**
   * Feeds every document span of the given labels' text base to the learner,
   * positive when the span has type {@code spanType}.
   */
  private void addLabeledData(TextLabels labels) {
    Iterator<Span> documents = labels.getTextBase().documentSpanIterator();
    while (documents.hasNext()) {
      Span document = documents.next();
      boolean positive = labels.hasType(document, spanType);
      learnFrom(fe.extractInstance(labels, document), positive);
    }
  }

  /**
   * Adds one labeled document to the learner.
   *
   * <p>The text is loaded into a fresh text base under a generated id (the
   * {@code DOC} prefix followed by a running counter), the whole-document span
   * is given the type {@code label}, and the resulting example is positive
   * when that span then has type {@code spanType} (presumably, when
   * {@code label} equals {@code spanType}) and negative otherwise.
   *
   * @param label the type to attach to the document
   * @param text the document contents
   */
  @Override
  public void addDocument(String label, String text) {
    BasicTextBase textBase = new BasicTextBase();
    String docID = DOC + (++docNum);
    textBase.loadDocument(docID, text);

    MutableTextLabels docLabels = new BasicTextLabels(textBase);
    Span wholeDocument = textBase.documentSpan(docID);
    docLabels.addToType(wholeDocument, label);

    boolean positive = docLabels.hasType(wholeDocument, spanType);
    learnFrom(fe.extractInstance(docLabels, wholeDocument), positive);
  }

  /** Returns a new {@link BinaryTextClassifier} built from the learner and the feature extractor. */
  @Override
  public TextClassifier getTextClassifier() {
    return new BinaryTextClassifier(learner, fe);
  }

  /** Returns the classifier currently held by the wrapped learner. */
  @Override
  public Classifier getClassifier() {
    return learner.getClassifier();
  }

  /** Signals the wrapped learner that no more examples are coming. */
  @Override
  public void completeTraining() {
    learner.completeTraining();
  }

  /** Resets the wrapped learner, discarding what it has learned so far. */
  @Override
  public void reset() {
    learner.reset();
  }

  /**
   * Returns the two type names handled by this learner.
   *
   * @return {@code spanType} followed by {@code "NOT" + spanType}
   */
  @Override
  public String[] getTypes() {
    return new String[] {spanType, "NOT" + spanType};
  }

  /**
   * Classifies every document span in the labels' text base and adds
   * {@code outputType} to each span classified positive. Nothing is added
   * when {@code spanType} is {@code null}.
   */
  @Override
  public void doAnnotate(MonotonicTextLabels labels) {
    Iterator<Span> candidates = labels.getTextBase().documentSpanIterator();
    Classifier classifier = learner.getClassifier();
    while (candidates.hasNext()) {
      Span candidate = candidates.next();
      ClassLabel predicted =
          classifier.classification(fe.extractInstance(labels, candidate));
      if (spanType == null || !predicted.isPositive()) {
        continue;
      }
      labels.addToType(candidate, outputType);
    }
  }

  /**
   * Returns the current classifier's explanation for the instance extracted
   * from the given document span.
   */
  @Override
  public String explainAnnotation(TextLabels labels, Span documentSpan) {
    Instance instance = fe.extractInstance(labels, documentSpan);
    return learner.getClassifier().explain(instance);
  }

  /**
   * Returns a {@link ClassifierAnnotator} built from the feature extractor,
   * the current classifier and {@code outputType}.
   */
  @Override
  public ClassifierAnnotator getAnnotator() {
    return new ClassifierAnnotator(fe, getClassifier(), outputType, null, null);
  }
}