package edu.cmu.minorthird.text;
import java.io.File;
import java.io.IOException;
import java.util.Iterator;
import junit.framework.Test;
import junit.framework.TestCase;
import junit.framework.TestSuite;
import org.apache.log4j.Logger;
import edu.cmu.minorthird.classify.BasicDataset;
import edu.cmu.minorthird.classify.ClassLabel;
import edu.cmu.minorthird.classify.ClassifierLearner;
import edu.cmu.minorthird.classify.Dataset;
import edu.cmu.minorthird.classify.Example;
import edu.cmu.minorthird.classify.algorithms.linear.PoissonLearner;
import edu.cmu.minorthird.classify.experiments.CrossValSplitter;
import edu.cmu.minorthird.classify.experiments.Evaluation;
import edu.cmu.minorthird.classify.experiments.Tester;
import edu.cmu.minorthird.text.learn.SpanFE;
import edu.cmu.minorthird.util.Globals;
import edu.cmu.minorthird.util.gui.ViewerFrame;
/**
*
* This class is responsible for...
*
* @author ksteppe
*/
public class BayesClassifiersTest extends TestCase{
Logger log=Logger.getLogger(this.getClass());
/**
* Standard test class constructior for BayesClassifiersTest
* @param name Name of the test
*/
public BayesClassifiersTest(String name){
super(name);
}
/**
* Convinence constructior for BayesClassifiersTest
*/
public BayesClassifiersTest(){
super("BayesClassifiersTest");
}
/**
* setUp to run before each test
*/
protected void setUp(){
Logger.getRootLogger().removeAllAppenders();
org.apache.log4j.BasicConfigurator.configure();
//TODO add initializations if needed
}
/**
* clean up to run after each test
*/
protected void tearDown(){
//TODO clean up resources if needed
}
/**
* Base test for BayesClassifiersTest
*/
public void testBayesClassifiersTest(){
try{ // load the documents into a textbase
File dir=new File(Globals.DATA_DIR+"bayes-testData");
TextBaseLoader loader=new TextBaseLoader();
TextBase base=loader.load(dir);
// set up labels
MutableTextLabels labels=new BasicTextLabels(base);
new TextLabelsLoader().importOps(labels,base,new File(Globals.DATA_DIR+
"bayes-testData.labels"));
// for verification/correction of the labels, if we care...
//TextBaseLabeler.label( labels, new File("my-document-labels.env"));
// set up a simple bag-of-words feature extractor
SpanFE fe=new SpanFE(){
static final long serialVersionUID=20080302L;
public void extractFeatures(TextLabels labels,Span s){
try{
from(s).tokens().eq().lc().punk()
.usewords("examples/t1.words.text").emit();
}catch(IOException e){
log.error(e,e);
}
//SpanFE.from(s,buf).tokens().eq().lc().punk().stopwords("remove").emit();
}
};
// check
log.debug(labels.getTypes().toString());
// create a binary dataset for the class 'rr'
Dataset data=new BasicDataset();
for(Iterator<Span> i=base.documentSpanIterator();i.hasNext();){
Span s=i.next();
//System.out.println( labels );
double label=labels.hasType(s,"rr")?+1:-1;
TextLabels textLabels=new EmptyLabels();
data.add(new Example(fe.extractInstance(textLabels,s),ClassLabel
.binaryLabel(label)));
}
new ViewerFrame("rr data",data.toGUI());
// System.exit(0);
// pick a learning algorithm
ClassifierLearner learner=new PoissonLearner();
// do a 10-fold cross-validation experiment
Evaluation v=Tester.evaluate(learner,data,new CrossValSplitter<Example>(10));
// display the results
new ViewerFrame("Results of 10-fold CV on 'rr'",v.toGUI());
}catch(Exception e){
log.error(e,e);
fail();
}
}
/**
* Creates a TestSuite from all testXXX methods
* @return TestSuite
*/
public static Test suite(){
return new TestSuite(BayesClassifiersTest.class);
}
/**
* Run the full suite of tests with text output
* @param args - unused
*/
public static void main(String args[]){
junit.textui.TestRunner.run(suite());
}
}