package edu.cmu.minorthird.text.learn;

import java.util.Iterator;

import edu.cmu.minorthird.classify.SampleDatasets;
import edu.cmu.minorthird.text.BasicTextBase;
import edu.cmu.minorthird.text.BasicTextLabels;
import edu.cmu.minorthird.text.Span;
import edu.cmu.minorthird.text.TextLabels;
import edu.cmu.minorthird.text.TextLabelsLoader;

/**
 * Some sample inputs to facilitate testing.
 *
 * <p>Each factory method builds a fresh text base from the string arrays in
 * {@link SampleDatasets}. Positive examples are loaded under document ids
 * {@code "pos.<index>"} and negative examples under {@code "neg.<index>"}.
 *
 * @author William Cohen
 */
public class SampleClassificationProblem{

	/** Document-id prefix for positive examples; the labeling step keys off this prefix. */
	private static final String POS_PREFIX="pos";

	/** Document-id prefix for negative examples. */
	private static final String NEG_PREFIX="neg";

	/** Span type attached to every positive document. */
	private static final String POSITIVE_TYPE="fun";

	/**
	 * Builds the labeled training set from the positive and negative
	 * <em>training</em> examples.
	 *
	 * @return labels over the training documents, with positives typed {@code "fun"}
	 */
	static public TextLabels trainLabels(){
		// Pair posTrain with negTrain: using negTest here would leak test
		// documents into the training set.
		return makeLabels(SampleDatasets.posTrain,SampleDatasets.negTrain);
	}

	/**
	 * Builds the labeled test set from the positive and negative test examples.
	 *
	 * @return labels over the test documents, with positives typed {@code "fun"}
	 */
	static public TextLabels testLabels(){
		return makeLabels(SampleDatasets.posTest,SampleDatasets.negTest);
	}

	/**
	 * Builds a label set over the test documents without adding any type
	 * assertions to it.
	 *
	 * @return a new, empty label set over the test documents
	 */
	static public TextLabels unlabeled(){
		BasicTextBase base=loadDocuments(SampleDatasets.posTest,SampleDatasets.negTest);
		return new BasicTextLabels(base);
	}

	/**
	 * Loads the given examples into a text base and marks every positive
	 * document with the {@code "fun"} type.
	 *
	 * @param pos texts of the positive examples
	 * @param neg texts of the negative examples
	 * @return labels over all loaded documents
	 */
	static private TextLabels makeLabels(String[] pos,String[] neg){
		BasicTextBase base=loadDocuments(pos,neg);
		BasicTextLabels labels=new BasicTextLabels(base);
		for(Iterator<Span> i=base.documentSpanIterator();i.hasNext();){
			Span s=i.next();
			if(s.getDocumentId().startsWith(POS_PREFIX)){
				labels.addToType(s,POSITIVE_TYPE);
			}
		}
		// NOTE(review): presumably this closes every type over every document so
		// that untyped (negative) documents count as known negatives - confirm
		// against TextLabelsLoader.closeLabels.
		new TextLabelsLoader().closeLabels(labels,TextLabelsLoader.CLOSE_ALL_TYPES);
		return labels;
	}

	/**
	 * Creates a text base holding one document per example, with ids
	 * {@code "pos.<index>"} for positives and {@code "neg.<index>"} for negatives.
	 *
	 * @param pos texts of the positive examples
	 * @param neg texts of the negative examples
	 * @return a new text base containing all the examples
	 */
	static private BasicTextBase loadDocuments(String[] pos,String[] neg){
		BasicTextBase base=new BasicTextBase();
		for(int i=0;i<pos.length;i++){
			base.loadDocument(POS_PREFIX+"."+i,pos[i]);
		}
		for(int i=0;i<neg.length;i++){
			base.loadDocument(NEG_PREFIX+"."+i,neg[i]);
		}
		return base;
	}
}