/* Copyright 2003, Carnegie Mellon, All Rights Reserved */
package edu.cmu.minorthird.text;
import java.util.Iterator;
import junit.framework.Test;
import junit.framework.TestCase;
import junit.framework.TestSuite;
import org.apache.log4j.Logger;
import edu.cmu.minorthird.classify.BasicDataset;
import edu.cmu.minorthird.classify.ClassLabel;
import edu.cmu.minorthird.classify.Dataset;
import edu.cmu.minorthird.classify.Example;
import edu.cmu.minorthird.text.learn.CVSplitterTest;
import edu.cmu.minorthird.text.learn.SampleFE;
import edu.cmu.minorthird.text.learn.SpanFeatureExtractor;
/**
 * Checks the subpopulation ids of examples built from document spans whose
 * documents were assigned to groups with
 * {@code BasicTextBase.setDocumentGroupId}.
 *
 * <p>The fixture loads documents "b1"/"b2" into group "bar" and "f1"/"f2"
 * into group "foo", so a document id and its group id share a first
 * character but are never equal. The assertions rely on that naming
 * convention.
 *
 * @author William Cohen
 */
public class SubPopIdTest extends TestCase
{
    // Keyed on this class so debug output is filed under this test's own category.
    private static final Logger log = Logger.getLogger(SubPopIdTest.class);

    /**
     * Creates a test case with the given name.
     *
     * @param name the name passed to the {@link TestCase} constructor
     */
    public SubPopIdTest(String name) { super(name); }

    /** Creates a test case named "SubPopIdTest". */
    public SubPopIdTest() { super("SubPopIdTest"); }

    /**
     * Builds a suite from the test methods of this class.
     *
     * @return a {@link TestSuite} wrapping {@code SubPopIdTest}
     */
    public static Test suite() { return new TestSuite(SubPopIdTest.class); }

    /**
     * Loads four documents into two groups, extracts one example per document
     * span (once with {@code EmptyLabels}, once with a {@code BasicTextLabels}
     * built on the text base), and checks the subpopulation ids of both
     * resulting datasets.
     */
    public void testSubPop()
    {
        BasicTextBase base = new BasicTextBase();
        base.loadDocument("b1","Mud Club");
        base.setDocumentGroupId("b1","bar");
        base.loadDocument("b2","CBGB's");
        base.setDocumentGroupId("b2","bar");
        base.loadDocument("f1","Mud Pie");
        base.setDocumentGroupId("f1","foo");
        base.loadDocument("f2","PBJ's");
        base.setDocumentGroupId("f2","foo");

        SpanFeatureExtractor fe = SampleFE.BAG_OF_WORDS;

        // Dataset 1: instances extracted with an empty set of labels.
        Dataset data1 = new BasicDataset();
        for (Iterator<Span> i = base.documentSpanIterator(); i.hasNext(); ) {
            Span s = i.next();
            data1.add( new Example( fe.extractInstance(new EmptyLabels(), s), ClassLabel.binaryLabel(+1) ) );
        }

        // Dataset 2: instances extracted with labels bound to the text base.
        TextLabels labels = new BasicTextLabels(base);
        Dataset data2 = new BasicDataset();
        for (Iterator<Span> i = base.documentSpanIterator(); i.hasNext(); ) {
            Span s = i.next();
            data2.add( new Example( fe.extractInstance(labels, s), ClassLabel.binaryLabel(+1) ) );
        }

        checkSubPopIds(data1);
        checkSubPopIds(data2);
    }

    /**
     * Asserts, for every example in the dataset, that the subpopulation id
     * starts with the same character as the id of the source span's document
     * and that the two ids are not equal.
     *
     * @param d the dataset whose examples are checked; each example's source
     *          is cast to {@code Span}
     */
    public void checkSubPopIds(Dataset d)
    {
        for (Iterator<Example> i = d.iterator(); i.hasNext(); ) {
            Example e = i.next();
            Span span = (Span)e.getSource();
            String docId = span.getDocumentId();
            String subpopId = e.getSubpopulationId();

            // Fail with a readable message instead of a NullPointerException
            // from substring() when either id is missing.
            assertNotNull( "document id is null", docId );
            assertNotNull( "subpopulation id is null for document "+docId, subpopId );

            // make sure bi is in 'bar', and fi is in 'foo'
            assertEquals( "subpopulation id '"+subpopId+"' does not match the group of document '"+docId+"'",
                          docId.substring(0,1), subpopId.substring(0,1) );
            // The subpopulation id must not simply repeat the document id.
            assertFalse( "subpopulation id equals document id '"+docId+"'", docId.equals(subpopId) );

            log.debug( "id: "+docId+" subpop: "+subpopId );
        }
    }
}