/* Copyright 2003, Carnegie Mellon, All Rights Reserved */ package edu.cmu.minorthird.text.mixup; import java.io.File; import java.io.IOException; import java.io.InputStream; import java.util.Iterator; import junit.framework.TestCase; import junit.framework.TestSuite; import org.apache.log4j.Logger; import edu.cmu.minorthird.text.BasicTextBase; import edu.cmu.minorthird.text.BasicTextLabels; import edu.cmu.minorthird.text.Document; import edu.cmu.minorthird.text.EncapsulatingAnnotatorLoader; import edu.cmu.minorthird.text.MonotonicTextLabels; import edu.cmu.minorthird.text.Span; import edu.cmu.minorthird.text.TextLabels; import edu.cmu.minorthird.text.TextToken; import edu.cmu.minorthird.util.gui.SmartVanillaViewer; import edu.cmu.minorthird.util.gui.ViewerFrame; /** * * @author William Cohen * @author Quinten Mercer */ public class MixupProgramTest extends TestSuite{ private static Logger log=Logger.getLogger(MixupProgramTest.class); private static final boolean DEBUG=false; public MixupProgramTest(String name){ super(name); } public static TestSuite suite(){ TestSuite suite=new TestSuite(); // suite.addTest( new SimpleProgramTest() ); //suite.addTest( new NestedProgramTest1() ); //suite.addTest( new NestedProgramTest2() ); suite.addTest(new NestedProgramTest3()); //suite.addTest( new ImplicitDeclareTest() ); //suite.addTest( new MultiLevelTest() ); return suite; } public static class AbstractProgramTest extends TestCase{ protected MonotonicTextLabels labels; protected final String testCaseDir= "edu/cmu/minorthird/text/mixup/testcases"; protected final String sep=File.pathSeparator; public AbstractProgramTest(String string){ super(string); BasicTextBase base=new BasicTextBase(); String trialDoc= contentsOfResourceFile(testCaseDir+"/seminar-official-news-2477.txt"); base.loadDocument("2477",trialDoc); labels=new BasicTextLabels(base); } protected void checkTime(TextLabels labels){ // should be one time, "10:45 a.m." Iterator<Span> i=labels.instanceIterator("extracted_time"); assertTrue(i.hasNext()); assertEquals("10:45 a.m.",i.next().asString()); assertTrue(!i.hasNext()); } protected void checkRoom(TextLabels labels){ // should be one time, "1112" Iterator<Span> i=labels.instanceIterator("extracted_room"); assertTrue(i.hasNext()); assertEquals("1112",i.next().asString()); assertTrue(!i.hasNext()); } protected void checkDate(TextLabels labels){ //should contain two dates, "Tuesday", "Feb. 21" and one time, "10:45 a.m." Iterator<Span> i=labels.instanceIterator("extracted_date"); assertTrue(i.hasNext()); assertEquals("Tuesday",i.next().asString()); assertTrue(i.hasNext()); assertEquals("Feb. 21",i.next().asString()); assertTrue(!i.hasNext()); } protected void checkName(TextLabels labels){ //should contain two names, Doherty Hall and Warren Baker Iterator<Span> i=labels.instanceIterator("extracted_name"); assertTrue(i.hasNext()); assertEquals("Doherty Hall",i.next().asString().replaceAll("\\s"," ")); assertTrue(i.hasNext()); assertEquals("Warren Baker",i.next().asString()); assertTrue(!i.hasNext()); } } /** make sure implicit declarations work */ public static class ImplicitDeclareTest extends AbstractProgramTest{ public ImplicitDeclareTest(){ super("doTest"); } public void doTest() throws Mixup.ParseException{ String testCaseDir="edu/cmu/minorthird/text/mixup/testcases"; String progDef= contentsOfResourceFile(testCaseDir+"/implicitDeclare.mixup"); MixupProgram prog=new MixupProgram(progDef); BasicTextBase base=new BasicTextBase(); String trialDoc= contentsOfResourceFile(testCaseDir+"/seminar-official-news-2477.txt"); base.loadDocument("2477",trialDoc); MonotonicTextLabels labels=new BasicTextLabels(base); MixupInterpreter interp=new MixupInterpreter(prog); interp.eval(labels); // just check there's no error raised } } /** directly runs a mixup program which does not call anything else */ public static class SimpleProgramTest extends AbstractProgramTest{ public SimpleProgramTest(){ super("doTest"); } public void doTest() throws Mixup.ParseException{ String testCaseDir="edu/cmu/minorthird/text/mixup/testcases"; String timeProgDef=contentsOfResourceFile(testCaseDir+"/xtime.mixup"); MixupProgram timeProg=new MixupProgram(timeProgDef); BasicTextBase base=new BasicTextBase(); String trialDoc= contentsOfResourceFile(testCaseDir+"/seminar-official-news-2477.txt"); base.loadDocument("2477",trialDoc); MonotonicTextLabels labels=new BasicTextLabels(base); MixupInterpreter interp=new MixupInterpreter(timeProg); interp.eval(labels); if(DEBUG) new ViewerFrame("xtime result",new SmartVanillaViewer(labels)); checkTime(labels); } } /** tests a mixup program which requires another mixup program using * an EncapsulatingAnnotatorLoader */ public static class NestedProgramTest1 extends AbstractProgramTest{ public NestedProgramTest1(){ super("doTest"); } public void doTest() throws Mixup.ParseException{ String testCaseDir="edu/cmu/minorthird/text/mixup/testcases"; String sep=File.pathSeparator; BasicTextBase base=new BasicTextBase(); String trialDoc= contentsOfResourceFile(testCaseDir+"/seminar-official-news-2477.txt"); base.loadDocument("2477",trialDoc); MonotonicTextLabels labels=new BasicTextLabels(base); EncapsulatingAnnotatorLoader eal= new EncapsulatingAnnotatorLoader(false,testCaseDir+"/xtime.mixup"+ sep+testCaseDir+"/xdate.mixup"); labels.setAnnotatorLoader(eal); MixupProgram callingProgram=new MixupProgram("require 'xdate';"); MixupInterpreter interp=new MixupInterpreter(callingProgram); interp.eval(labels); if(DEBUG) new ViewerFrame("xdate result",new SmartVanillaViewer(labels)); checkTime(labels); checkDate(labels); } } /** tests a mixup program which requires another mixup program * that's loaded from the classpath. This requires 'time.mixup' * to be on the classpath. */ public static class NestedProgramTest2 extends AbstractProgramTest{ public NestedProgramTest2(){ super("doTest"); } public void doTest() throws Mixup.ParseException{ try{ InputStream trialStream= this.getClass().getClassLoader().getResourceAsStream("time.mixup"); if(trialStream==null) throw new IllegalStateException( "null stream returned by getResourceAsStream"); }catch(Exception e){ log .warn("NestedProgramTest2 not run because couldn't find time.mixup on classpath.\nReason was: "+ e); return; } String testCaseDir="edu/cmu/minorthird/text/mixup/testcases"; BasicTextBase base=new BasicTextBase(); String trialDoc= contentsOfResourceFile(testCaseDir+"/seminar-official-news-2477.txt"); base.loadDocument("2477",trialDoc); MonotonicTextLabels labels=new BasicTextLabels(base); EncapsulatingAnnotatorLoader eal= new EncapsulatingAnnotatorLoader(false,testCaseDir+"/ydate.mixup"); labels.setAnnotatorLoader(eal); MixupProgram callingProgram=new MixupProgram("require 'ydate';"); MixupInterpreter interp=new MixupInterpreter(callingProgram); interp.eval(labels); if(DEBUG) new ViewerFrame("ydate result",new SmartVanillaViewer(labels)); checkTime(labels); checkDate(labels); } } /** * Tests mixup zall that requires xdate (which requires xtime) and * a java class RoomNumber which provides rooms. the compiled class * file for RoomNumber should be kept in the testcases directory. */ public static class NestedProgramTest3 extends AbstractProgramTest{ public NestedProgramTest3(){ super("doTest"); } public void doTest() throws Mixup.ParseException{ EncapsulatingAnnotatorLoader eal= new EncapsulatingAnnotatorLoader(false,testCaseDir+"/zall.mixup"+sep+ testCaseDir+"/xdate.mixup"+sep+testCaseDir+"/xtime.mixup"+sep+ testCaseDir+"/RoomNumber.class"); labels.setAnnotatorLoader(eal); labels.require("zall",null); if(DEBUG) new ViewerFrame("zall result",new SmartVanillaViewer(labels)); checkTime(labels); checkDate(labels); checkRoom(labels); checkName(labels); } } public static class MultiLevelTest extends AbstractProgramTest{ public MultiLevelTest(){ super("doTest"); } public void doTest() throws Mixup.ParseException{ String testCaseDir="edu/cmu/minorthird/text/mixup/testcases"; // Load the trial document into a text base BasicTextBase base=new BasicTextBase(); String trialDoc= contentsOfResourceFile(testCaseDir+"/seminar-official-news-2477.txt"); base.loadDocument("2477",trialDoc); // Load and execute a couple mixup programs to create some labels in this document. String timeProgDef=contentsOfResourceFile(testCaseDir+"/xtime.mixup"); MixupProgram timeProg=new MixupProgram(timeProgDef); MonotonicTextLabels labels=new BasicTextLabels(base); MixupInterpreter interp=new MixupInterpreter(timeProg); interp.eval(labels); String dateProgDef=contentsOfResourceFile(testCaseDir+"/xdate.mixup"); MixupProgram dateProg=new MixupProgram(dateProgDef); interp=new MixupInterpreter(dateProg); interp.eval(labels); // Now test the Multi-Level functions // Pseudotokens String pseudotokenProgDef= "defLevel newLevel = pseudotoken extracted_date;\n"; pseudotokenProgDef+="onLevel newLevel;\n"; pseudotokenProgDef+= "importFromLevel root extracted_date = extracted_date;\n"; MixupProgram pseudotokenProg=new MixupProgram(pseudotokenProgDef); interp=new MixupInterpreter(pseudotokenProg); interp.eval(labels); MonotonicTextLabels annotatedLabels=interp.getCurrentLabels(); //should contain two dates, "Tuesday", "Feb. 21" both should be made up of a single token Iterator<Span> i=annotatedLabels.instanceIterator("extracted_date"); // WARNING: There doesn't seem to be a way to get a list of tokens or spans that have a token property set. //Span.Looper i = annotatedLabels.getSpansWithProperty("Pseudotoken"); assertTrue(i.hasNext()); Span s=i.next(); assertEquals("Tuesday",s.asString()); assertEquals(1,s.size()); assertTrue(i.hasNext()); s=i.next(); assertEquals("Feb. 21",s.asString()); assertEquals(1,s.size()); assertTrue(!i.hasNext()); // Split String splitProgDef="defLevel newLevel = split \'\\.\';\n"; splitProgDef+="onLevel newLevel;\n"; MixupProgram splitProg=new MixupProgram(splitProgDef); interp=new MixupInterpreter(splitProg); interp.eval(labels); annotatedLabels=interp.getCurrentLabels(); // Should have tokenized the document up so that everything in between instances of '.' are single tokens. // For our example document this means that we should have 5 docs. Document d=annotatedLabels.getTextBase().getDocument("2477"); assertNotNull(d); TextToken[] tokens=d.getTokens(); assertEquals(6,tokens.length); assertEquals( "\nA seminar, entitled \"Graft Modification of Polymers in Twin Screw\nExtruders,\" will be given at 10:45 a", tokens[0].getValue()); assertEquals("m",tokens[1].getValue()); assertEquals(", Tuesday, Feb",tokens[2].getValue()); assertEquals(" 21, in Doherty\nHall 1112",tokens[3].getValue()); assertEquals( " Professor Warren Baker from the Department of Chemistry at\nQueen\'s University, Kingston, Canada, will conduct the seminar", tokens[4].getValue()); assertEquals("\n",tokens[5].getValue()); // Regex // Filter String filterProgDef="defLevel newLevel = filter extracted_date;\n"; filterProgDef+="onLevel newLevel;\n"; MixupProgram filterProg=new MixupProgram(filterProgDef); interp=new MixupInterpreter(filterProg); interp.eval(labels); annotatedLabels=interp.getCurrentLabels(); // There should now be two documents, one for each instance of extracted_date i=annotatedLabels.getTextBase().documentSpanIterator(); assertTrue(i.hasNext()); assertEquals("Tuesday",i.next().asString()); assertTrue(i.hasNext()); assertEquals("Feb. 21",i.next().asString()); assertTrue(!i.hasNext()); } } static private String contentsOfResourceFile(String fileName){ try{ InputStream s= MixupProgramTest.class.getClassLoader().getResourceAsStream(fileName); if(s==null) throw new IllegalStateException("can't find resouce "+fileName); byte[] buf=new byte[s.available()]; s.read(buf); s.close(); return new String(buf); }catch(IOException ex){ throw new IllegalStateException("couldn't find resouce '"+fileName+"': "+ ex); } } static public void main(String[] argv){ junit.textui.TestRunner.run(suite()); } }