package org.aksw.gerbil.dataset.impl.senseval;

import static org.junit.Assert.*;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;

import org.aksw.gerbil.exceptions.GerbilException;
import org.aksw.gerbil.transfer.nif.Document;
import org.aksw.gerbil.transfer.nif.Marking;
import org.aksw.gerbil.transfer.nif.data.NamedEntity;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.junit.runners.Parameterized.Parameters;

/**
 * Parameterized test for {@link SensevalDataset}.
 *
 * <p>Each configuration names a dataset file, the index of one document inside the loaded
 * dataset, the text that document is expected to have, and the surface forms (substrings of the
 * document text) that its markings are expected to cover, in order.
 */
@RunWith(Parameterized.class)
public class SensevalDatasetTest {

    /** Dataset file shared by all test configurations. */
    private static final String TEST_FILE = "src/test/resources/datasets/senseval/test.xml";

    /**
     * Builds the test configurations.
     *
     * @return one {@code Object[]} per run, laid out as
     *         {@code {docIndex, file, expectedSentence, expectedMarkings}} to match the constructor
     */
    @Parameters
    public static Collection<Object[]> data() {
        List<Object[]> testConfigs = new ArrayList<Object[]>();
        testConfigs.add(new Object[] { 0, TEST_FILE,
                "The art of change-ringing is peculiar to the English, and, like most English peculiarities, unintelligible to the rest of the world.",
                new String[] { "art", "change-ringing", "is", "peculiar", "English", "most", "English",
                        "peculiarities", "unintelligible", "rest", "world" } });
        // NOTE(review): this literal was broken by a raw line terminator after "L. " in the
        // reviewed source, which is not legal inside a Java string literal. It is rejoined here
        // without a newline character - confirm against the dataset file.
        testConfigs.add(new Object[] { 1, TEST_FILE,
                "-- Dorothy L. Sayers, `` The Nine Tailors ``",
                new String[] { "Tailors" } });
        testConfigs.add(new Object[] { 2, TEST_FILE,
                "ASLACTON, England",
                new String[] { "England" } });
        return testConfigs;
    }

    private final String file;
    private final int docIndex;
    private final String expectedSentence;
    private final String[] expectedMarkings;

    /**
     * @param docIndex         index of the document to check within the loaded dataset
     * @param file             path of the dataset file to load
     * @param expectedSentence text the selected document is expected to have
     * @param expectedMarkings expected substrings covered by the document's markings, in order
     */
    public SensevalDatasetTest(int docIndex, String file, String expectedSentence, String[] expectedMarkings) {
        this.file = file;
        this.docIndex = docIndex;
        this.expectedSentence = expectedSentence;
        this.expectedMarkings = expectedMarkings;
    }

    /**
     * Loads the dataset, selects the configured document and compares its text and the text
     * spans of its markings with the expected values.
     *
     * @throws GerbilException declared by the dataset calls used here
     * @throws IOException     declared by the dataset calls used here
     */
    @Test
    public void test() throws GerbilException, IOException {
        SensevalDataset data = new SensevalDataset(this.file);
        data.init();
        try {
            List<Document> documents = data.getInstances();
            // Fail with a readable message instead of a bare IndexOutOfBoundsException.
            assertTrue("Dataset contains only " + documents.size() + " document(s), but index " + docIndex
                    + " was requested.", docIndex < documents.size());
            Document doc = documents.get(docIndex);
            assertEquals(expectedSentence, doc.getText());

            List<Marking> markings = doc.getMarkings();
            String text = doc.getText();
            String[] marks = new String[markings.size()];
            for (int i = 0; i < markings.size(); i++) {
                NamedEntity entity = (NamedEntity) markings.get(i);
                int start = entity.getStartPosition();
                // Recover the surface form that the marking covers in the document text.
                marks[i] = text.substring(start, start + entity.getLength());
            }
            assertArrayEquals(expectedMarkings, marks);
        } finally {
            // Close the dataset even when an assertion above fails.
            data.close();
        }
    }
}