/** * This file is part of General Entity Annotator Benchmark. * * General Entity Annotator Benchmark is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * General Entity Annotator Benchmark is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with General Entity Annotator Benchmark. If not, see <http://www.gnu.org/licenses/>. */ package org.aksw.gerbil.dataset.impl.nif; import java.util.Arrays; import java.util.HashMap; import java.util.HashSet; import java.util.Map; import java.util.Set; import org.aksw.gerbil.annotator.decorator.ErrorCountingAnnotatorDecorator; import org.aksw.gerbil.dataset.Dataset; import org.aksw.gerbil.dataset.DatasetConfiguration; import org.aksw.gerbil.datatypes.ExperimentType; import org.aksw.gerbil.evaluate.impl.ConfidenceBasedFMeasureCalculator; import org.aksw.gerbil.exceptions.GerbilException; import org.aksw.gerbil.matching.impl.MatchingsCounterImpl; import org.aksw.gerbil.transfer.nif.Document; import org.aksw.gerbil.transfer.nif.Marking; import org.aksw.gerbil.transfer.nif.data.DocumentImpl; import org.aksw.gerbil.transfer.nif.data.NamedEntity; import org.aksw.gerbil.transfer.nif.data.TypedNamedEntity; import org.junit.Assert; import org.junit.BeforeClass; import org.junit.Test; public class OKEChallengeTask1DatasetTest { @BeforeClass public static void setMatchingsCounterDebugFlag() { MatchingsCounterImpl.setPrintDebugMsg(false); ConfidenceBasedFMeasureCalculator.setPrintDebugMsg(false); ErrorCountingAnnotatorDecorator.setPrintDebugMsg(false); } private static final String TASK1_FILE = "src/test/resources/OKE_Challenge/example_data/task1.ttl"; private static final Document EXPECTED_DOCUMENTS[] = new Document[] { new DocumentImpl( "Florence May Harding studied at a school in Sydney, and with Douglas Robert Dundas , but in effect had no formal training in either botany or art.", "http://www.ontologydesignpatterns.org/data/oke-challenge/task-1/sentence-1", Arrays.asList( (Marking) new TypedNamedEntity( 0, 20, new HashSet<String>( Arrays.asList("http://dbpedia.org/resource/Florence_May_Harding", "http://www.ontologydesignpatterns.org/data/oke-challenge/task-1/Florence_May_Harding")), new HashSet<String>(Arrays.asList("http://www.w3.org/2002/07/owl#Individual", "http://www.ontologydesignpatterns.org/ont/dul/DUL.owl#Person"))), (Marking) new NamedEntity(34, 6, "http://www.ontologydesignpatterns.org/data/oke-challenge/task-1/National_Art_School"), (Marking) new TypedNamedEntity(44, 6, new HashSet<String>(Arrays.asList( "http://dbpedia.org/resource/Sydney", "http://www.ontologydesignpatterns.org/data/oke-challenge/task-1/Sydney")), new HashSet<String>(Arrays.asList("http://www.w3.org/2002/07/owl#Individual", "http://ontologydesignpatterns.org/ont/wikipedia/d0.owl#Location"))), (Marking) new TypedNamedEntity( 61, 21, "http://www.ontologydesignpatterns.org/data/oke-challenge/task-1/Douglas_Robert_Dundas", new HashSet<String>(Arrays.asList("http://www.w3.org/2002/07/owl#Individual", "http://www.ontologydesignpatterns.org/ont/dul/DUL.owl#Person"))))), new DocumentImpl( "Such notables include James Carville, who was the senior political adviser to Bill Clinton, and Donna Brazile, the campaign manager of the 2000 presidential campaign of Vice-President Al Gore.", "http://www.ontologydesignpatterns.org/data/oke-challenge/task-1/sentence-2", Arrays.asList( (Marking) new TypedNamedEntity(22, 14, new HashSet<String>(Arrays.asList( "http://dbpedia.org/resource/James_Carville", "http://www.ontologydesignpatterns.org/data/oke-challenge/task-1/James_Carville")), new HashSet<String>(Arrays.asList("http://www.w3.org/2002/07/owl#Individual", "http://www.ontologydesignpatterns.org/ont/dul/DUL.owl#Person"))), (Marking) new TypedNamedEntity( 57, 17, new HashSet<String>( Arrays.asList("http://dbpedia.org/resource/Political_consulting", "http://www.ontologydesignpatterns.org/data/oke-challenge/task-1/Political_adviser")), new HashSet<String>(Arrays.asList("http://www.w3.org/2002/07/owl#Individual", "http://www.ontologydesignpatterns.org/ont/dul/DUL.owl#Role"))), (Marking) new TypedNamedEntity(78, 12, new HashSet<String>(Arrays.asList( "http://dbpedia.org/resource/Bill_Clinton", "http://www.ontologydesignpatterns.org/data/oke-challenge/task-1/Bill_Clinton")), new HashSet<String>(Arrays.asList("http://www.w3.org/2002/07/owl#Individual", "http://www.ontologydesignpatterns.org/ont/dul/DUL.owl#Person"))), (Marking) new TypedNamedEntity(96, 13, new HashSet<String>(Arrays.asList( "http://dbpedia.org/resource/Donna_Brazile", "http://www.ontologydesignpatterns.org/data/oke-challenge/task-1/Donna_Brazile")), new HashSet<String>(Arrays.asList("http://www.w3.org/2002/07/owl#Individual", "http://www.ontologydesignpatterns.org/ont/dul/DUL.owl#Person"))), (Marking) new TypedNamedEntity( 115, 16, new HashSet<String>( Arrays.asList("http://dbpedia.org/resource/Campaign_manager", "http://www.ontologydesignpatterns.org/data/oke-challenge/task-1/Campaign_manager")), new HashSet<String>(Arrays.asList("http://www.w3.org/2002/07/owl#Individual", "http://www.ontologydesignpatterns.org/ont/dul/DUL.owl#Role"))), (Marking) new TypedNamedEntity(184, 7, new HashSet<String>(Arrays.asList( "http://dbpedia.org/resource/Al_Gore", "http://www.ontologydesignpatterns.org/data/oke-challenge/task-1/Al_Gore")), new HashSet<String>(Arrays.asList("http://www.w3.org/2002/07/owl#Individual", "http://www.ontologydesignpatterns.org/ont/dul/DUL.owl#Person"))))), new DocumentImpl( "The senator received a Bachelor of Laws from the Columbia University.", "http://www.ontologydesignpatterns.org/data/oke-challenge/task-1/sentence-3", Arrays.asList( (Marking) new TypedNamedEntity( 4, 7, new HashSet<String>( Arrays.asList("http://www.ontologydesignpatterns.org/data/oke-challenge/task-1/Senator_1")), new HashSet<String>(Arrays.asList("http://www.w3.org/2002/07/owl#Individual", "http://www.ontologydesignpatterns.org/ont/dul/DUL.owl#Person"))), (Marking) new TypedNamedEntity( 49, 19, new HashSet<String>( Arrays.asList("http://dbpedia.org/resource/Columbia_University", "http://www.ontologydesignpatterns.org/data/oke-challenge/task-1/Columbia_University")), new HashSet<String>(Arrays.asList("http://www.w3.org/2002/07/owl#Individual", "http://www.ontologydesignpatterns.org/ont/dul/DUL.owl#Organization"))))) }; @Test public void test() throws GerbilException { DatasetConfiguration datasetConfig = new NIFFileDatasetConfig("OKE_Task1", TASK1_FILE, false, ExperimentType.A2KB, null, null); Dataset dataset = datasetConfig.getDataset(ExperimentType.A2KB); Map<String, Document> uriInstanceMapping = new HashMap<String, Document>(EXPECTED_DOCUMENTS.length); for (Document document : EXPECTED_DOCUMENTS) { uriInstanceMapping.put(document.getDocumentURI(), document); } Document expectedDoc; Set<Marking> expectedMarkings; for (Document document : dataset.getInstances()) { Assert.assertTrue(uriInstanceMapping.containsKey(document.getDocumentURI())); expectedDoc = uriInstanceMapping.get(document.getDocumentURI()); // check the text Assert.assertEquals(expectedDoc.getText(), document.getText()); // check the markings Assert.assertEquals("encountered different lengths of expectedMarkings (" + expectedDoc.getMarkings().toString() + ") and the markings got from the reader (" + document.getMarkings().toString() + ").", expectedDoc.getMarkings().size(), document .getMarkings().size()); expectedMarkings = new HashSet<Marking>(expectedDoc.getMarkings()); for (Marking marking : document.getMarkings()) { Assert.assertTrue("Couldn't find the read marking (" + marking + ") in the list of expected markings (" + expectedMarkings.toString() + ").", expectedMarkings.contains(marking)); } } } }