/** * This file is part of General Entity Annotator Benchmark. * * General Entity Annotator Benchmark is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * General Entity Annotator Benchmark is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with General Entity Annotator Benchmark. If not, see <http://www.gnu.org/licenses/>. */ package org.aksw.gerbil.dataset.impl.gerdaq; import java.io.File; import java.util.ArrayList; import java.util.Arrays; import java.util.List; import org.aksw.gerbil.transfer.nif.Document; import org.aksw.gerbil.transfer.nif.Marking; import org.aksw.gerbil.transfer.nif.data.DocumentImpl; import org.aksw.gerbil.transfer.nif.data.NamedEntity; import org.apache.commons.io.Charsets; import org.apache.commons.io.FileUtils; import org.junit.Assert; import org.junit.Test; public class GERDAQDatasetTest { private static final String DATASET_NAME = "test"; @Test public void checkLoadDatasets() throws Exception { File file = File.createTempFile("GERDAQ", ".xml"); FileUtils.write(file, "<?xml version='1.0' encoding='UTF-8'?>" + String.format("%n") + "<dataset><instance>loris <annotation rank_0_id=\"44017\" rank_0_score=\"0.925555555556\" rank_0_title=\"Candle\">candle</annotation> sampler</instance><instance><annotation rank_0_id=\"230699\" rank_0_score=\"0.666666666667\" rank_0_title=\"Conveyancing\">buying land</annotation> and <annotation rank_0_id=\"21883824\" rank_0_score=\"1.0\" rank_0_title=\"Arizona\">arizona</annotation></instance><instance>hip gry pl</instance></dataset>", Charsets.UTF_8); String docUriStart = GERDAQDataset.generateDocumentUri(DATASET_NAME, file.getName()); List<Document> expectedDocuments = Arrays.asList( new DocumentImpl("loris candle sampler", docUriStart + 0, Arrays.asList(new NamedEntity(6, 6, "http://dbpedia.org/resource/Candle"))), new DocumentImpl("buying land and arizona", docUriStart + 1, Arrays.asList(new NamedEntity(0, 11, "http://dbpedia.org/resource/Conveyancing"), new NamedEntity(16, 7, "http://dbpedia.org/resource/Arizona"))), new DocumentImpl("hip gry pl", docUriStart + 2, new ArrayList<Marking>(0))); GERDAQDataset dataset = new GERDAQDataset(file.getAbsolutePath()); try { dataset.setName(DATASET_NAME); dataset.init(); Assert.assertArrayEquals(expectedDocuments.toArray(new Document[3]), dataset.getInstances().toArray(new Document[3])); } finally { dataset.close(); } } }