/** * This file is part of General Entity Annotator Benchmark. * * General Entity Annotator Benchmark is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * General Entity Annotator Benchmark is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with General Entity Annotator Benchmark. If not, see <http://www.gnu.org/licenses/>. */ package org.aksw.gerbil.dataid; import java.io.ByteArrayOutputStream; import java.io.OutputStream; import java.util.ArrayList; import java.util.Calendar; import java.util.Iterator; import java.util.List; import org.aksw.gerbil.database.ResultNameToIdMapping; import org.aksw.gerbil.datatypes.ExperimentTaskResult; import org.aksw.gerbil.semantic.vocabs.CUBE; import org.aksw.gerbil.semantic.vocabs.GERBIL; import org.aksw.gerbil.web.ExperimentTaskStateHelper; import org.apache.jena.riot.RDFDataMgr; import com.carrotsearch.hppc.IntDoubleOpenHashMap; import com.github.jsonldjava.jena.JenaJSONLD; import com.hp.hpl.jena.datatypes.xsd.XSDDatatype; import com.hp.hpl.jena.rdf.model.Model; import com.hp.hpl.jena.rdf.model.ModelFactory; import com.hp.hpl.jena.rdf.model.Resource; import com.hp.hpl.jena.vocabulary.RDF; import com.hp.hpl.jena.vocabulary.RDFS; import com.hp.hpl.jena.vocabulary.XSD; public class DataIDGenerator { private static final String EXPERIMENT_PREFIX = "experiment?id="; private static final String EXPERIMENT_TASK_PREFIX = "experimentTask_"; private static final String DATASET_DATAID = "dataId/corpora/"; private static final String ANNOTATOR_DATAID = "dataId/annotators/"; private static final String DATAID_EXTENSION = ""; private String gerbilURL; public DataIDGenerator(String gerbilURL) { this.gerbilURL = gerbilURL; } public Model generateDataIDModel() { // create an empty JENA Model Model model = ModelFactory.createDefaultModel(); // setting namespaces model.setNsPrefix("gerbil", GERBIL.getURI()); model.setNsPrefix("rdf", RDF.getURI()); model.setNsPrefix("rdfs", RDFS.getURI()); model.setNsPrefix("xsd", XSD.getURI()); model.setNsPrefix("qb", CUBE.getURI()); return model; } public String createDataIDModel(List<ExperimentTaskResult> results, String eID) { // If the experiment is not existing (== there are no results), return // an empty String if (results.size() == 0) { return ""; } Model model = generateDataIDModel(); addToModel(model, results, eID); // writing dataid result to output OutputStream o = new ByteArrayOutputStream(); // creating json-ld output format RDFDataMgr.write(o, model, JenaJSONLD.JSONLD); return o.toString(); } public void addToModel(Model model, List<ExperimentTaskResult> results, String eID) { if (results.size() == 0) { return; } Resource experiment = createExperimentResource(model, eID); boolean first = true; Iterator<ExperimentTaskResult> resultIterator = results.iterator(); ExperimentTaskResult result; // iterating over the experiments while (resultIterator.hasNext()) { result = resultIterator.next(); // If this is the first experiment result, use it to get further // properties of the experiment (matching, ...) if (first) { Resource r = GERBIL.getExperimentTypeResource(result.type); if (r != null) { experiment.addProperty(GERBIL.experimentType, r); } r = GERBIL.getMatchingResource(result.matching); if (r != null) { experiment.addProperty(GERBIL.matching, r); } first = false; } // create experiment task addExperimentTask(model, result, experiment); } } public Resource createExperimentResource(Model model, String eID) { // create experiment resource Resource experiment = model.createResource(gerbilURL + EXPERIMENT_PREFIX + eID); experiment.addProperty(RDF.type, CUBE.Dataset); experiment.addProperty(RDF.type, GERBIL.Experiment); model.add(experiment, RDFS.label, "Experiment " + eID); model.add(experiment, CUBE.structure, GERBIL.DSD); return experiment; } public void addExperimentTask(Model model, ExperimentTaskResult result, Resource experiment) { addExperimentTask(model, result, experiment, null); } public void addExperimentTask(Model model, ExperimentTaskResult result, Resource experiment, Resource superExpTask) { List<Resource> experimentTasks = new ArrayList<Resource>(); createExperimentTask(model, result, superExpTask, experimentTasks); linkTasksToExperiment(model, experiment, experimentTasks); } public void linkTasksToExperiment(Model model, Resource experiment, List<Resource> experimentTasks) { for (Resource experimentTask : experimentTasks) { model.add(experimentTask, CUBE.dataset, experiment.getURI()); } } public void createExperimentTask(Model model, ExperimentTaskResult result, Resource superExpTask, List<Resource> experimentTasks) { // create Resource Resource experimentTask = model.createResource(generateExperimentTaskUri(result.idInDb)); experimentTasks.add(experimentTask); if (model.containsResource(experimentTask)) { return; } experimentTask.addProperty(RDF.type, CUBE.Observation); // add annotator and dataset experimentTask.addProperty(GERBIL.annotator, gerbilURL + ANNOTATOR_DATAID + DataIDUtils.treatsNames(result.dataset) + DATAID_EXTENSION); experimentTask.addProperty(GERBIL.dataset, gerbilURL + DATASET_DATAID + DataIDUtils.treatsNames(result.annotator) + DATAID_EXTENSION); // set the status of this task model.add(experimentTask, GERBIL.statusCode, model.createTypedLiteral(result.state)); if (superExpTask != null) { model.add(experimentTask, GERBIL.subExperimentOf, superExpTask); } // If this task has been finished if (ExperimentTaskStateHelper.taskFinished(result)) { // creating and setting literals for the current experiment model.add(experimentTask, GERBIL.microF1, model.createTypedLiteral(String.valueOf(result.getMicroF1Measure()), XSDDatatype.XSDdecimal)); model.add(experimentTask, GERBIL.microPrecision, model.createTypedLiteral(String.valueOf(result.getMicroPrecision()), XSDDatatype.XSDdecimal)); model.add(experimentTask, GERBIL.microRecall, model.createTypedLiteral(String.valueOf(result.getMicroRecall()), XSDDatatype.XSDdecimal)); model.add(experimentTask, GERBIL.macroF1, model.createTypedLiteral(String.valueOf(result.getMacroF1Measure()), XSDDatatype.XSDdecimal)); model.add(experimentTask, GERBIL.macroPrecision, model.createTypedLiteral(String.valueOf(result.getMacroPrecision()), XSDDatatype.XSDdecimal)); model.add(experimentTask, GERBIL.macroRecall, model.createTypedLiteral(String.valueOf(result.getMacroRecall()), XSDDatatype.XSDdecimal)); model.add(experimentTask, GERBIL.errorCount, model.createTypedLiteral(String.valueOf(result.errorCount))); if (result.hasAdditionalResults()) { IntDoubleOpenHashMap additionalResults = result.getAdditionalResults(); String propertyUri; ResultNameToIdMapping mapping = ResultNameToIdMapping.getInstance(); for (int i = 0; i < additionalResults.allocated.length; ++i) { if (additionalResults.allocated[i]) { propertyUri = mapping.getResultName(additionalResults.keys[i]); if (propertyUri != null) { propertyUri = GERBIL.getURI() + propertyUri.replace(" ", "_"); model.add(experimentTask, model.createProperty(propertyUri), model.createTypedLiteral( String.valueOf(additionalResults.values[i]), XSDDatatype.XSDdecimal)); } } } } if (result.hasSubTasks()) { for (ExperimentTaskResult subResult : result.getSubTasks()) { createExperimentTask(model, subResult, experimentTask, experimentTasks); } } } Calendar cal = Calendar.getInstance(); cal.setTimeInMillis(result.timestamp); model.add(experimentTask, GERBIL.timestamp, model.createTypedLiteral(cal)); } public String generateExperimentTaskUri(int taskId) { return gerbilURL + EXPERIMENT_TASK_PREFIX + taskId; } }