/** * This file is part of General Entity Annotator Benchmark. * * General Entity Annotator Benchmark is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * General Entity Annotator Benchmark is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with General Entity Annotator Benchmark. If not, see <http://www.gnu.org/licenses/>. */ // NERD Annotator - It triggers queries to the NERD framework // http://nerd.eurecom.fr and it parses the results // // Copyright 2014 EURECOM // // Authors: // Giuseppe Rizzo <giuse.rizzo@gmail.com> // // Licensed under ... 
package org.aksw.gerbil.bat.annotator;

import fr.eurecom.nerd.client.NERD;
import fr.eurecom.nerd.client.schema.Entity;
import fr.eurecom.nerd.client.type.DocumentType;
import fr.eurecom.nerd.client.type.ExtractorType;
import fr.eurecom.nerd.client.type.GranularityType;
import it.unipi.di.acube.batframework.data.Annotation;
import it.unipi.di.acube.batframework.data.Mention;
import it.unipi.di.acube.batframework.data.ScoredAnnotation;
import it.unipi.di.acube.batframework.data.ScoredTag;
import it.unipi.di.acube.batframework.data.Tag;
import it.unipi.di.acube.batframework.problems.Sa2WSystem;
import it.unipi.di.acube.batframework.utils.AnnotationException;
import it.unipi.di.acube.batframework.utils.ProblemReduction;
import it.unipi.di.acube.batframework.utils.WikipediaApiInterface;

import java.util.HashSet;
import java.util.List;

import org.aksw.gerbil.bat.converter.DBpediaToWikiId;
import org.aksw.gerbil.config.GerbilConfiguration;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;

import com.google.common.collect.Sets;

/**
 * NERD Annotator - It triggers queries to the NERD framework http://nerd.eurecom.fr and it parses the results
 * into BAT-framework scored annotations.
 *
 * <p>
 * All {@code solve*} methods are derived from {@link #getNERDAnnotations(String)} by means of the
 * {@link ProblemReduction} helpers.
 * </p>
 *
 * @author Giuseppe Rizzo (giuse.rizzo@gmail.com)
 */
@Deprecated
public class NERDAnnotator implements Sa2WSystem {

    private static final Logger LOGGER = LoggerFactory.getLogger(NERDAnnotator.class);

    /** Name reported by {@link #getName()}. */
    public static final String NAME = "NERD-ML";

    /** Configuration key under which the NERD API location is looked up. */
    private static final String NERD_API_PROPERTY_NAME = "org.aksw.gerbil.annotators.nerd.api";

    /** NERD API location, read from the GERBIL configuration when the instance is created. */
    private final String NERD_API = GerbilConfiguration.getInstance().getString(NERD_API_PROPERTY_NAME);

    /** API key handed to the NERD client on every request. */
    private String key;

    /** Passed to {@link DBpediaToWikiId#getId} to turn entity URIs into Wikipedia ids. */
    @Autowired
    private WikipediaApiInterface wikiApi;

    /**
     * Creates an annotator without a Wikipedia API; {@code wikiApi} is left to be injected.
     *
     * Shouldn't be used until we have finished porting the project to Spring.
     *
     * @param key
     *            the NERD API key
     */
    @Deprecated
    public NERDAnnotator(String key) {
        this.key = key;
    }

    /**
     * Creates an annotator with an explicit Wikipedia API.
     *
     * @param wikiApi
     *            the Wikipedia API used for resolving entity URIs to Wikipedia ids
     * @param key
     *            the NERD API key
     */
    public NERDAnnotator(WikipediaApiInterface wikiApi, String key) {
        this.key = key;
        this.wikiApi = wikiApi;
    }

    /**
     * @return the constant {@link #NAME}
     */
    @Override
    public String getName() {
        return NAME;
    }

    /**
     * Solves A2W by reducing the Sa2W result with {@link Float#MIN_VALUE} as the score threshold.
     */
    @Override
    public HashSet<Annotation> solveA2W(String text) throws AnnotationException {
        return ProblemReduction.Sa2WToA2W(solveSa2W(text), Float.MIN_VALUE);
    }

    /**
     * Solves C2W by reducing the A2W result.
     */
    @Override
    public HashSet<Tag> solveC2W(String text) throws AnnotationException {
        return ProblemReduction.A2WToC2W(solveA2W(text));
    }

    /**
     * @return always {@code -1}; this annotator does not record the time of its last request
     */
    @Override
    public long getLastAnnotationTime() {
        return -1;
    }

    /**
     * Solves Sc2W by reducing the Sa2W result.
     */
    @Override
    public HashSet<ScoredTag> solveSc2W(String text) throws AnnotationException {
        return ProblemReduction.Sa2WToSc2W(this.solveSa2W(text));
    }

    /**
     * Solves Sa2W by returning the annotations obtained from NERD unchanged.
     */
    @Override
    public HashSet<ScoredAnnotation> solveSa2W(String text) throws AnnotationException {
        return getNERDAnnotations(text);
    }

    /**
     * Solves D2W by reducing the NERD annotations of the text against the given mentions, using
     * {@code 0.1f} as the threshold argument of {@link ProblemReduction#Sa2WToD2W}.
     *
     * @param text
     *            the text to annotate
     * @param mentions
     *            the mentions that should be linked
     */
    @Override
    public HashSet<Annotation> solveD2W(String text, HashSet<Mention> mentions) throws AnnotationException {
        // An earlier, commented-out variant matched mentions and annotations by position only (it was marked
        // FIXME as naive); the ProblemReduction helper is used instead.
        return ProblemReduction.Sa2WToD2W(getNERDAnnotations(text), mentions, 0.1f);
    }

    /**
     * Send request to NERD and parse the response as a set of scored annotations.
     *
     * <p>
     * Every returned entity becomes one {@link ScoredAnnotation} built from its start offset, the difference
     * between its end and start offsets, the id resolved from its URI via {@link DBpediaToWikiId} and its
     * confidence.
     * </p>
     *
     * @param text
     *            the text to send
     * @return the annotations created from the entities returned by NERD
     * @throws AnnotationException
     *             if anything goes wrong while querying NERD or converting its response; the original
     *             exception is written to the log
     */
    public HashSet<ScoredAnnotation> getNERDAnnotations(String text) {
        HashSet<ScoredAnnotation> annotations = Sets.newHashSet();
        try {
            LOGGER.debug("shipping to NERD the text to annotate");

            NERD nerd = new NERD(NERD_API, key);
            // NOTE(review): the meaning of the trailing arguments (60L, true, true) is defined by the NERD
            // client and is not visible here - presumably a timeout and two flags; confirm against the client.
            List<Entity> entities = nerd.annotate(ExtractorType.NERDML, DocumentType.PLAINTEXT, text,
                    GranularityType.OEN, 60L, true, true);
            LOGGER.debug("NERD has found {} entities", entities.size());

            for (Entity entity : entities) {
                int id = DBpediaToWikiId.getId(wikiApi, entity.getUri());
                annotations.add(new ScoredAnnotation(entity.getStartChar(),
                        entity.getEndChar() - entity.getStartChar(), id, new Float(entity.getConfidence())));
            }
        } catch (Exception ex) {
            // TODO fix the error handling in order to closely check what is the source of the error
            String message = "An error occurred while querying " + this.getName() + " API. Message: "
                    + ex.getMessage();
            // Only the message is handed to the AnnotationException, so the full stack trace of the cause is
            // logged here (through the logger instead of printStackTrace) to keep it available for debugging.
            LOGGER.error(message, ex);
            throw new AnnotationException(message);
        }
        return annotations;
    }
}