/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.stanbol.enhancer.engine.disambiguation.mlt; import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_CONFIDENCE; import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_ENTITY_REFERENCE; import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_ORIGIN; import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_SELECTED_TEXT; import java.util.SortedMap; import java.util.SortedSet; import org.apache.clerezza.rdf.core.LiteralFactory; import org.apache.clerezza.commons.rdf.Graph; import org.apache.clerezza.commons.rdf.IRI; import org.apache.clerezza.commons.rdf.Literal; import org.apache.stanbol.enhancer.servicesapi.ContentItem; import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper; import org.apache.stanbol.enhancer.servicesapi.rdf.Properties; import org.apache.stanbol.entityhub.servicesapi.model.Entity; import org.apache.stanbol.entityhub.servicesapi.model.Text; import org.apache.stanbol.entityhub.servicesapi.model.rdf.RdfResourceEnum; import org.apache.stanbol.entityhub.servicesapi.site.Site; import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** * A suggestion of an {@link Entity} for a fise:TextAnnotation processed by the NamedEntityTaggingEngine */ public class Suggestion implements Comparable<Suggestion> { private static final Logger log = LoggerFactory.getLogger(Suggestion.class); private static final LiteralFactory lf = LiteralFactory.getInstance(); private static final IRI ENTITYHUB_SITE = new IRI(RdfResourceEnum.site.getUri()); private IRI entityAnnotation; private IRI entityUri; private Double originalConfidnece; private Entity entity; private Double normalizedDisambiguationScore; private Double disambiguatedConfidence; private String site; private Suggestion(IRI entityAnnotation) { this.entityAnnotation = entityAnnotation; } public Suggestion(Entity entity) { this.entity = entity; this.entityUri = new IRI(entity.getId()); this.site = entity.getSite(); } /** * Allows to create Suggestions from existing fise:TextAnnotation contained in the metadata of the * processed {@link ContentItem} * * @param graph * @param entityAnnotation * @return */ public static Suggestion createFromEntityAnnotation(Graph graph, IRI entityAnnotation) { Suggestion suggestion = new Suggestion(entityAnnotation); suggestion.entityUri = EnhancementEngineHelper.getReference(graph, entityAnnotation, ENHANCER_ENTITY_REFERENCE); if (suggestion.entityUri == null) { // most likely not a fise:EntityAnnotation log.debug("Unable to create Suggestion for EntityAnnotation {} " + "because property {} is not present", entityAnnotation, ENHANCER_ENTITY_REFERENCE); return null; } suggestion.originalConfidnece = EnhancementEngineHelper.get(graph, entityAnnotation, ENHANCER_CONFIDENCE, Double.class, lf); if (suggestion.originalConfidnece == null) { log.warn("EntityAnnotation {} does not define a value for " + "property {}. Will use '0' as fallback", entityAnnotation, ENHANCER_CONFIDENCE); suggestion.originalConfidnece = 0.0; } suggestion.site = EnhancementEngineHelper.getString(graph, entityAnnotation, ENTITYHUB_SITE); if(suggestion.site == null){ //STANBOL-1411: fall back to fise:orign suggestion.site = getOrigin(graph, entityAnnotation); } // NOTE: site might be NULL return suggestion; } /** * The URI of the fise:EntityAnnotation representing this suggestion in the * {@link ContentItem#getMetadata() metadata} of the processed {@link ContentItem}. This will be * <code>null</code> if this Suggestion was created as part of the Disambiguation process and was not * present in the metadata of the content item before the disambiguation. * * @return the URI of the fise:EntityAnnotation or <code>null</code> if not present. */ public IRI getEntityAnnotation() { return entityAnnotation; } /** * Allows to set the URI of the fise:EntityAnnotation. This is required if the original enhancement * structure shared one fise:EntityAnnotation instance for two fise:TextAnnotations (e.g. because both * TextAnnotations had the exact same value for fise:selected-text). After disambiguation it is necessary * to 'clone' fise:EntityAnnotations like that to give them different fise:confidence values. Because of * that it is supported to set the new URI of the cloned fise:EntityAnnotation. * * @param uri * the uri of the cloned fise:EntityAnnotation */ public void setEntityAnnotation(IRI uri) { this.entityAnnotation = uri; } /** * The URI of the Entity (MUST NOT be <code>null</code>) * * @return the URI */ public IRI getEntityUri() { return entityUri; } /** * The original confidence of the fise:EntityAnnotation or <code>null</code> if not available. * * @return */ public Double getOriginalConfidnece() { return originalConfidnece; } /** * The {@link Entity} or <code>null</code> if not available. For Suggestions that are created based on * fise:EntityAnnotations the Entity is not available. Entities might be loaded as part of the * Disambiguation process. * * @return the {@link Entity} or <code>null</code> if not available */ public Entity getEntity() { return entity; } /** * The score of the disambiguation. This is just the score of the disambiguation that is not yet combined * with the {@link #getOriginalConfidnece()} to become the {@link #getDisambiguatedConfidence()} * * @return the disambiguation score */ public Double getNormalizedDisambiguationScore() { return normalizedDisambiguationScore; } /** * The confidence after disambiguation. Will be <code>null</code> at the beginning * * @return the disambiguated confidence or <code>null</code> if not yet disambiguated */ public Double getDisambiguatedConfidence() { return disambiguatedConfidence; } /** * The name of the Entityhub {@link Site} the suggested Entity is managed. * Both <code>entityhub:site</code> and <code>fise:orign</code> are * considered as sites (see STANBOL-1411). * * @return the name of the Entityhub {@link Site} */ public String getSite() { return site; } /** * Setter for the normalized [0..1] score of the disambiguation * * @param normalizedDisambiguationScore */ public void setNormalizedDisambiguationScore(Double normalizedDisambiguationScore) { this.normalizedDisambiguationScore = normalizedDisambiguationScore; } /** * Setter for the confidence after disambiguation * * @param disambiguatedConfidence */ public void setDisambiguatedConfidence(Double disambiguatedConfidence) { this.disambiguatedConfidence = disambiguatedConfidence; } @Override public int hashCode() { return entityUri.hashCode(); } @Override public boolean equals(Object obj) { return obj instanceof Suggestion && ((Suggestion) obj).entityUri.equals(entityUri); } /** * Compares based on the {@link #getDisambiguatedConfidence()} (if present) and falls back to the * {@link #getOriginalConfidnece()}. If the original confidence value is not present or both Suggestions * do have the same confidence the natural order of the Entities URI is used. This also ensures * <code>(x.compareTo(y)==0) == (x.equals(y))</code> and allows to use this class with {@link SortedMap} * and {@link SortedSet} implementations. * <p> */ @Override public int compareTo(Suggestion other) { int result; if (disambiguatedConfidence != null && other.disambiguatedConfidence != null) { result = other.disambiguatedConfidence.compareTo(disambiguatedConfidence); } else if (other.originalConfidnece != null && originalConfidnece != null) { result = other.originalConfidnece.compareTo(originalConfidnece); } else { result = 0; } // ensure (x.compareTo(y)==0) == (x.equals(y)) return result == 0 ? entityUri.getUnicodeString().compareTo(other.entityUri.getUnicodeString()) : result; } private static String getOrigin(Graph graph, IRI entityAnnotation) { IRI uOrigin = EnhancementEngineHelper.getReference(graph, entityAnnotation, ENHANCER_ORIGIN); if (uOrigin != null) { return uOrigin.getUnicodeString(); } else { String sOrigin = EnhancementEngineHelper.getString(graph, entityAnnotation, ENHANCER_ORIGIN); if (sOrigin != null) { return sOrigin; } else { Literal lOrigin = EnhancementEngineHelper.get(graph, entityAnnotation, ENHANCER_ORIGIN, Literal.class, lf); if (lOrigin != null) { return lOrigin.getLexicalForm(); } else { return null; } } } } }