/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package opennlp.tools.entitylinker; import java.io.IOException; import java.util.List; import opennlp.tools.util.Span; /** * EntityLinkers establish connections to external data to enrich extracted * entities. For instance, for Location entities a linker can be developed to * lookup each found location in a geonames gazateer. Another example may be to * find peoples' names and look them up in a database or active directory. * Intended to return n best matches for any give search, but can also be * implemented as deterministic * * @param <T> A type that extends Span. LinkedSpan and BaseLink are provided to * provide this signature: EntityLinker<LinkedSpan<BaseLink>> as a * default */ public interface EntityLinker<T extends Span> { /** * allows for passing properties through the EntityLinkerFactory into all * impls dynamically. EntityLinker impls should initialize reusable objects * used by the impl in this method. If this is done, any errors will be * captured and thrown by the EntityLinkerFactory. * * @param initializationData the EntityLinkerProperties object that contains * properties needed by the impl, as well as any * other objects required for the impl * @throws java.io.IOException */ void init(EntityLinkerProperties initializationData) throws IOException; /** * Links an entire document of named entities to an external source * * @param doctext the full text of the document * @param tokensBySentence a list of tokens spans that correspond to each sentence. * The outer array refers to the sentence, the inner * array is the tokens for the outer sentence. Similar * in nature to Map of SentenceIndex keys to Listof * tokens as values * @param namesBySentence a list of name spans that correspond to each * sentence. The outer array refers to the sentence, * the inner array refers to the tokens that for the * same sentence.Similar in nature to * Map<SentenceIndex,List<Name Spans For This * Sentence's Tokens>> @ return * @return */ List<T> find(String doctext, Span[] sentences, Span[][] tokensBySentence, Span[][] namesBySentence); /** * Links the names that correspond to the tokens[] spans. The sentenceindex * can be used to get the sentence text and tokens from the text based on the * sentence and token spans. The text is available for additional context. * * @param doctext the full text of the document * @param tokensBySentence a list of tokens spans that correspond to each sentence. * The outer array refers to the sentence, the inner * array is the tokens for the outer sentence. Similar * in nature to Map of SentenceIndex keys to Listof * tokens as values * @param namesBySentence a list of name spans that correspond to each * sentence. The outer array refers to the sentence, * the inner array refers to the tokens that for the * same sentence.Similar in nature to * Map<SentenceIndex,List<Name Spans For This * Sentence's Tokens>> @ return * @param sentenceIndex the index to the sentence span that the tokens[] * Span[] corresponds to * @return */ List<T> find(String doctext, Span[] sentences, Span[][] tokensBySentence, Span[][] namesBySentence, int sentenceIndex); }