/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.stanbol.enhancer.engines.opennlp.impl; import static org.apache.stanbol.enhancer.nlp.NlpAnnotations.NER_ANNOTATION; import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_RELATION; import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_TYPE; import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_CONFIDENCE; import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_END; import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_SELECTED_TEXT; import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_SELECTION_CONTEXT; import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_START; import java.io.IOException; import java.nio.charset.Charset; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.HashMap; import java.util.Iterator; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Map.Entry; import java.util.Set; import opennlp.tools.namefind.NameFinderME; import opennlp.tools.namefind.TokenNameFinderModel; import opennlp.tools.sentdetect.SentenceDetectorME; import opennlp.tools.sentdetect.SentenceModel; import opennlp.tools.tokenize.Tokenizer; import opennlp.tools.util.InvalidFormatException; import opennlp.tools.util.Span; import org.apache.clerezza.commons.rdf.Language; import org.apache.clerezza.rdf.core.LiteralFactory; import org.apache.clerezza.commons.rdf.Graph; import org.apache.clerezza.commons.rdf.IRI; import org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl; import org.apache.clerezza.commons.rdf.impl.utils.TripleImpl; import org.apache.commons.lang.StringUtils; import org.apache.stanbol.commons.opennlp.OpenNLP; import org.apache.stanbol.commons.stanboltools.datafileprovider.DataFileProvider; import org.apache.stanbol.enhancer.nlp.NlpAnnotations; import org.apache.stanbol.enhancer.nlp.model.AnalysedText; import org.apache.stanbol.enhancer.nlp.model.AnalysedTextUtils; import org.apache.stanbol.enhancer.nlp.model.Chunk; import org.apache.stanbol.enhancer.nlp.model.Section; import org.apache.stanbol.enhancer.nlp.model.Sentence; import org.apache.stanbol.enhancer.nlp.model.Token; import org.apache.stanbol.enhancer.nlp.model.annotation.Value; import org.apache.stanbol.enhancer.nlp.ner.NerTag; import org.apache.stanbol.enhancer.servicesapi.Blob; import org.apache.stanbol.enhancer.servicesapi.ContentItem; import org.apache.stanbol.enhancer.servicesapi.EngineException; import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine; import org.apache.stanbol.enhancer.servicesapi.InvalidContentException; import org.apache.stanbol.enhancer.servicesapi.helper.ContentItemHelper; import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper; import org.apache.stanbol.enhancer.servicesapi.impl.AbstractEnhancementEngine; import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** * Core of the NER EnhancementEngine(s), separated from the OSGi service to make * it easier to test this. */ public abstract class NEREngineCore extends AbstractEnhancementEngine<IOException,RuntimeException> implements EnhancementEngine { protected static final String TEXT_PLAIN_MIMETYPE = "text/plain"; /** * Contains the only supported mimetype {@link #TEXT_PLAIN_MIMETYPE} */ protected static final Set<String> SUPPORTED_MIMETYPES = Collections.singleton(TEXT_PLAIN_MIMETYPE); private final Logger log = LoggerFactory.getLogger(getClass()); protected OpenNLP openNLP; protected NEREngineConfig config; /** Comments about our models */ public static final Map<String, String> DATA_FILE_COMMENTS; static { DATA_FILE_COMMENTS = new HashMap<String, String>(); DATA_FILE_COMMENTS.put("Default data files", "provided by the org.apache.stanbol.defaultdata bundle"); } /** * If used sub classes MUST ensure that {@link #openNLP} and {@link #config} * are set before calling {@link #canEnhance(ContentItem)} or * {@link #computeEnhancements(ContentItem)} */ protected NEREngineCore(){} NEREngineCore(OpenNLP openNLP, NEREngineConfig config) throws InvalidFormatException, IOException{ if(openNLP == null){ throw new IllegalArgumentException("The parsed OpenNLP instance MUST NOT be NULL!"); } if(config == null){ throw new IllegalArgumentException("The parsed NER engine configuration MUST NOT be NULL!"); } this.openNLP = openNLP; this.config = config; } NEREngineCore(DataFileProvider dfp,NEREngineConfig config) throws InvalidFormatException, IOException { this(new OpenNLP(dfp),config); } public void computeEnhancements(ContentItem ci) throws EngineException { //first check the langauge before processing the content (text) String language = extractLanguage(ci); if(language == null){ throw new IllegalStateException("Unable to extract Language for " + "ContentItem "+ci.getUri()+": This is also checked in the canEnhance " + "method! -> This indicated an Bug in the implementation of the " + "EnhancementJobManager!"); } if(!isNerModel(language)){ throw new IllegalStateException("For the language '"+language+"' of ContentItem "+ci.getUri() + " no NER model is configured: This is also checked in the canEnhance " + "method! -> This indicated an Bug in the implementation of the " + "EnhancementJobManager!"); } final AnalysedText at = AnalysedTextUtils.getAnalysedText(ci); //validate data in the AnalysedText final String text; if(at != null && at.getTokens().hasNext()){ //if the AnalysedText is present and tokens are present if(log.isDebugEnabled()){ log.debug("computeEnhancements from AnalysedText ContentPart of ContentItem {}: text={}", ci.getUri().getUnicodeString(), StringUtils.abbreviate(at.getSpan(), 100)); } text = null; } else { //no AnalysedText with tokens ... //fallback to processing the plain text is still supported Entry<IRI,Blob> contentPart = ContentItemHelper.getBlob(ci, SUPPORTED_MIMETYPES); if(contentPart == null){ throw new IllegalStateException("No ContentPart with Mimetype '" + TEXT_PLAIN_MIMETYPE+"' found for ContentItem "+ci.getUri() + ": This is also checked in the canEnhance method! -> This " + "indicated an Bug in the implementation of the " + "EnhancementJobManager!"); } try { text = ContentItemHelper.getText(contentPart.getValue()); } catch (IOException e) { throw new InvalidContentException(this, ci, e); } if (text.trim().length() == 0) { // TODO: make the length of the data a field of the ContentItem // interface to be able to filter out empty items in the canEnhance // method log.warn("ContentPart {} of ContentItem {} does not contain any text" + "to extract knowledge from in ContentItem {}", contentPart.getKey(),ci); return; } if(log.isDebugEnabled()){ log.debug("computeEnhancements from ContentPart {} of ContentItem {}: text={}", new Object[]{contentPart.getKey(),ci.getUri().getUnicodeString(), StringUtils.abbreviate(text, 100)}); } } try { if(config.isProcessedLangage(language)){ for (String defaultModelType : config.getDefaultModelTypes()) { TokenNameFinderModel nameFinderModel = openNLP.getNameModel(defaultModelType, language); if(nameFinderModel == null){ log.info("No NER Model for {} and language {} available!",defaultModelType,language); } else { findNamedEntities(ci, at, text, language, nameFinderModel); } } } //else do not use default models for languages other than the processed one //process for additional models for(String additionalModel : config.getSpecificNerModles(language)){ TokenNameFinderModel nameFinderModel; try { nameFinderModel = openNLP.getModel(TokenNameFinderModel.class, additionalModel, null); findNamedEntities(ci, at, text, language, nameFinderModel); } catch (IOException e) { log.warn("Unable to load TokenNameFinderModel model for language '"+language + "' (model: "+additionalModel+")",e); } catch (RuntimeException e){ log.warn("Error while creating ChunkerModel for language '"+language + "' (model: "+additionalModel+")",e); } } } catch (Exception e) { if (e instanceof RuntimeException) { throw (RuntimeException)e; } else { throw new EngineException(this, ci, e); } } } protected void findNamedEntities(final ContentItem ci, final AnalysedText at, final String text, final String lang, final TokenNameFinderModel nameFinderModel) { if (ci == null) { throw new IllegalArgumentException("Parsed ContentItem MUST NOT be NULL"); } if (at == null && text == null) { log.warn("NULL was parsed as AnalysedText AND Text for content item " + ci.getUri() + ". One of the two MUST BE present! -> call ignored"); return; } final Language language; if(lang != null && !lang.isEmpty()){ language = new Language(lang); } else { language = null; } if(log.isDebugEnabled()){ log.debug("findNamedEntities model={}, language={}, text=", new Object[]{ nameFinderModel, language, StringUtils.abbreviate(at != null ? at.getSpan() : text, 100) }); } LiteralFactory literalFactory = LiteralFactory.getInstance(); Graph g = ci.getMetadata(); Map<String,List<NameOccurrence>> entityNames; if(at != null){ entityNames = extractNameOccurrences(nameFinderModel, at, lang); } else { entityNames = extractNameOccurrences(nameFinderModel, text,lang); } //lock the ContentItem while writing the RDF data for found Named Entities ci.getLock().writeLock().lock(); try { Map<String,IRI> previousAnnotations = new LinkedHashMap<String,IRI>(); for (Map.Entry<String,List<NameOccurrence>> nameInContext : entityNames.entrySet()) { String name = nameInContext.getKey(); List<NameOccurrence> occurrences = nameInContext.getValue(); IRI firstOccurrenceAnnotation = null; for (NameOccurrence occurrence : occurrences) { IRI textAnnotation = EnhancementEngineHelper.createTextEnhancement(ci, this); g.add(new TripleImpl(textAnnotation, ENHANCER_SELECTED_TEXT, new PlainLiteralImpl(name, language))); g.add(new TripleImpl(textAnnotation, ENHANCER_SELECTION_CONTEXT, new PlainLiteralImpl(occurrence.context, language))); if(occurrence.type != null){ g.add(new TripleImpl(textAnnotation, DC_TYPE, occurrence.type)); } if(occurrence.confidence != null){ g.add(new TripleImpl(textAnnotation, ENHANCER_CONFIDENCE, literalFactory .createTypedLiteral(occurrence.confidence))); } if (occurrence.start != null && occurrence.end != null) { g.add(new TripleImpl(textAnnotation, ENHANCER_START, literalFactory .createTypedLiteral(occurrence.start))); g.add(new TripleImpl(textAnnotation, ENHANCER_END, literalFactory .createTypedLiteral(occurrence.end))); } // add the subsumption relationship among occurrences of the same // name if (firstOccurrenceAnnotation == null) { // check already extracted annotations to find a first most // specific occurrence for (Map.Entry<String,IRI> entry : previousAnnotations.entrySet()) { if (entry.getKey().contains(name)) { // we have found a most specific previous // occurrence, use it as subsumption target firstOccurrenceAnnotation = entry.getValue(); g.add(new TripleImpl(textAnnotation, DC_RELATION, firstOccurrenceAnnotation)); break; } } if (firstOccurrenceAnnotation == null) { // no most specific previous occurrence, I am the first, // most specific occurrence to be later used as a target firstOccurrenceAnnotation = textAnnotation; previousAnnotations.put(name, textAnnotation); } } else { // I am referring to a most specific first occurrence of the // same name g.add(new TripleImpl(textAnnotation, DC_RELATION, firstOccurrenceAnnotation)); } } } } finally { ci.getLock().writeLock().unlock(); } } @Deprecated public Collection<String> extractPersonNames(String text) { return extractPersonNames(text, "en"); } public Collection<String> extractPersonNames(String text,String lang) { return extractNames(getNameModel("person",lang),text); } @Deprecated public Collection<String> extractLocationNames(String text) { return extractLocationNames(text,"en"); } public Collection<String> extractLocationNames(String text,String lang) { return extractNames(getNameModel("location",lang), text); } @Deprecated public Collection<String> extractOrganizationNames(String text) { return extractOrganizationNames(text,"en"); } public Collection<String> extractOrganizationNames(String text,String lang) { return extractNames(getNameModel("organization",lang), text); } /** * extracts the PersonName occurrences for English language texts * @param text * @return * @deprecated use {@link #extractLocationNameOccurrences(String,String)} instead */ @Deprecated public Map<String,List<NameOccurrence>> extractPersonNameOccurrences(String text) { return this.extractPersonNameOccurrences(text, "en"); } public Map<String,List<NameOccurrence>> extractPersonNameOccurrences(String text, String lang) { return extractNameOccurrences(getNameModel("person",lang), text, lang); } /** * extracts the LocationName occurrences for English language texts * @param text * @return * @deprecated use {@link #extractLocationNameOccurrences(String,String)} instead */ @Deprecated public Map<String,List<NameOccurrence>> extractLocationNameOccurrences(String text) { return extractLocationNameOccurrences(text, "en"); } public Map<String,List<NameOccurrence>> extractLocationNameOccurrences(String text,String lang) { return extractNameOccurrences(getNameModel("location",lang), text,lang); } /** * extracts the OrganizationName occurrences for English language texts * @param text * @return * @deprecated use {@link #extractOrganizationNamesOccurrences(String,String)} instead */ @Deprecated public Map<String,List<NameOccurrence>> extractOrganizationNameOccurrences(String text) { return extractOrganizationNameOccurrences(text,"en"); } public Map<String,List<NameOccurrence>> extractOrganizationNameOccurrences(String text,String lang) { return extractNameOccurrences(getNameModel("organization",lang), text,lang); } protected Collection<String> extractNames(TokenNameFinderModel nameFinderModel, String text) { return extractNameOccurrences(nameFinderModel, text, nameFinderModel.getLanguage()).keySet(); } /** * Gets/builds a TokenNameFinderModel by using {@link #openNLP} and throws * {@link IllegalStateException}s in case the model could not be built or * the data for the model where not found. * @param the type of the named finder model * @param language the language for the model * @return the model or an {@link IllegalStateException} if not available */ private TokenNameFinderModel getNameModel(String type,String language) { try { TokenNameFinderModel model = openNLP.getNameModel(type, language); if(model != null){ return model; } else { throw new IllegalStateException(String.format( "Unable to built Model for extracting %s from '%s' language " + "texts because the model data could not be loaded.", type,language)); } } catch (InvalidFormatException e) { throw new IllegalStateException(String.format( "Unable to built Model for extracting %s from '%s' language texts.", type,language),e); } catch (IOException e) { throw new IllegalStateException(String.format( "Unable to built Model for extracting %s from '%s' language texts.", type,language),e); } } /** * Loads the {@link SentenceModel} for the parsed language or * English as fallback if one for the language is not available * @param language * @return */ private SentenceModel getSentenceModel(String language) { try { SentenceModel model = openNLP.getSentenceModel(language); if(model != null){ return model; } else { //fallback to english log.info("No sentence detection modle for {}. fallback to English"); model = openNLP.getSentenceModel("en"); if(model == null){ throw new IllegalStateException(String.format( "Unable to built Model for extracting sentences neither for '%s' " + "nor the fallback language 'en'.", language)); } else { return model; } } } catch (InvalidFormatException e) { throw new IllegalStateException(String.format( "Unable to built Model for extracting sentences from '%s' language texts.", language),e); } catch (IOException e) { throw new IllegalStateException(String.format( "Unable to built Model for extracting sentences from '%s' language texts.", language),e); } } /** * THis method extracts NamedEntity occurrences by using existing {@link Token}s and * {@link Sentence}s in the parsed {@link AnalysedText}. * @param nameFinderModel the model used to find NamedEntities * @param at the Analysed Text * @param language the language of the text * @return the found named Entity Occurrences */ protected Map<String,List<NameOccurrence>> extractNameOccurrences(TokenNameFinderModel nameFinderModel, AnalysedText at, String language) { // version with explicit sentence endings to reflect heading / paragraph // structure of an HTML or PDF document converted to text NameFinderME finder = new NameFinderME(nameFinderModel); Map<String,List<NameOccurrence>> nameOccurrences = new LinkedHashMap<String,List<NameOccurrence>>(); List<Section> sentences = new ArrayList<Section>(); //Holds the tokens of the previouse (pos 0) current (pos 1) and next (pos 2) sentence AnalysedTextUtils.appandToList(at.getSentences(), sentences); if(sentences.isEmpty()){ //no sentence annotations sentences.add(at); //process as a single section } for (int i=0;i<sentences.size();i++) { String sentence = sentences.get(i).getSpan(); // build a context by concatenating three sentences to be used for // similarity ranking / disambiguation + contextual snippet in the // extraction structure List<String> contextElements = new ArrayList<String>(); contextElements.add(sentence); //three sentences as context String context = at.getSpan().substring( sentences.get(Math.max(0, i-1)).getStart(), sentences.get(Math.min(sentences.size()-1, i+1)).getEnd()); // get the tokens, words of the current sentence List<Token> tokens = new ArrayList<Token>(32); List<String> words = new ArrayList<String>(32); for(Iterator<Token> it =sentences.get(i).getTokens();it.hasNext();){ Token t = it.next(); tokens.add(t); words.add(t.getSpan()); } Span[] nameSpans = finder.find(words.toArray(new String[words.size()])); double[] probs = finder.probs(); //int lastStartPosition = 0; for (int j = 0; j < nameSpans.length; j++) { String name = at.getSpan().substring(tokens.get(nameSpans[j].getStart()).getStart(), tokens.get(nameSpans[j].getEnd()-1).getEnd()); Double confidence = 1.0; for (int k = nameSpans[j].getStart(); k < nameSpans[j].getEnd(); k++) { confidence *= probs[k]; } int start = tokens.get(nameSpans[j].getStart()).getStart(); int end = start + name.length(); NerTag nerTag = config.getNerTag(nameSpans[j].getType()); //create the occurrence for writing fise:TextAnnotations NameOccurrence occurrence = new NameOccurrence(name, start, end, nerTag.getType(), context, confidence); List<NameOccurrence> occurrences = nameOccurrences.get(name); if (occurrences == null) { occurrences = new ArrayList<NameOccurrence>(); } occurrences.add(occurrence); nameOccurrences.put(name, occurrences); //add also the NerAnnotation to the AnalysedText Chunk chunk = at.addChunk(start, end); //TODO: build AnnotationModel based on the configured Mappings chunk.addAnnotation(NER_ANNOTATION, Value.value(nerTag, confidence)); } } finder.clearAdaptiveData(); log.debug("{} name occurrences found: {}", nameOccurrences.size(), nameOccurrences); return nameOccurrences; } protected Map<String,List<NameOccurrence>> extractNameOccurrences(TokenNameFinderModel nameFinderModel, String text, String language) { // version with explicit sentence endings to reflect heading / paragraph // structure of an HTML or PDF document converted to text String textWithDots = text.replaceAll("\\n\\n", ".\n"); text = removeNonUtf8CompliantCharacters(text); SentenceDetectorME sentenceDetector = new SentenceDetectorME(getSentenceModel("en")); Span[] sentenceSpans = sentenceDetector.sentPosDetect(textWithDots); NameFinderME finder = new NameFinderME(nameFinderModel); Tokenizer tokenizer = openNLP.getTokenizer(language); Map<String,List<NameOccurrence>> nameOccurrences = new LinkedHashMap<String,List<NameOccurrence>>(); for (int i = 0; i < sentenceSpans.length; i++) { String sentence = sentenceSpans[i].getCoveredText(text).toString().trim(); // build a context by concatenating three sentences to be used for // similarity ranking / disambiguation + contextual snippet in the // extraction structure List<String> contextElements = new ArrayList<String>(); if (i > 0) { CharSequence previousSentence = sentenceSpans[i - 1].getCoveredText(text); contextElements.add(previousSentence.toString().trim()); } contextElements.add(sentence.trim()); if (i + 1 < sentenceSpans.length) { CharSequence nextSentence = sentenceSpans[i + 1].getCoveredText(text); contextElements.add(nextSentence.toString().trim()); } String context = StringUtils.join(contextElements, " "); // extract the names in the current sentence and // keep them store them with the current context Span[] tokenSpans = tokenizer.tokenizePos(sentence); String[] tokens = Span.spansToStrings(tokenSpans, sentence); Span[] nameSpans = finder.find(tokens); double[] probs = finder.probs(); //int lastStartPosition = 0; for (int j = 0; j < nameSpans.length; j++) { String name = sentence.substring(tokenSpans[nameSpans[j].getStart()].getStart(), tokenSpans[nameSpans[j].getEnd()-1].getEnd()); //NOTE: With OpenNLP 1.6 the probability is now stored in the span double prob = nameSpans[j].getProb(); //prob == 0.0 := unspecified Double confidence = prob != 0.0 ? Double.valueOf(prob) : null; if(confidence == null){ //fall back to the old if it is not set. for (int k = nameSpans[j].getStart(); k < nameSpans[j].getEnd(); k++) { prob *= probs[k]; } confidence = Double.valueOf(prob); } else if(confidence < 0.5d){ //It looks like as if preceptron based models do return //invalid probabilities. As it is expected the Named Entities //with a probability < 50% are not even returned by finder.find(..) //we will just ignore confidence values < 0.5 here confidence = null; } int start = tokenSpans[nameSpans[j].getStart()].getStart(); int absoluteStart = sentenceSpans[i].getStart() + start; int absoluteEnd = absoluteStart + name.length(); NerTag nerTag = config.getNerTag(nameSpans[j].getType()); NameOccurrence occurrence = new NameOccurrence(name, absoluteStart, absoluteEnd, nerTag.getType(),context, confidence); List<NameOccurrence> occurrences = nameOccurrences.get(name); if (occurrences == null) { occurrences = new ArrayList<NameOccurrence>(); } occurrences.add(occurrence); nameOccurrences.put(name, occurrences); } } finder.clearAdaptiveData(); log.debug("{} name occurrences found: {}", nameOccurrences.size(), nameOccurrences); return nameOccurrences; } public int canEnhance(ContentItem ci) { if(ContentItemHelper.getBlob(ci, SUPPORTED_MIMETYPES) != null && isNerModel(extractLanguage(ci))){ return ENHANCE_ASYNC; } else { return CANNOT_ENHANCE; } } /** * Remove non UTF-8 compliant characters (typically control characters) so has to avoid polluting the * annotation graph with snippets that are not serializable as XML. */ protected static String removeNonUtf8CompliantCharacters(final String text) { if (null == text) { return null; } StringBuilder sb = null; //initialised on the first replacement for (int i = 0; i < text.length(); i++) { int ch = text.codePointAt(i); // remove any characters outside the valid UTF-8 range as well as all control characters // except tabs and new lines //NOTE: rewesten (2012-11-21) replaced the original check with the one // found at http://blog.mark-mclaren.info/2007/02/invalid-xml-characters-when-valid-utf8_5873.html if (!((ch == 0x9) || (ch == 0xA) || (ch == 0xD) || ((ch >= 0x20) && (ch <= 0xD7FF)) || ((ch >= 0xE000) && (ch <= 0xFFFD)) || ((ch >= 0x10000) && (ch <= 0x10FFFF)))){ if(sb == null){ sb = new StringBuilder(text); } sb.setCharAt(i, ' '); } } return sb == null ? text : sb.toString(); } /** * Extracts the language of the parsed ContentItem by using * {@link EnhancementEngineHelper#getLanguage(ContentItem)} and * {@link #defaultLang} as default * @param ci the content item * @return the language */ private String extractLanguage(ContentItem ci) { String lang = EnhancementEngineHelper.getLanguage(ci); if(lang != null){ return lang; } else { log.info("Unable to extract language for ContentItem %s!",ci.getUri().getUnicodeString()); log.info(" ... return '{}' as default",config.getDefaultLanguage()); return config.getDefaultLanguage(); } } /** * This Method checks if this configuration does have a NER model for the * parsed language. This checks if the pased language * {@link #isProcessedLangage(String)} and any {@link #getDefaultModelTypes()} * is present OR if any {@link #getSpecificNerModles(String)} is configured for the * parsed language. * @param lang The language to check * @return if there is any NER model configured for the parsed language */ public boolean isNerModel(String lang){ return (config.isProcessedLangage(lang) && !config.getDefaultModelTypes().isEmpty()) || !config.getSpecificNerModles(lang).isEmpty(); } }