/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.stanbol.enhancer.engines.opennlp.impl;
import static org.apache.stanbol.enhancer.nlp.NlpServiceProperties.ENHANCEMENT_ENGINE_NLP_ROLE;
import java.io.IOException;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import org.apache.felix.scr.annotations.Component;
import org.apache.felix.scr.annotations.ConfigurationPolicy;
import org.apache.felix.scr.annotations.Property;
import org.apache.felix.scr.annotations.Reference;
import org.apache.felix.scr.annotations.ReferenceCardinality;
import org.apache.felix.scr.annotations.ReferencePolicy;
import org.apache.felix.scr.annotations.Service;
import org.apache.stanbol.commons.opennlp.OpenNLP;
import org.apache.stanbol.enhancer.nlp.NlpProcessingRole;
import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
import org.apache.stanbol.enhancer.servicesapi.ServiceProperties;
import org.osgi.framework.Constants;
import org.osgi.service.cm.ConfigurationException;
import org.osgi.service.component.ComponentContext;
/**
 * Apache Stanbol Enhancer Named Entity Recognition enhancement engine based on OpenNLP's Maximum Entropy
 * models.
 * <p>
 * This class is the OSGi component wrapper: it binds the {@link OpenNLP} service, reads the
 * {@link #DEFAULT_LANGUAGE} and {@link #PROCESSED_LANGUAGES} configuration on activation and
 * exposes the {@link ServiceProperties} of the engine. The {@link EnhancementEngine} methods
 * themselves are inherited from {@link NEREngineCore}.
 */
@Component(
    metatype = true,
    immediate = true,
    inherit = true,
    configurationFactory = true,
    policy = ConfigurationPolicy.OPTIONAL,
    specVersion = "1.1",
    label = "%stanbol.NamedEntityExtractionEnhancementEngine.name",
    description = "%stanbol.NamedEntityExtractionEnhancementEngine.description")
@Service
@org.apache.felix.scr.annotations.Properties(value={
    @Property(name=EnhancementEngine.PROPERTY_NAME,value="opennlp-ner"),
    @Property(name=NamedEntityExtractionEnhancementEngine.PROCESSED_LANGUAGES,value=""),
    @Property(name=NamedEntityExtractionEnhancementEngine.DEFAULT_LANGUAGE,value=""),
    //set the ranking of the default config to a negative value (ConfigurationPolicy.OPTIONAL)
    @Property(name=Constants.SERVICE_RANKING,intValue=-100)
})
@Reference(name="openNLP",referenceInterface=OpenNLP.class,
    cardinality=ReferenceCardinality.MANDATORY_UNARY,
    policy=ReferencePolicy.STATIC)
public class NamedEntityExtractionEnhancementEngine
        extends NEREngineCore
        implements EnhancementEngine, ServiceProperties {

    /**
     * Location constant for the default OpenNLP model data. It is not referenced
     * by this class itself.
     */
    public static final String DEFAULT_DATA_OPEN_NLP_MODEL_LOCATION = "org/apache/stanbol/defaultdata/opennlp";

    /**
     * Allows to define the default language assumed for parsed Content if no language
     * detection is available. If <code>null</code> or empty this engine will not
     * process content with an unknown language.
     */
    public static final String DEFAULT_LANGUAGE = "stanbol.NamedEntityExtractionEnhancementEngine.defaultLanguage";

    /**
     * Allows to restrict the list of languages processed by this engine. If
     * <code>null</code> or empty, content of any language where a NER model is
     * available via {@link OpenNLP} will be processed.<p>
     * This property allows to configure multiple instances of this engine that
     * do only process specific languages. The default is a single instance that
     * processes all languages.<p>
     * Supported value types are <code>String[]</code>, {@link Collection} and a
     * single String using <code>','</code> as separator.
     */
    public static final String PROCESSED_LANGUAGES = "stanbol.NamedEntityExtractionEnhancementEngine.processedLanguages";

    /**
     * The default value for the Execution of this Engine. Currently set to
     * {@link ServiceProperties#ORDERING_CONTENT_EXTRACTION}
     */
    public static final Integer defaultOrder = ORDERING_CONTENT_EXTRACTION;

    /**
     * Unmodifiable service properties returned by {@link #getServiceProperties()}:
     * the engine ordering and the NLP processing role.
     */
    private static final Map<String,Object> SERVICE_PROPERTIES;
    static {
        Map<String,Object> sp = new HashMap<String,Object>();
        sp.put(ENHANCEMENT_ENGINE_ORDERING, defaultOrder);
        sp.put(ENHANCEMENT_ENGINE_NLP_ROLE, NlpProcessingRole.NamedEntityRecognition);
        SERVICE_PROPERTIES = Collections.unmodifiableMap(sp);
    }

    /**
     * Bind method of {@link NEREngineCore#openNLP}
     * @param openNlp the OpenNLP service to use
     */
    protected void bindOpenNLP(OpenNLP openNlp) {
        this.openNLP = openNlp;
    }

    /**
     * Unbind method of {@link NEREngineCore#openNLP}
     * @param openNLP the service being unbound (ignored; the reference is always cleared)
     */
    protected void unbindOpenNLP(OpenNLP openNLP) {
        this.openNLP = null;
    }

    /**
     * Activates the component: delegates to the super class and then builds a fresh
     * {@link NEREngineConfig} from the {@link #DEFAULT_LANGUAGE} and
     * {@link #PROCESSED_LANGUAGES} properties of the component configuration.
     * <p>
     * Language values are trimmed; <code>null</code>, empty and blank entries are ignored.
     *
     * @param ctx the component context providing the configuration properties
     * @throws IOException declared for the call to the super class activation
     * @throws ConfigurationException if a default language is configured and the
     * (non-empty) list of processed languages does not contain it
     */
    protected void activate(ComponentContext ctx) throws IOException, ConfigurationException {
        super.activate(ctx);
        config = new NEREngineConfig();

        Object value = ctx.getProperties().get(DEFAULT_LANGUAGE);
        if (value != null) {
            // trimmed so that the check against the (trimmed) processed languages is consistent
            String defaultLanguage = value.toString().trim();
            if (!defaultLanguage.isEmpty()) {
                config.setDefaultLanguage(defaultLanguage);
            } //else no default language
        }

        value = ctx.getProperties().get(PROCESSED_LANGUAGES);
        if (value instanceof String[]) {
            for (String language : (String[]) value) {
                addProcessedLanguage(language);
            }
        } else if (value instanceof Collection<?>) {
            for (Object language : (Collection<?>) value) {
                addProcessedLanguage(language);
            }
        } else if (value != null) {
            //if a single String is parsed we support ',' as separator
            for (String language : value.toString().split(",")) {
                addProcessedLanguage(language);
            }
        } //else no configuration

        // a restricted language list that excludes the default language is contradictory
        if (!config.getProcessedLanguages().isEmpty() && config.getDefaultLanguage() != null
                && !config.getProcessedLanguages().contains(config.getDefaultLanguage())) {
            throw new ConfigurationException(PROCESSED_LANGUAGES, "The list of "
                    + "processed Languages " + config.getProcessedLanguages() + " MUST CONTAIN the "
                    + "configured default language '" + config.getDefaultLanguage() + "'!");
        }
    }

    /**
     * Adds a single configured language to the processed languages of the current
     * config. The value is converted with <code>toString()</code> and trimmed;
     * <code>null</code> and empty results are skipped.
     *
     * @param language the configured value (may be <code>null</code>)
     */
    private void addProcessedLanguage(Object language) {
        if (language == null) {
            return;
        }
        String trimmed = language.toString().trim();
        if (!trimmed.isEmpty()) {
            config.getProcessedLanguages().add(trimmed);
        }
    }

    /**
     * Deactivates the component: drops the configuration created by
     * {@link #activate(ComponentContext)} and delegates to the super class.
     *
     * @param ctx the component context
     */
    protected void deactivate(ComponentContext ctx) {
        config = null;
        super.deactivate(ctx);
    }

    /**
     * Returns the static, unmodifiable service properties of this engine
     * (engine ordering and NLP processing role).
     *
     * @return the service properties
     */
    @Override
    public Map<String,Object> getServiceProperties() {
        return SERVICE_PROPERTIES;
    }
}