/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.stanbol.enhancer.engines.celi.classification.impl;
import static org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper.createTextEnhancement;
import static org.apache.stanbol.enhancer.servicesapi.rdf.OntologicalClasses.SKOS_CONCEPT;
import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_RELATION;
import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_TYPE;
import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_CONFIDENCE;
import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_ENTITY_LABEL;
import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_ENTITY_REFERENCE;
import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_ENTITY_TYPE;
import java.io.IOException;
import java.net.URL;
import java.util.Collections;
import java.util.Dictionary;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.Vector;
import javax.xml.soap.SOAPException;
import org.apache.clerezza.commons.rdf.Literal;
import org.apache.clerezza.rdf.core.LiteralFactory;
import org.apache.clerezza.commons.rdf.Graph;
import org.apache.clerezza.commons.rdf.IRI;
import org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl;
import org.apache.clerezza.commons.rdf.impl.utils.TripleImpl;
import org.apache.felix.scr.annotations.Activate;
import org.apache.felix.scr.annotations.Component;
import org.apache.felix.scr.annotations.Deactivate;
import org.apache.felix.scr.annotations.Properties;
import org.apache.felix.scr.annotations.Property;
import org.apache.felix.scr.annotations.PropertyUnbounded;
import org.apache.felix.scr.annotations.Reference;
import org.apache.felix.scr.annotations.Service;
import org.apache.stanbol.commons.stanboltools.offline.OnlineMode;
import org.apache.stanbol.enhancer.engines.celi.CeliConstants;
import org.apache.stanbol.enhancer.engines.celi.utils.Utils;
import org.apache.stanbol.enhancer.servicesapi.Blob;
import org.apache.stanbol.enhancer.servicesapi.ContentItem;
import org.apache.stanbol.enhancer.servicesapi.EngineException;
import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
import org.apache.stanbol.enhancer.servicesapi.InvalidContentException;
import org.apache.stanbol.enhancer.servicesapi.ServiceProperties;
import org.apache.stanbol.enhancer.servicesapi.helper.ContentItemHelper;
import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper;
import org.apache.stanbol.enhancer.servicesapi.impl.AbstractEnhancementEngine;
import org.osgi.service.cm.ConfigurationException;
import org.osgi.service.component.ComponentContext;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@Component(immediate = true, metatype = true)
@Service
@Properties(value = {
@Property(name = EnhancementEngine.PROPERTY_NAME, value = "celiClassification"),
@Property(name = CeliConstants.CELI_LICENSE),
@Property(name = CeliConstants.CELI_TEST_ACCOUNT,boolValue=false),
@Property(name = CeliConstants.CELI_CONNECTION_TIMEOUT, intValue=CeliConstants.DEFAULT_CONECTION_TIMEOUT)
})
public class CeliClassificationEnhancementEngine extends AbstractEnhancementEngine<IOException, RuntimeException> implements EnhancementEngine, ServiceProperties {
/**
* Reference to the {@link OnlineMode} service. The field is never read by
* this class; declaring the reference ensures that no connections to
* external services are made if Stanbol is started in offline mode, as the
* OnlineMode service will only be available if OfflineMode is deactivated.
*/
@SuppressWarnings("unused") //never read, but the reference itself is the purpose
@Reference
private OnlineMode onlineMode;
/**
 * Language codes accepted by this engine; consulted by
 * {@link #isLangSupported(String)}. The list is filled once during class
 * initialisation and then wrapped read-only, so it can neither be
 * reassigned nor modified afterwards.
 */
private static final List<String> supportedLangs;
static {
    List<String> langs = new Vector<String>();
    Collections.addAll(langs, "en", "fr", "de", "it", "es", "pt", "pl", "nl");
    supportedLangs = Collections.unmodifiableList(langs);
}
/**
* The literal factory used to create typed literals (e.g. the confidence
* values written by {@link #computeEnhancements(ContentItem)}).
*/
private LiteralFactory literalFactory = LiteralFactory.getInstance();
/**
* The literal representing the LangIDEngine as creator.
* <p>
* NOTE(review): the value is the class name of the CELI language identifier
* engine rather than of this class, and the constant is not referenced
* anywhere else in this file - presumably a copy/paste leftover. It is
* public, so confirm there are no external users before changing it.
*/
public static final Literal LANG_ID_ENGINE_NAME = LiteralFactory.getInstance().createTypedLiteral("org.apache.stanbol.enhancer.engines.celi.langid.impl.CeliLanguageIdentifierEnhancementEngine");
/**
* The default value for the Execution of this Engine. Currently set to
* {@link ServiceProperties#ORDERING_CONTENT_EXTRACTION}. This value is
* what {@link #getServiceProperties()} reports as the engine ordering.
*/
public static final Integer defaultOrder = ORDERING_CONTENT_EXTRACTION;
/**
* Currently used as fise:entity-type for TopicAnnotations
* (see {@link #computeEnhancements(ContentItem)}).
*/
private static final IRI OWL_CLASS = new IRI("http://www.w3.org/2002/07/owl#Class");
/**
* Logger of this engine, named after the runtime class of the instance.
*/
private Logger log = LoggerFactory.getLogger(getClass());
//NOTE: the language of a ContentItem MUST NOT be stored in a member field
// (e.g. "private String language"), as EnhancementEngines can be called
// in parallel by multiple threads! It is looked up per call instead.
/**
* This contains the only MIME type directly supported by this enhancement
* engine.
*/
private static final String TEXT_PLAIN_MIMETYPE = "text/plain";
/**
* Set containing the only supported mime type {@link #TEXT_PLAIN_MIMETYPE}.
* Used to look up the content part to process.
*/
private static final Set<String> SUPPORTED_MIMTYPES = Collections.singleton(TEXT_PLAIN_MIMETYPE);
/**
* Key of the configuration property holding the URL of the CELI
* classification web service. The value is read during activation; the
* {@code @Property} annotation declares the linguagrid.org URL as its value.
*/
@Property(value = "http://linguagrid.org/LSGrid/ws/dbpedia-classification")
public static final String SERVICE_URL = "org.apache.stanbol.enhancer.engines.celi.classification.url";
/**
* License key obtained from the configuration during activation and handed
* to the {@link #client}.
*/
private String licenseKey;
/**
* The service URL parsed from the {@link #SERVICE_URL} property during
* activation.
*/
private URL serviceURL;
/**
* Client used to call the classification service; created during
* activation and used by {@link #computeEnhancements(ContentItem)}.
*/
private ClassificationClientHTTP client;
/**
 * Activates the engine: delegates to the super implementation, reads the
 * license key, the service URL ({@link #SERVICE_URL}) and the connection
 * timeout from the component configuration and creates the HTTP client
 * used to call the classification service.
 *
 * @param ctx the component context providing the configuration properties
 *        and the bundle context
 * @throws IOException if the configured URL can not be parsed (a
 *         {@link java.net.MalformedURLException} raised by
 *         {@link URL#URL(String)}); callees may raise it as well
 * @throws ConfigurationException if no service URL is configured (missing
 *         or empty value); callees may raise it as well
 */
@Override
@Activate
protected void activate(ComponentContext ctx) throws IOException, ConfigurationException {
    super.activate(ctx);
    @SuppressWarnings("unchecked")
    Dictionary<String, Object> properties = ctx.getProperties();
    this.licenseKey = Utils.getLicenseKey(properties, ctx.getBundleContext());
    String url = (String) properties.get(SERVICE_URL);
    if (url == null || url.isEmpty()) {
        // the two message parts are joined with a blank so that the text
        // reads "... (e.g. by using the ..." instead of "... byusing ..."
        throw new ConfigurationException(SERVICE_URL, String.format(
                "%s : please configure the URL of the CELI Web Service (e.g. by "
                + "using the 'Configuration' tab of the Apache Felix Web Console).",
                getClass().getSimpleName()));
    }
    this.serviceURL = new URL(url);
    int conTimeout = Utils.getConnectionTimeout(properties, ctx.getBundleContext());
    this.client = new ClassificationClientHTTP(this.serviceURL, this.licenseKey, conTimeout);
}
/**
* Deactivates the engine by delegating to the super implementation. The
* fields assigned during activation (license key, service URL, client) are
* not reset here.
*
* @param ce the component context handed on to the super implementation
*/
@Override
@Deactivate
protected void deactivate(ComponentContext ce) {
super.deactivate(ce);
}
/**
 * Decides whether this engine is able to process the given ContentItem.
 * This is the case if the item provides a content part with a supported
 * mime type ({@link #SUPPORTED_MIMTYPES}) and its language is one of the
 * supported languages.
 *
 * @param ci the ContentItem to check
 * @return {@code ENHANCE_ASYNC} if the item can be processed, otherwise
 *         {@code CANNOT_ENHANCE}
 * @throws EngineException declared by the interface
 */
@Override
public int canEnhance(ContentItem ci) throws EngineException {
    final String lang = EnhancementEngineHelper.getLanguage(ci);
    // A missing language hints at a problem in the EnhancementChain
    // configuration. canEnhance reports that with a warning and a
    // CANNOT_ENHANCE result, but does not throw a runtime exception.
    if (lang == null) {
        log.warn("Unable to enhance ContentItem {} because language of the Content is unknown." +
                " Please check that a language identification engine is active in this EnhancementChain.",
                ci.getUri());
    }
    final boolean hasPlainText = ContentItemHelper.getBlob(ci, SUPPORTED_MIMTYPES) != null;
    if (!hasPlainText || !this.isLangSupported(lang)) {
        return CANNOT_ENHANCE;
    }
    //NOTE: ENHANCE_ASYNC indicates that the computeEnhancements method
    //      correctly applies read/write locks to the ContentItem
    return ENHANCE_ASYNC;
}
/**
 * Sends the plain text of the ContentItem to the CELI classification
 * service and writes the returned concepts to the enhancement metadata:
 * a single TextAnnotation typed as skos:Concept plus one TopicAnnotation
 * per concept (reference, label, entity type, confidence), each linked to
 * that TextAnnotation via dc:relation.
 * <p>
 * Nothing is written if the text is blank or the service returns no
 * concepts.
 *
 * @param ci the ContentItem to enhance
 * @throws EngineException if the text of the content part can not be read
 *         (reported as {@link InvalidContentException}) or if calling the
 *         service fails with an {@link IOException} or {@link SOAPException}
 * @throws IllegalStateException if the language of the ContentItem is not
 *         supported or no "text/plain" content part is present - both are
 *         already checked by {@link #canEnhance(ContentItem)}
 */
@Override
public void computeEnhancements(ContentItem ci) throws EngineException {
    //NOTE: in the computeEnhancements method one can re-check metadata
    //      already checked within the canEnhance method. This is not
    //      required, but it may help to identify potential bugs in the
    //      EnhancementJobManager implementation
    String language = EnhancementEngineHelper.getLanguage(ci);
    if (!isLangSupported(language)) {
        throw new IllegalStateException("Call to computeEnhancement with unsupported language '"
                + language + "' for ContentItem " + ci.getUri() + ": This is also checked "
                + "in the canEnhance method! -> This indicated an Bug in the "
                + "implementation of the " + "EnhancementJobManager!");
    }
    Entry<IRI, Blob> contentPart = ContentItemHelper.getBlob(ci, SUPPORTED_MIMTYPES);
    if (contentPart == null) {
        throw new IllegalStateException("No ContentPart with Mimetype '"
                + TEXT_PLAIN_MIMETYPE + "' found for ContentItem "
                + ci.getUri() + ": This is also checked in the canEnhance "
                + "method! -> This indicates an Bug in the implementation of "
                + "the EnhancementJobManager!");
    }
    String text;
    try {
        text = ContentItemHelper.getText(contentPart.getValue());
    } catch (IOException e) {
        throw new InvalidContentException(this, ci, e);
    }
    if (text.trim().length() == 0) {
        log.info("No text contained in ContentPart {} of ContentItem {}",
                contentPart.getKey(), ci.getUri());
        return;
    }
    //NOTE: EnhancementEngine implementations should pass all Exceptions
    //      (RuntimeExceptions as is and others wrapped as EngineExceptions).
    //      The EnhancementJobManager implementation has to catch and
    //      process all those. Handling depends on the configuration of the
    //      EnhancementChain (e.g. if this engine is optional, enhancement of
    //      the ContentItem will continue).
    //      This is important as otherwise users would get "200 ok" replies
    //      for enhancement requests that have failed!
    //
    //      This means that:
    //      * Http clients should pass on IOExceptions and SOAPExceptions
    //      * No try/catch that also includes RuntimeExceptions
    List<Concept> lista;
    try {
        lista = this.client.extractConcepts(text, language);
    } catch (IOException e) { //re-throw exceptions as EngineException
        throw new EngineException("Error while calling the CELI classification"
                + " service (configured URL: " + serviceURL + ")!", e);
    } catch (SOAPException e) {
        throw new EngineException("Error while encoding/decoding the request/"
                + "response to the CELI classification service!", e);
    }
    // no topics found; a null result is treated like an empty one so that
    // a client returning null does not cause a NullPointerException here
    if (lista == null || lista.isEmpty()) {
        return; //nothing to do
    }
    Graph g = ci.getMetadata();
    //NOTE: EnhancementEngines that use "ENHANCE_ASYNC" need to acquire a
    //      writeLock before modifications to the enhancement metadata
    ci.getLock().writeLock().lock();
    try {
        //see STANBOL-617 for rules how to encode extracted topics
        //we need a single TextAnnotation to link all TopicAnnotations
        IRI textAnnotation = createTextEnhancement(ci, this);
        // add the dc:type skos:Concept
        g.add(new TripleImpl(textAnnotation, DC_TYPE, SKOS_CONCEPT));
        //now create the fise:TopicAnnotations
        for (Concept ne : lista) {
            IRI topicAnnotation = EnhancementEngineHelper.createTopicEnhancement(ci, this);
            g.add(new TripleImpl(topicAnnotation, ENHANCER_ENTITY_REFERENCE, ne.getUri()));
            g.add(new TripleImpl(topicAnnotation, ENHANCER_ENTITY_LABEL,
                    new PlainLiteralImpl(ne.getLabel())));
            //TODO: currently owl:Class is used as entity-type, because that
            //      is what the linked dbpedia ontology resources are.
            g.add(new TripleImpl(topicAnnotation, ENHANCER_ENTITY_TYPE, OWL_CLASS));
            g.add(new TripleImpl(topicAnnotation, ENHANCER_CONFIDENCE,
                    literalFactory.createTypedLiteral(ne.getConfidence())));
            //link to the TextAnnotation
            g.add(new TripleImpl(topicAnnotation, DC_RELATION, textAnnotation));
        }
    } finally {
        ci.getLock().writeLock().unlock();
    }
}
/**
 * Tells whether the given language is one of the supported languages.
 *
 * @param language the language code to look up; may be {@code null}
 * @return {@code true} if the supported language list contains the value
 */
private boolean isLangSupported(String language) {
    return supportedLangs.contains(language);
}
/**
 * Reports the service properties of this engine: a read-only map with the
 * single entry {@code ENHANCEMENT_ENGINE_ORDERING} mapped to
 * {@link #defaultOrder}.
 *
 * @return the unmodifiable single-entry property map
 */
@Override
public Map<String, Object> getServiceProperties() {
    Map<String, Object> ordering =
            Collections.<String, Object>singletonMap(ENHANCEMENT_ENGINE_ORDERING, defaultOrder);
    return Collections.unmodifiableMap(ordering);
}
}