/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.stanbol.enhancer.engines.celi.sentimentanalysis.impl; import static org.apache.stanbol.enhancer.engines.celi.utils.Utils.getSelectionContext; import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_TYPE; import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_END; import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_SELECTED_TEXT; import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_SELECTION_CONTEXT; import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_START; import java.io.IOException; import java.net.URL; import java.util.Arrays; import java.util.Collection; import java.util.Collections; import java.util.Dictionary; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Map.Entry; import java.util.Set; import javax.xml.soap.SOAPException; import org.apache.clerezza.commons.rdf.Language; import org.apache.clerezza.rdf.core.LiteralFactory; import org.apache.clerezza.commons.rdf.Graph; import org.apache.clerezza.rdf.core.NoConvertorException; import org.apache.clerezza.commons.rdf.IRI; import org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl; import org.apache.clerezza.commons.rdf.impl.utils.TripleImpl; import org.apache.felix.scr.annotations.Activate; import org.apache.felix.scr.annotations.Component; import org.apache.felix.scr.annotations.Deactivate; import org.apache.felix.scr.annotations.Properties; import org.apache.felix.scr.annotations.Property; import org.apache.felix.scr.annotations.Reference; import org.apache.felix.scr.annotations.Service; import org.apache.stanbol.commons.stanboltools.offline.OnlineMode; import org.apache.stanbol.enhancer.engines.celi.CeliConstants; import org.apache.stanbol.enhancer.engines.celi.ner.impl.CeliNamedEntityExtractionEnhancementEngine; import org.apache.stanbol.enhancer.engines.celi.utils.Utils; import org.apache.stanbol.enhancer.servicesapi.Blob; import org.apache.stanbol.enhancer.servicesapi.ContentItem; import org.apache.stanbol.enhancer.servicesapi.EngineException; import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine; import org.apache.stanbol.enhancer.servicesapi.InvalidContentException; import org.apache.stanbol.enhancer.servicesapi.ServiceProperties; import org.apache.stanbol.enhancer.servicesapi.helper.ContentItemHelper; import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper; import org.apache.stanbol.enhancer.servicesapi.impl.AbstractEnhancementEngine; import org.osgi.service.cm.ConfigurationException; import org.osgi.service.component.ComponentContext; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @Component(immediate = true, metatype = true) @Service @Properties(value = { @Property(name = EnhancementEngine.PROPERTY_NAME, value = "celiSentiment"), @Property(name = CeliConstants.CELI_LICENSE), @Property(name = CeliConstants.CELI_TEST_ACCOUNT,boolValue=false), @Property(name = CeliConstants.CELI_CONNECTION_TIMEOUT, intValue=CeliConstants.DEFAULT_CONECTION_TIMEOUT) }) public class CeliSentimentAnalysisEngine extends AbstractEnhancementEngine<IOException, RuntimeException> implements EnhancementEngine, ServiceProperties { /** * This ensures that no connections to external services are made if Stanbol is started in offline mode * as the OnlineMode service will only be available if OfflineMode is deactivated. */ @SuppressWarnings("unused") @Reference private OnlineMode onlineMode; /** * The supported languages (configured via the {@link #SUPPORTED_LANGUAGES} * configuration. */ private Collection<String> supportedLangs; /** * The default value for the Execution of this Engine. Currently set to * {@link ServiceProperties#ORDERING_CONTENT_EXTRACTION} */ public static final Integer defaultOrder = ORDERING_CONTENT_EXTRACTION; private static final Logger log = LoggerFactory.getLogger(CeliNamedEntityExtractionEnhancementEngine.class); /** * This contains the only MIME type directly supported by this enhancement * engine. */ private static final String TEXT_PLAIN_MIMETYPE = "text/plain"; /** * Set containing the only supported mime type {@link #TEXT_PLAIN_MIMETYPE} */ private static final Set<String> SUPPORTED_MIMTYPES = Collections.singleton(TEXT_PLAIN_MIMETYPE); @Property(value = "http://linguagrid.org/LSGrid/ws/sentiment-analysis") public static final String SERVICE_URL = "org.apache.stanbol.enhancer.engines.celi.celiSentiment.url"; @Property(value = {"fr","it"},cardinality=1000) public static final String SUPPORTED_LANGUAGES = "org.apache.stanbol.enhancer.engines.celi.celiSentiment.languages"; private String licenseKey; private URL serviceURL; private SentimentAnalysisServiceClientHttp client; @Override @Activate protected void activate(ComponentContext ctx) throws IOException, ConfigurationException { super.activate(ctx); @SuppressWarnings("unchecked") Dictionary<String, Object> properties = ctx.getProperties(); log.info("Activate CELI Sentiment Analysis engine:"); log.info(" > name: {}",getName()); this.licenseKey = Utils.getLicenseKey(properties,ctx.getBundleContext()); String url = (String) properties.get(SERVICE_URL); if (url == null || url.isEmpty()) { throw new ConfigurationException(SERVICE_URL, String.format("%s : please configure the URL of the CELI Web Service (e.g. by" + "using the 'Configuration' tab of the Apache Felix Web Console).", getClass().getSimpleName())); } this.serviceURL = new URL(url); int connectionTimeout = Utils.getConnectionTimeout(properties, ctx.getBundleContext()); this.client = new SentimentAnalysisServiceClientHttp(this.serviceURL, this.licenseKey,connectionTimeout); log.info(" > CELI service: {}",serviceURL); //init the supported languages (now configurable) Object languageObject = properties.get(SUPPORTED_LANGUAGES); HashSet<String> languages; if(languageObject instanceof String){ //support splitting multiple languages with ';' languages = new HashSet<String>(Arrays.asList(languageObject.toString().split(";"))); if(languages.remove("")){ log.warn("Languages configuration '{}' contained empty language -> removed",languageObject); }//empty not allowed } else if(languageObject instanceof Iterable<?>){ //does not work for arrays :( languages = new HashSet<String>(); for(Object o : (Iterable<Object>)languageObject){ if(o != null && !o.toString().isEmpty()){ languages.add(o.toString()); } else { log.warn("Language configuration '{}' contained illegal value '{}' -> removed", languageObject,o); } } } else if(languageObject.getClass().isArray()){ languages = new HashSet<String>(); for(Object langObj : (Object[])languageObject){ if(langObj != null){ languages.add(langObj.toString()); } else { log.warn("Language configuration '{}' contained illegal value '{}' -> removed", Arrays.toString((Object[])languageObject),langObj); } } } else { languages = null; } if(languages == null || languages.isEmpty()){ throw new ConfigurationException(SUPPORTED_LANGUAGES, String.format( "Missing or invalid configuration of the supported languages (config :'%s'", languageObject != null && languageObject.getClass().isArray() ? Arrays.toString((Object[])languageObject): //nicer logging for arrays languageObject)); } this.supportedLangs = Collections.unmodifiableSet(languages); log.info(" > supported languages: {}",supportedLangs); } @Override public int canEnhance(ContentItem ci) throws EngineException { String language = EnhancementEngineHelper.getLanguage(ci); if (language == null) { log.info("Unable to extract language annotation for ContentItem -> will not enhance", ci.getUri()); return CANNOT_ENHANCE; } else if(!isLangSupported(language)){ log.debug("Language '{}' of contentItem {} is not supported (supported: {}) -> will not enhance", new Object[]{language,ci.getUri(),supportedLangs}); return CANNOT_ENHANCE; } if (ContentItemHelper.getBlob(ci, SUPPORTED_MIMTYPES) != null) return ENHANCE_ASYNC; else log.debug("No Content of type {} found in ConentItem {} -> will not enhance", SUPPORTED_MIMTYPES,ci.getUri()); return CANNOT_ENHANCE; } @Override public void computeEnhancements(ContentItem ci) throws EngineException { Entry<IRI, Blob> contentPart = ContentItemHelper.getBlob(ci, SUPPORTED_MIMTYPES); if (contentPart == null) { throw new IllegalStateException("No ContentPart with Mimetype '" + TEXT_PLAIN_MIMETYPE + "' found for ContentItem " + ci.getUri() + ": This is also checked in the canEnhance method! -> This " + "indicated an Bug in the implementation of the " + "EnhancementJobManager!"); } String text = ""; try { text = ContentItemHelper.getText(contentPart.getValue()); } catch (IOException e) { throw new InvalidContentException(this, ci, e); } if (text.trim().length() == 0) { log.info("No text contained in ContentPart {" + contentPart.getKey() + "} of ContentItem {" + ci.getUri() + "}"); return; } String language = EnhancementEngineHelper.getLanguage(ci); if (language == null) { throw new IllegalStateException("Unable to extract Language for " + "ContentItem " + ci.getUri() + ": This is also checked in the canEnhance " + "method! -> This indicated an Bug in the implementation of the " + "EnhancementJobManager!"); } Language lang = new Language(language); //used for the palin literals in TextAnnotations try { List<SentimentExpression> lista = this.client.extractSentimentExpressions(text, language); LiteralFactory literalFactory = LiteralFactory.getInstance(); Graph g = ci.getMetadata(); for (SentimentExpression se : lista) { try { IRI textAnnotation = EnhancementEngineHelper.createTextEnhancement(ci, this); //add selected text as PlainLiteral in the language extracted from the text g.add(new TripleImpl(textAnnotation, ENHANCER_SELECTED_TEXT, new PlainLiteralImpl(se.getSnippetStr(),lang))); g.add(new TripleImpl(textAnnotation, DC_TYPE, CeliConstants.SENTIMENT_EXPRESSION)); if (se.getStartSnippet() != null && se.getEndSnippet() != null) { g.add(new TripleImpl(textAnnotation, ENHANCER_START, literalFactory.createTypedLiteral(se.getStartSnippet().intValue()))); g.add(new TripleImpl(textAnnotation, ENHANCER_END, literalFactory.createTypedLiteral(se.getEndSnippet().intValue()))); g.add(new TripleImpl(textAnnotation, ENHANCER_SELECTION_CONTEXT, new PlainLiteralImpl(getSelectionContext(text, se.getSnippetStr(), se.getStartSnippet()), lang))); g.add(new TripleImpl(textAnnotation, CeliConstants.HAS_SENTIMENT_EXPRESSION_POLARITY, literalFactory.createTypedLiteral(se.getSentimentPolarityAsDoubleValue()))); } } catch (NoConvertorException e) { log.error(e.getMessage(), e); } } } catch (IOException e) { throw new EngineException("Error while calling the CELI Sentiment Analysis service (configured URL: " +serviceURL+")!",e); } catch (SOAPException e) { throw new EngineException("Error wile encoding/decoding the request/response to the CELI Sentiment Analysis service!",e); } } @Override @Deactivate protected void deactivate(ComponentContext ce) { super.deactivate(ce); this.supportedLangs = null; this.client = null; this.serviceURL = null; } @Override public Map<String, Object> getServiceProperties() { return Collections.unmodifiableMap(Collections.singletonMap(ENHANCEMENT_ENGINE_ORDERING, (Object) defaultOrder)); } private boolean isLangSupported(String language) { return supportedLangs.contains(language); } }