/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.stanbol.enhancer.engines.sentiment.summarize; import static org.apache.stanbol.enhancer.nlp.NlpAnnotations.SENTIMENT_ANNOTATION; import static org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper.createTextEnhancement; import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_TYPE; import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_END; import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_SELECTED_TEXT; import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_SELECTION_CONTEXT; import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_START; import java.util.ArrayList; import java.util.Collections; import java.util.Comparator; import java.util.EnumSet; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Map.Entry; import java.util.NavigableMap; import java.util.TreeMap; import org.apache.clerezza.commons.rdf.Language; import org.apache.clerezza.rdf.core.LiteralFactory; import org.apache.clerezza.commons.rdf.Graph; import org.apache.clerezza.commons.rdf.IRI; import 
org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl; import org.apache.clerezza.commons.rdf.impl.utils.TripleImpl; import org.apache.felix.scr.annotations.Activate; import org.apache.felix.scr.annotations.Component; import org.apache.felix.scr.annotations.ConfigurationPolicy; import org.apache.felix.scr.annotations.Deactivate; import org.apache.felix.scr.annotations.Properties; import org.apache.felix.scr.annotations.Property; import org.apache.felix.scr.annotations.Service; import org.apache.stanbol.enhancer.nlp.NlpAnnotations; import org.apache.stanbol.enhancer.nlp.model.AnalysedText; import org.apache.stanbol.enhancer.nlp.model.Section; import org.apache.stanbol.enhancer.nlp.model.Sentence; import org.apache.stanbol.enhancer.nlp.model.Span; import org.apache.stanbol.enhancer.nlp.model.SpanTypeEnum; import org.apache.stanbol.enhancer.nlp.model.Token; import org.apache.stanbol.enhancer.nlp.model.annotation.Value; import org.apache.stanbol.enhancer.nlp.pos.LexicalCategory; import org.apache.stanbol.enhancer.nlp.pos.Pos; import org.apache.stanbol.enhancer.nlp.pos.PosTag; import org.apache.stanbol.enhancer.nlp.utils.NIFHelper; import org.apache.stanbol.enhancer.nlp.utils.NlpEngineHelper; import org.apache.stanbol.enhancer.servicesapi.ContentItem; import org.apache.stanbol.enhancer.servicesapi.EngineException; import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine; import org.apache.stanbol.enhancer.servicesapi.ServiceProperties; import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper; import org.apache.stanbol.enhancer.servicesapi.impl.AbstractEnhancementEngine; import org.apache.stanbol.enhancer.servicesapi.rdf.NamespaceEnum; import org.osgi.framework.Constants; import org.osgi.service.cm.ConfigurationException; import org.osgi.service.component.ComponentContext; import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** * {@link EnhancementEngine} that summarizes {@link Token} level * Sentiment tags for NounPhraces, 
Sentences and the whole
 * Content.
 * @author Rupert Westenthaler
 */
@Component(immediate = true, metatype = true,
    policy=ConfigurationPolicy.OPTIONAL,
    configurationFactory=true) //allow multiple instances to be configured
@Service
@Properties(value={
    @Property(name=EnhancementEngine.PROPERTY_NAME,value=SentimentSummarizationEngine.DEFAULT_ENGINE_NAME),
    @Property(name=SentimentSummarizationEngine.PROPERTY_DOCUMENT_SENTIMENT_STATE, boolValue=true),
    @Property(name=SentimentSummarizationEngine.PROPERTY_SENTENCE_SENTIMENT_STATE, boolValue=true),
    @Property(name=SentimentSummarizationEngine.PROPERTY_PHRASE_SENTIMENT_STATE, boolValue=true),
    @Property(name=Constants.SERVICE_RANKING,intValue=-100) //give the default instance a ranking < 0
})
public class SentimentSummarizationEngine extends AbstractEnhancementEngine<RuntimeException,RuntimeException> implements ServiceProperties {

    /** Configuration property: write fise:TextAnnotations for sentiment phrases. */
    public static final String PROPERTY_PHRASE_SENTIMENT_STATE = "enhancer.engine.sentiment.summarization.phraseSentimentState";
    public static final boolean DEFAULT_PHRASE_SENTIMENT_STATE = true;
    /** Configuration property: write fise:TextAnnotations summarizing sentiments per sentence. */
    public static final String PROPERTY_SENTENCE_SENTIMENT_STATE = "enhancer.engine.sentiment.summarization.sentenceSentimentState";
    public static final boolean DEFAULT_SENTENCE_SENTIMENT_STATE = true;
    /** Configuration property: write a fise:TextAnnotation summarizing the sentiment of the whole document. */
    public static final String PROPERTY_DOCUMENT_SENTIMENT_STATE = "enhancer.engine.sentiment.summarization.documentSentimentState";
    public static final boolean DEFAULT_DOCUMENT_SENTIMENT_STATE = true;
    //public static final String PROPERTY_NOUN_CONTEXT_SIZE = "enhancer.engine.sentiment.summarization.nounContextSize";

    private final Logger log = LoggerFactory.getLogger(getClass());

    /** POS tags that split a sentence into sub-sections (e.g. ',' and ';'). */
    private static final EnumSet<Pos> DEFAULT_SECTION_BORDER_TAGS = EnumSet.of(
        Pos.SentenceMedialPunctuation);
    /** POS tags that represent a negation ("not", "no", "never", ...). */
    private static final EnumSet<Pos> DEFAULT_NEGATION_TAGS = EnumSet.of(
        Pos.NegativeAdverb, Pos.NegativeDeterminer, Pos.NegativeParticle, Pos.NegativePronoun);
    /** Lexical categories of tokens counted when measuring distances to negations and nouns. */
    private static final EnumSet<LexicalCategory> DEFAULT_COUNT_LEXICAL_CATEGORIES = EnumSet.of(
        LexicalCategory.Noun, LexicalCategory.Verb, LexicalCategory.Adjective);

    private static final Double ZERO = Double.valueOf(0.0);

    public static final String DEFAULT_ENGINE_NAME = "sentiment-summarization";

    //TODO: change this to a real sentiment ontology
    /**
     * The property used to write the sum of all positive classified words
     */
    public static final IRI POSITIVE_SENTIMENT_PROPERTY = new IRI(NamespaceEnum.fise+"positive-sentiment");
    /**
     * The property used to write the sum of all negative classified words
     */
    public static final IRI NEGATIVE_SENTIMENT_PROPERTY = new IRI(NamespaceEnum.fise+"negative-sentiment");
    /**
     * The sentiment of the section (sum of positive and negative classifications)
     */
    public static final IRI SENTIMENT_PROPERTY = new IRI(NamespaceEnum.fise+"sentiment");
    /**
     * The dc:type value used for fise:TextAnnotations indicating a Sentiment
     */
    public static final IRI SENTIMENT_TYPE = new IRI(NamespaceEnum.fise+"Sentiment");
    /**
     * The dc:type value used for the sentiment annotation of the whole document
     */
    public static final IRI DOCUMENT_SENTIMENT_TYPE = new IRI(NamespaceEnum.fise+"DocumentSentiment");

    /** Number of countable tokens around a negation that it may affect. */
    private static final int DEFAULT_NEGATION_CONTEXT = 2;
    /** Max distance of a conjunction to extend a negation/verb context. */
    private static final int DEFAULT_CONJUCTION_CONTEXT = 1;
    /** Number of countable tokens around a verb/sentiment searched for affected nouns. */
    private static final int DEFAULT_NOUN_CONTEXT = 4;

    boolean writeSentimentPhrases = true;
    boolean writeSentencesSentimet = true;
    boolean writeDocumentSentiment = true;
    boolean writeSentimentData = false;

    private EnumSet<Pos> negativePosTags = DEFAULT_NEGATION_TAGS;
    private EnumSet<Pos> sectionBorderPosTags = DEFAULT_SECTION_BORDER_TAGS;
    private EnumSet<LexicalCategory> countableLexCats = DEFAULT_COUNT_LEXICAL_CATEGORIES;
    private final LiteralFactory lf = LiteralFactory.getInstance();

    private int negationContext = DEFAULT_NEGATION_CONTEXT;
    private int nounContext = DEFAULT_NOUN_CONTEXT;
    private int conjuctionContext = DEFAULT_CONJUCTION_CONTEXT;

    /**
     * Used to sort {@link Sentiment}s before merging them to {@link SentimentPhrase}s.
     * Orders by start index (ascending); for equal start indexes the Sentiment
     * with the greater end index (the longer one) comes first.
     */
    private static final Comparator<Sentiment> sentimentComparator = new Comparator<Sentiment>(){
        @Override
        public int compare(Sentiment s1, Sentiment s2) {
            if(s1.getStart() == s2.getStart()){
                //FIX: the original returned -1 for both the '>' and the '<' case,
                //violating the Comparator contract (antisymmetry) and making the
                //sort order undefined. The '<' case must return 1.
                return s1.getEnd() > s2.getEnd() ? -1 :
                    s1.getEnd() == s2.getEnd() ? 0 : 1;
            } else {
                return s1.getStart() < s2.getStart() ? -1 : 1;
            }
        }
    };

    @Override
    @Activate
    protected void activate(ComponentContext ctx) throws ConfigurationException {
        log.info(" activate {} with config {}",getClass().getSimpleName(),ctx.getProperties());
        super.activate(ctx);
        //should we write sentiment values for the document
        Object value = ctx.getProperties().get(PROPERTY_DOCUMENT_SENTIMENT_STATE);
        this.writeDocumentSentiment = value == null ? DEFAULT_DOCUMENT_SENTIMENT_STATE :
            value instanceof Boolean ? ((Boolean)value).booleanValue() :
                Boolean.parseBoolean(value.toString());
        //should we write sentiment values for sentences
        value = ctx.getProperties().get(PROPERTY_SENTENCE_SENTIMENT_STATE);
        this.writeSentencesSentimet = value == null ? DEFAULT_SENTENCE_SENTIMENT_STATE :
            value instanceof Boolean ? ((Boolean)value).booleanValue() :
                Boolean.parseBoolean(value.toString());
        //should we write sentiment values for phrases
        value = ctx.getProperties().get(PROPERTY_PHRASE_SENTIMENT_STATE);
        this.writeSentimentPhrases = value == null ? DEFAULT_PHRASE_SENTIMENT_STATE :
            value instanceof Boolean ? ((Boolean)value).booleanValue() :
                Boolean.parseBoolean(value.toString());
    }

    @Override
    @Deactivate
    protected void deactivate(ComponentContext ctx) {
        super.deactivate(ctx);
    }

    @Override
    public int canEnhance(ContentItem ci) throws EngineException {
        //this engine requires both the AnalysedText content part and a detected language
        return NlpEngineHelper.getAnalysedText(this, ci, false) != null &&
            NlpEngineHelper.getLanguage(this, ci, false) != null ?
                ENHANCE_ASYNC : CANNOT_ENHANCE;
    }

    @Override
    public void computeEnhancements(ContentItem ci) throws EngineException {
        String language = NlpEngineHelper.getLanguage(this, ci, true);
        AnalysedText at = NlpEngineHelper.getAnalysedText(this, ci, true);
        //extraction does not touch the ContentItem metadata and needs no lock
        List<SentimentPhrase> sentiments = extractSentiments(at, language);
        String detectedLang = EnhancementEngineHelper.getLanguage(ci);
        //writing the enhancements modifies the metadata graph -> write lock
        ci.getLock().writeLock().lock();
        try {
            writeSentimentEnhancements(ci,sentiments,at,
                detectedLang == null ? null : new Language(detectedLang));
        } finally {
            ci.getLock().writeLock().unlock();
        }
    }

    @Override
    public Map<String,Object> getServiceProperties() {
        return Collections.singletonMap(ENHANCEMENT_ENGINE_ORDERING,
            (Object)ORDERING_EXTRACTION_ENHANCEMENT);
    }

    /**
     * Extracts {@link Sentiment}s for words with a {@link NlpAnnotations#SENTIMENT_ANNOTATION}.
     * The {@link NlpAnnotations#POS_ANNOTATION}s are used to link those words with
     * {@link LexicalCategory#Noun}s.
     * @param at the AnalyzedText to process
     * @param language the language of the analysed text
     * @return the {@link Sentiment} instances organised along {@link Sentence}s. If
     * no {@link Sentence}s are present on the parsed {@link AnalysedText}, than all
     * {@link Sentiment}s are added to the {@link AnalysedText}. Otherwise only
     * {@link Sentiment}s not contained within a {@link Sentence} are added to the
     * {@link AnalysedText} key.
     */
    private List<SentimentPhrase> extractSentiments(AnalysedText at, String language) {
        //we do use Sentences (optional) and Tokens (required)
        Iterator<Span> tokenIt = at.getEnclosed(EnumSet.of(
            SpanTypeEnum.Sentence, SpanTypeEnum.Token));
        List<Sentiment> sentimentTokens = new ArrayList<Sentiment>(32);
        //maps from the index of a token (relative to the current sentence) to the token
        NavigableMap<Integer,Token> negations = new TreeMap<Integer,Token>();
        NavigableMap<Integer,Token> nounsAndPronouns = new TreeMap<Integer,Token>();
        NavigableMap<Integer,Token> verbs = new TreeMap<Integer,Token>();
        NavigableMap<Integer,Token> conjuctions = new TreeMap<Integer,Token>();
        NavigableMap<Integer,Token> sectionBorders = new TreeMap<Integer,Token>();
        boolean firstTokenInSentence = true;
        Sentence sentence = null;
        final List<SentimentPhrase> sentimentPhrases = new ArrayList<SentimentPhrase>();
        while(tokenIt.hasNext()){
            Span span = tokenIt.next();
            switch (span.getType()) {
                case Token:
                    Token word = (Token)span;
                    Integer wordIndex = sentimentTokens.size();
                    Value<Double> sentimentAnnotation = span.getAnnotation(SENTIMENT_ANNOTATION);
                    boolean addToList = false;
                    Sentiment sentiment = null;
                    if(sentimentAnnotation != null && sentimentAnnotation.value() != null &&
                            !sentimentAnnotation.value().equals(ZERO)){
                        sentiment = new Sentiment(word, sentimentAnnotation.value(),
                            sentence == null || word.getEnd() > sentence.getEnd() ?
                                    null : sentence);
                        addToList = true;
                    }
                    if(isNegation((Token)span, language)){
                        addToList = true;
                        negations.put(wordIndex, word);
                    } else if(isNoun(word, firstTokenInSentence, language) ||
                            isPronoun(word,language)){
                        addToList = true;
                        nounsAndPronouns.put(wordIndex, word);
                    } else if(isSectionBorder(word, language)){
                        addToList = true;
                        sectionBorders.put(wordIndex, word);
                    } else if(isVerb(word, language)){
                        addToList = true;
                        verbs.put(wordIndex, word);
                    } else if(isCoordinatingConjuction(word,language)){
                        addToList = true;
                        conjuctions.put(wordIndex, word);
                    } else if(isCountable(word, language)){
                        addToList = true;
                    }
                    if(log.isDebugEnabled()){
                        Value<PosTag> pos = word.getAnnotation(NlpAnnotations.POS_ANNOTATION);
                        log.debug(" [{}] '{}' pos: {}, sentiment {}", new Object[]{
                            addToList ? sentimentTokens.size() : "-", word.getSpan(),
                            //FIX: guard against tokens without a POS annotation (NPE)
                            pos == null ? "none" : pos.value().getCategories(),
                            sentiment == null ? "none" : sentiment.getValue()});
                    }
                    if(addToList){
                        sentimentTokens.add(sentiment); //add the token
                    }
                    firstTokenInSentence = false;
                    break;
                case Sentence:
                    //cleanup the previous sentence
                    sentimentPhrases.addAll(summarizeSentence(sentimentTokens, negations,
                        nounsAndPronouns, verbs, conjuctions, sectionBorders));
                    negations.clear();
                    nounsAndPronouns.clear();
                    sentimentTokens.clear();
                    verbs.clear();
                    //FIX: conjunctions of the previous sentence were not cleared and
                    //leaked into the context computation of the next sentence
                    conjuctions.clear();
                    sectionBorders.clear();
                    firstTokenInSentence = true;
                    sentence = (Sentence)span;
                    break;
                case TextSection:
                    break;
                default:
                    break;
            }
        }
        //summarize the last sentence (or the whole text if no sentences are present)
        sentimentPhrases.addAll(summarizeSentence(sentimentTokens, negations, nounsAndPronouns,
            verbs, conjuctions, sectionBorders));
        return sentimentPhrases;
    }

    /**
     * Combines the {@link Sentiment}s of a sentence to {@link SentimentPhrase}s by
     * (1) assigning negations and nouns/pronouns within the computed context to each
     * sentiment and (2) merging overlapping sentiments to phrases.
     * @param sentimentTokens the sentiments (with <code>null</code> entries for
     * countable tokens without a sentiment value)
     * @param negations token-index -&gt; negation tokens of the sentence
     * @param nounsAndPronouns token-index -&gt; noun/pronoun tokens of the sentence
     * @param verbs token-index -&gt; verb tokens of the sentence
     * @param conjunctions token-index -&gt; coordinating conjunctions of the sentence
     * @param sectionBorders token-index -&gt; section border tokens (e.g. ',')
     * @return the sentiment phrases of the sentence
     */
    private List<SentimentPhrase> summarizeSentence(List<Sentiment> sentimentTokens,
            NavigableMap<Integer,Token> negations, NavigableMap<Integer,Token> nounsAndPronouns,
            NavigableMap<Integer,Token> verbs, NavigableMap<Integer,Token> conjunctions,
            NavigableMap<Integer,Token> sectionBorders) {
        List<Sentiment> processedSentiments = new ArrayList<Sentiment>();
        Integer[] searchSpan = new Integer[]{-1,-1};
        for(int i = 0; i < sentimentTokens.size(); i++){
            Integer index = Integer.valueOf(i);
            Sentiment sentiment = sentimentTokens.get(i);
            if(sentiment != null){
                //check for a new section
                if(index.compareTo(searchSpan[1]) > 0) {
                    searchSpan[0] = sectionBorders.floorKey(index);
                    if(searchSpan[0] == null) {
                        searchSpan[0] = Integer.valueOf(0);
                    }
                    searchSpan[1] = sectionBorders.ceilingKey(index);
                    if(searchSpan[1] == null) {
                        searchSpan[1] = Integer.valueOf(sentimentTokens.size()-1);
                    }
                }
                //for negation use the negation context
                Integer[] context = getNegationContext(index, conjunctions, searchSpan);
                for(Token negationToken : negations.subMap(context[0], true, context[1], true).values()){
                    sentiment.addNegate(negationToken);
                }
                //for nouns use the sentiment context
                context = getSentimentContext(index, sentiment, verbs, conjunctions,
                    nounsAndPronouns, searchSpan);
                for(Token word : nounsAndPronouns.subMap(context[0], true, context[1], true).values()){
                    sentiment.addAbout(word);
                }
                processedSentiments.add(sentiment);
            }
        }
        //now combine the processed sentiments to SentimentPhrases
        Collections.sort(processedSentiments, sentimentComparator);
        List<SentimentPhrase> sentimentPhrases = new ArrayList<SentimentPhrase>();
        SentimentPhrase phrase = null;
        for(Sentiment sentiment : processedSentiments){
            if(phrase == null || sentiment.getStart() > phrase.getEndIndex()){
                phrase = new SentimentPhrase(sentiment);
                sentimentPhrases.add(phrase);
            } else {
                phrase.addSentiment(sentiment);
            }
        }
        return sentimentPhrases;
    }

    /**
     * Computes the token-index range a negation at the parsed index may affect.
     * The range is {@link #negationContext} in both directions (clipped to the
     * section span) and is extended over a nearby coordinating conjunction.
     * @param index the index of the sentiment token
     * @param conjunctions the conjunctions of the sentence
     * @param sectionSpan the borders of the current section
     * @return a two element array with the inclusive [start,end] token indexes
     */
    private Integer[] getNegationContext(Integer index, NavigableMap<Integer,Token> conjunctions,
            Integer[] sectionSpan) {
        Integer[] context = new Integer[]{
            Integer.valueOf(Math.max(index-negationContext,sectionSpan[0])),
            Integer.valueOf(Math.min(index+negationContext,sectionSpan[1]))};
        Integer floorConjunction = conjunctions.floorKey(index);
        //consider conjunction "The helmet is not comfortable and easy to use"
        //the "not" refers both to "comfortable" and "easy"
        if(floorConjunction != null &&
                floorConjunction.compareTo(index-conjuctionContext) >= 0){
            context[0] = Integer.valueOf(Math.max(floorConjunction-negationContext-1,sectionSpan[0]));
        }
        return context;
    }

    /**
     * Computes the token-index range searched for nouns/pronouns the parsed
     * sentiment is about. Predicative adjectives use the nearest verb as anchor;
     * attributive adjectives use the directly following token; nouns use only
     * themselves; everything else falls back to {@link #nounContext} around the
     * sentiment.
     * @param index the index of the sentiment token
     * @param sentiment the sentiment (the verb used as anchor is set on it)
     * @param verbs the verbs of the sentence
     * @param conjunctions the coordinating conjunctions of the sentence
     * @param nouns the nouns and pronouns of the sentence
     * @param sectionSpan the borders of the current section
     * @return a two element array with the inclusive [start,end] token indexes
     */
    private Integer[] getSentimentContext(Integer index, Sentiment sentiment,
            NavigableMap<Integer,Token> verbs, NavigableMap<Integer,Token> conjunctions,
            NavigableMap<Integer,Token> nouns, Integer[] sectionSpan) {
        Integer[] context;
        PosTag pos = sentiment.getPosTag();
        boolean isPredicative;
        if(pos != null && pos.getPosHierarchy().contains(Pos.PredicativeAdjective)){
            isPredicative = true;
        } else if(pos != null && pos.hasCategory(LexicalCategory.Adjective) &&
                //Adjective that are not directly in front of a Noun
                nouns.get(Integer.valueOf(index+1)) == null){
            isPredicative = true;
        } else {
            isPredicative = false;
        }
        if(isPredicative){
            //use the verb as context
            Integer floorNoun = nouns.floorKey(index);
            Entry<Integer,Token> floorVerb = verbs.floorEntry(index);
            Integer ceilingNoun = nouns.ceilingKey(index);
            Entry<Integer,Token> ceilingVerb = verbs.ceilingEntry(index);
            floorVerb = floorVerb == null || floorVerb.getKey().compareTo(sectionSpan[0]) < 0 ||
                //do not use verbs with an noun in-between
                (floorNoun != null && floorVerb.getKey().compareTo(floorNoun) < 0) ?
                        null : floorVerb;
            ceilingVerb = ceilingVerb == null || ceilingVerb.getKey().compareTo(sectionSpan[1]) > 0 ||
                //do not use verbs with an noun in-between
                (ceilingNoun != null && ceilingVerb.getKey().compareTo(ceilingNoun) > 0) ?
                        null : ceilingVerb;
            Entry<Integer,Token> verb;
            if(ceilingVerb != null && floorVerb != null){
                //use the nearer verb
                verb = (index - floorVerb.getKey()) < (ceilingVerb.getKey()-index) ?
                    floorVerb : ceilingVerb;
            } else if(ceilingVerb != null){
                verb = ceilingVerb;
            } else if(floorVerb != null){
                verb = floorVerb;
            } else {
                //no verb that can be used as context ... return an area around the current pos.
                verb = null;
            }
            if(verb != null){
                if(verb.getKey().compareTo(index) < 0){
                    Integer floorConjunction = conjunctions.floorKey(verb.getKey());
                    if(floorConjunction != null && floorConjunction.compareTo(
                            Integer.valueOf(Math.max(verb.getKey()-conjuctionContext,sectionSpan[0]))) >= 0){
                        //search an other verb in the same direction
                        floorVerb = verbs.floorEntry(floorConjunction);
                        if(floorVerb != null && floorVerb.getKey().compareTo(sectionSpan[0]) >= 0 &&
                                //do not step over an noun
                                (floorNoun == null || floorVerb.getKey().compareTo(floorNoun) >= 0)){
                            verb = floorVerb;
                        }
                    }
                } else if(verb.getKey().compareTo(index) > 0){
                    Integer ceilingConjunction = conjunctions.ceilingKey(verb.getKey());
                    //FIX: mirror of the floor branch above. The original tested
                    //'>= 0' (conjunction at or beyond the context border - nearly
                    //always true) instead of '<= 0' (conjunction within the context
                    //above the verb), and searched downward with floorEntry(..)
                    //although "the same direction" here is upward (ceilingEntry).
                    if(ceilingConjunction != null && ceilingConjunction.compareTo(
                            Integer.valueOf(Math.min(verb.getKey()+conjuctionContext,sectionSpan[1]))) <= 0){
                        //search an other verb in the same direction
                        ceilingVerb = verbs.ceilingEntry(ceilingConjunction);
                        if(ceilingVerb != null && ceilingVerb.getKey().compareTo(sectionSpan[1]) <= 0 &&
                                //do not step over an noun
                                (ceilingNoun == null || ceilingVerb.getKey().compareTo(ceilingNoun) <= 0)){
                            verb = ceilingVerb;
                        }
                    }
                }
                context = new Integer[]{Integer.valueOf(verb.getKey()-nounContext),
                    Integer.valueOf(verb.getKey()+nounContext)};
                sentiment.setVerb(verb.getValue());
            } else {
                context = new Integer[]{Integer.valueOf(index-nounContext),
                    Integer.valueOf(index+nounContext)};
            }
        } else if(pos != null && pos.hasCategory(LexicalCategory.Adjective)){
            //for all other adjective the affected noun is expected directly
            //after the noun
            context = new Integer[]{index,Integer.valueOf(index+1)};
        } else if(pos != null && pos.hasCategory(LexicalCategory.Noun)){
            //a noun with an sentiment
            context = new Integer[]{index,index};
        } else { //else (includes pos == null) return default
            context = new Integer[]{Integer.valueOf(index-nounContext),
                Integer.valueOf(index+nounContext)};
        }
        //ensure the returned context does not exceed the parsed sectionSpan
        if(context[0].compareTo(sectionSpan[0]) < 0){
            context[0] = sectionSpan[0];
        }
        if(context[1].compareTo(sectionSpan[1]) > 0) {
            context[1] = sectionSpan[1];
        }
        return context;
    }

    /**
     * Checks if the parsed {@link Token} is a pronoun (based on the POS annotation).
     */
    private boolean isPronoun(Token token, String language) {
        Value<PosTag> posAnnotation = token.getAnnotation(NlpAnnotations.POS_ANNOTATION);
        return posAnnotation == null ? false :
            posAnnotation.value().getPosHierarchy().contains(Pos.Pronoun);
    }

    /**
     * Checks if the parsed {@link Token} is a verb (based on the POS annotation).
     */
    private boolean isVerb(Token token, String language) {
        Value<PosTag> posAnnotation = token.getAnnotation(NlpAnnotations.POS_ANNOTATION);
        return posAnnotation == null ? false :
            posAnnotation.value().hasCategory(LexicalCategory.Verb);
    }

    /**
     * Checks if the parsed {@link Token} is a coordinating conjunction (e.g. "and").
     */
    private boolean isCoordinatingConjuction(Token token, String language) {
        Value<PosTag> posAnnotation = token.getAnnotation(NlpAnnotations.POS_ANNOTATION);
        return posAnnotation == null ? false :
            posAnnotation.value().getPosHierarchy().contains(Pos.CoordinatingConjunction);
    }

    /**
     * Checks if the parsed {@link Token} represents a section border within a
     * sentence (see {@link #sectionBorderPosTags}).
     */
    private boolean isSectionBorder(Token token, String language) {
        Value<PosTag> posAnnotation = token.getAnnotation(NlpAnnotations.POS_ANNOTATION);
        if(posAnnotation != null && !Collections.disjoint(sectionBorderPosTags,
                posAnnotation.value().getPosHierarchy())){
            return true;
        } else {
            return false;
        }
    }

    /**
     * Checks if the parsed {@link Token} represents an negation
     * @param token the word
     * @param language the language
     * @return <code>true</code> if the {@link Token} represents a negation.
     * Otherwise <code>false</code>
     */
    private boolean isNegation(Token token, String language) {
        Value<PosTag> posAnnotation = token.getAnnotation(NlpAnnotations.POS_ANNOTATION);
        if(posAnnotation != null && !Collections.disjoint(negativePosTags,
                posAnnotation.value().getPosHierarchy())){
            return true;
        } else {
            return false;
        }
    }

    /**
     * Checks if the parsed {@link Token} represents a noun (or a cardinal number).
     * Tokens starting with an upper case letter that are not the first token of a
     * sentence are assumed to be nouns regardless of the POS annotation.
     * @param token the word
     * @param firstTokenInSentence if the token is the first of its sentence
     * @param language the language
     * @return <code>true</code> if the {@link Token} represents a noun.
     * Otherwise <code>false</code>
     */
    private boolean isNoun(Token token, boolean firstTokenInSentence, String language) {
        String word = token.getSpan();
        if(!firstTokenInSentence && !word.isEmpty() && Character.isUpperCase(word.charAt(0))){
            return true; //assume all upper case tokens are Nouns
        }
        Value<PosTag> posAnnotation = token.getAnnotation(NlpAnnotations.POS_ANNOTATION);
        if(posAnnotation != null && (posAnnotation.value().hasCategory(LexicalCategory.Noun) ||
                posAnnotation.value().getPosHierarchy().contains(Pos.CardinalNumber))){
            return true;
        }
        return false;
    }

    /**
     * If the current Token should be considered for counting distances to
     * negations and nouns
     * @param token the word
     * @param language the language
     * @return if the token is countable (see {@link #countableLexCats})
     */
    private boolean isCountable(Token token, String language){
        Value<PosTag> posAnnotation = token.getAnnotation(NlpAnnotations.POS_ANNOTATION);
        if(posAnnotation != null && !Collections.disjoint(countableLexCats,
                posAnnotation.value().getCategories())){
            return true;
        } else {
            return false;
        }
    }

    /**
     * Writes the fise:TextAnnotations for the parsed sentiment phrases as well as
     * (if enabled) the summarized sentiments per sentence and for the whole document.
     * Callers MUST hold the write lock of the content item.
     * @param ci the content item
     * @param sentimentPhrases the extracted sentiment phrases
     * @param at the analysed text
     * @param lang the language of the text (or <code>null</code> if not known)
     */
    private void writeSentimentEnhancements(ContentItem ci, List<SentimentPhrase> sentimentPhrases,
            AnalysedText at, Language lang) {
        Graph metadata = ci.getMetadata();
        Sentence currentSentence = null;
        final List<SentimentPhrase> sentencePhrases = new ArrayList<SentimentPhrase>();
        for(SentimentPhrase sentPhrase : sentimentPhrases){
            Sentence sentence = sentPhrase.getSentence();
            if(log.isDebugEnabled()){ //debug sentiment info
                CharSequence phraseText = at.getText().subSequence(
                    sentPhrase.getStartIndex(), sentPhrase.getEndIndex());
                log.debug("Write SentimentPhrase for {} (sentence: {})", phraseText,
                    sentence == null ? "none" : sentence.getSpan().length() > 17 ?
                        (sentence.getSpan().subSequence(0,17) + "...") : sentence.getSpan());
                List<Sentiment> sentiments = sentPhrase.getSentiments();
                log.debug(" > {} Sentiments:",sentiments.size());
                for(int i = 0; i < sentiments.size(); i++){
                    log.debug(" {}. {}",i+1,sentiments.get(i));
                }
            }
            if(writeSentimentPhrases){
                IRI enh = createTextEnhancement(ci, this);
                String phraseText = at.getSpan().substring(
                    sentPhrase.getStartIndex(), sentPhrase.getEndIndex());
                metadata.add(new TripleImpl(enh, ENHANCER_SELECTED_TEXT,
                    new PlainLiteralImpl(phraseText, lang)));
                if(sentPhrase.getSentence() == null){
                    metadata.add(new TripleImpl(enh, ENHANCER_SELECTION_CONTEXT,
                        new PlainLiteralImpl(getSelectionContext(
                            at.getSpan(), phraseText, sentPhrase.getStartIndex()),lang)));
                } else {
                    metadata.add(new TripleImpl(enh, ENHANCER_SELECTION_CONTEXT,
                        new PlainLiteralImpl(sentPhrase.getSentence().getSpan(),lang)));
                }
                metadata.add(new TripleImpl(enh, ENHANCER_START,
                    lf.createTypedLiteral(sentPhrase.getStartIndex())));
                metadata.add(new TripleImpl(enh, ENHANCER_END,
                    lf.createTypedLiteral(sentPhrase.getEndIndex())));
                if(sentPhrase.getPositiveSentiment() != null){
                    metadata.add(new TripleImpl(enh, POSITIVE_SENTIMENT_PROPERTY,
                        lf.createTypedLiteral(sentPhrase.getPositiveSentiment())));
                }
                if(sentPhrase.getNegativeSentiment() != null){
                    metadata.add(new TripleImpl(enh, NEGATIVE_SENTIMENT_PROPERTY,
                        lf.createTypedLiteral(sentPhrase.getNegativeSentiment())));
                }
                metadata.add(new TripleImpl(enh, SENTIMENT_PROPERTY,
                    lf.createTypedLiteral(sentPhrase.getSentiment())));
                //add the Sentiment type as well as the type of the SSO Ontology
                metadata.add(new TripleImpl(enh, DC_TYPE, SENTIMENT_TYPE));
                IRI ssoType = NIFHelper.SPAN_TYPE_TO_SSO_TYPE.get(SpanTypeEnum.Chunk);
                if(ssoType != null){
                    metadata.add(new TripleImpl(enh, DC_TYPE, ssoType));
                }
            }
            if(writeSentencesSentimet && sentence != null){
                if(sentence.equals(currentSentence)){
                    sentencePhrases.add(sentPhrase);
                } else {
                    //a new sentence started: write the summary of the completed one
                    writeSentiment(ci, currentSentence,sentencePhrases);
                    //reset
                    currentSentence = sentence;
                    sentencePhrases.clear();
                    sentencePhrases.add(sentPhrase);
                }
            }
        }
        if(!sentencePhrases.isEmpty()){ //write the summary of the last sentence
            writeSentiment(ci, currentSentence,sentencePhrases);
        }
        if(writeDocumentSentiment){
            writeSentiment(ci, at,sentimentPhrases);
        }
    }

    /**
     * Writes a summarized sentiment annotation for a section (sentence or the
     * whole text). Positive and negative sentiments are summarized separately
     * using the root-mean-square of the phrase values.
     * @param ci the content item
     * @param section the section (sentence or text) the phrases belong to
     * @param sectionPhrases the sentiment phrases of the section
     */
    private void writeSentiment(ContentItem ci, Section section,
            List<SentimentPhrase> sectionPhrases) {
        if(section == null || sectionPhrases == null || sectionPhrases.isEmpty()){
            return; //nothing to do
        }
        IRI enh = createTextEnhancement(ci, this);
        Graph metadata = ci.getMetadata();
        if(section.getType() == SpanTypeEnum.Sentence){
            //TODO use the fise:TextAnnotation new model for
            //add start/end positions
            metadata.add(new TripleImpl(enh, ENHANCER_START,
                lf.createTypedLiteral(section.getStart())));
            metadata.add(new TripleImpl(enh, ENHANCER_END,
                lf.createTypedLiteral(section.getEnd())));
        }
        //add the sentiment information (RMS of the positive and negative values)
        double positiveSent = 0.0;
        int positiveCount = 0;
        double negativeSent = 0.0;
        int negativeCount = 0;
        for(SentimentPhrase sentPhrase : sectionPhrases){
            if(sentPhrase.getNegativeSentiment() != null){
                double neg = sentPhrase.getNegativeSentiment();
                negativeSent = negativeSent+(neg*neg);
                negativeCount++;
            }
            if(sentPhrase.getPositiveSentiment() != null){
                double pos = sentPhrase.getPositiveSentiment();
                positiveSent = positiveSent+(pos*pos);
                positiveCount++;
            }
        }
        if(positiveCount > 0){
            positiveSent = Math.sqrt(positiveSent/(double)positiveCount);
            metadata.add(new TripleImpl(enh, POSITIVE_SENTIMENT_PROPERTY,
                lf.createTypedLiteral(Double.valueOf(positiveSent))));
        }
        if(negativeCount > 0){
            negativeSent = Math.sqrt(negativeSent/(double)negativeCount)*-1;
            metadata.add(new TripleImpl(enh, NEGATIVE_SENTIMENT_PROPERTY,
                lf.createTypedLiteral(Double.valueOf(negativeSent))));
        }
        metadata.add(new TripleImpl(enh, SENTIMENT_PROPERTY,
            lf.createTypedLiteral(Double.valueOf(negativeSent+positiveSent))));
        //add the Sentiment type as well as the type of the SSO Ontology
        metadata.add(new TripleImpl(enh, DC_TYPE, SENTIMENT_TYPE));
        IRI ssoType = NIFHelper.SPAN_TYPE_TO_SSO_TYPE.get(section.getType());
        if(ssoType != null){
            metadata.add(new TripleImpl(enh, DC_TYPE, ssoType));
        }
        if(section.getType() == SpanTypeEnum.Text){
            metadata.add(new TripleImpl(enh, DC_TYPE, DOCUMENT_SENTIMENT_TYPE));
        }
    }

    /**
     * The maximum size of the prefix/suffix for the selection context
     */
    private static final int DEFAULT_SELECTION_CONTEXT_PREFIX_SUFFIX_SIZE = 50;

    /**
     * Extracts the selection context based on the content, selection and
     * the start char offset of the selection. The context is extended to the
     * nearest word boundary where possible.
     * @param content the content
     * @param selection the selected text
     * @param selectionStartPos the start char position of the selection
     * @return the context
     */
    public static String getSelectionContext(String content, String selection,int selectionStartPos){
        //extract the selection context
        int beginPos;
        if(selectionStartPos <= DEFAULT_SELECTION_CONTEXT_PREFIX_SUFFIX_SIZE){
            beginPos = 0;
        } else {
            int start = selectionStartPos-DEFAULT_SELECTION_CONTEXT_PREFIX_SUFFIX_SIZE;
            beginPos = content.indexOf(' ',start);
            if(beginPos < 0 || beginPos >= selectionStartPos){ //no words
                beginPos = start; //begin within a word
            }
        }
        int endPos;
        if(selectionStartPos+selection.length()+DEFAULT_SELECTION_CONTEXT_PREFIX_SUFFIX_SIZE >= content.length()){
            endPos = content.length();
        } else {
            int start = selectionStartPos+selection.length()+DEFAULT_SELECTION_CONTEXT_PREFIX_SUFFIX_SIZE;
            endPos = content.lastIndexOf(' ', start);
            if(endPos <= selectionStartPos+selection.length()){
                endPos = start; //end within a word;
            }
        }
        return content.substring(beginPos, endPos);
    }
}