/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.stanbol.enhancer.engines.sentiment.summarize;
import java.util.ArrayList;
import java.util.Collections;
import java.util.EnumSet;
import java.util.List;
import java.util.Set;
import org.apache.stanbol.enhancer.nlp.NlpAnnotations;
import org.apache.stanbol.enhancer.nlp.model.AnalysedText;
import org.apache.stanbol.enhancer.nlp.model.Sentence;
import org.apache.stanbol.enhancer.nlp.model.Token;
import org.apache.stanbol.enhancer.nlp.model.annotation.Value;
import org.apache.stanbol.enhancer.nlp.pos.LexicalCategory;
import org.apache.stanbol.enhancer.nlp.pos.PosTag;
/**
* This class represents a {@link Token} that holds a Sentiment in the
* context of a {@link Sentence}. A Sentiment might be {@link #addNegate(Token) negated}
* and be {@link #addAbout(Token) assigned} to a Noun or Pronoun via a
* {@link #getVerb() Verb}. The {@link #getStart()} and {@link #getEnd()} values
* return the span selected by this Sentiment. These are the lowest start and
* highest end values of any token related to this sentiment. Those spans are
* used by the {@link SentimentPhrase} class for clustering {@link Sentiment}s
* into phrases.
*
* @author Rupert Westenthaler
*
*/
public class Sentiment {

    /**
     * Minimum POS tag confidence so that the annotated POS is used without
     * considering the {@link #PREF_LEX_CAT}
     */
    private static final double MIN_POS_CONF = 0.85;

    /**
     * If the confidence of a POS tag is lower than {@link #MIN_POS_CONF},
     * the POS tags are searched for an annotation compatible with one of
     * these lexical categories.
     */
    private static final Set<LexicalCategory> PREF_LEX_CAT = EnumSet.of(LexicalCategory.Adjective);

    /**
     * The token holding the sentiment
     */
    private final Token token;

    /**
     * The (not negated) value of the sentiment
     */
    private final double value;

    /**
     * The Sentence of the {@link #token}
     */
    private final Sentence sentence;

    /**
     * List of tokens that negate this sentiment. <code>null</code> if no
     * negation was added. Never empty when not <code>null</code>: it is only
     * created by {@link #addNegate(Token)} together with its first element.
     */
    private List<Token> negated;

    /**
     * The Nouns and/or Pronouns this sentiment is about. <code>null</code> if
     * no aboutness is defined
     */
    private List<Token> aboutness;

    /**
     * The PosTag of the {@link #token}. <code>null</code> if the token has
     * no POS annotations.
     */
    private final PosTag posTag;

    /**
     * The start position of this sentiment. This is the lowest start of any
     * token added to this sentiment. This field is set by {@link #checkSpan(Token)}
     */
    private int start;

    /**
     * The end position of this sentiment. This is the highest end of any
     * token added to this sentiment. This field is set by {@link #checkSpan(Token)}
     */
    private int end;

    /**
     * The verb assigning this sentiment to the Nouns and/or Pronouns added
     * by {@link #addAbout(Token)}.
     */
    private Token verb;

    /**
     * Creates a Sentiment for the Token with the sentiment, the value of the
     * sentiment and optionally the Sentence for the token.
     * <p>
     * The span is initialised with the span of the token. The POS tag is
     * selected from the POS annotations of the token: the first annotation
     * is used that has an unknown probability, a probability of at least
     * {@link #MIN_POS_CONF}, or a category contained in {@link #PREF_LEX_CAT}.
     * If no annotation matches, the first one is used. If the selected tag is
     * a Noun the token is also {@link #addAbout(Token) added as aboutness};
     * if it is a Verb the token is also {@link #setVerb(Token) set as verb}.
     *
     * @param token the token holding the sentiment
     * @param value the (not negated) sentiment value
     * @param sentence the sentence of the token; it is only stored and
     * returned by {@link #getSentence()}
     */
    public Sentiment(Token token, double value, Sentence sentence) {
        this.token = token;
        this.value = value;
        this.sentence = sentence;
        this.start = token.getStart();
        this.end = token.getEnd();
        List<Value<PosTag>> tags = token.getAnnotations(NlpAnnotations.POS_ANNOTATION);
        PosTag posTag = null;
        if(tags != null && !tags.isEmpty()){
            for(Value<PosTag> tag : tags){
                if(tag.probability() == Value.UNKNOWN_PROBABILITY ||
                        tag.probability() >= MIN_POS_CONF ||
                        !Collections.disjoint(tag.value().getCategories(),PREF_LEX_CAT)){
                    posTag = tag.value();
                    break;
                }
            }
            if(posTag == null){ //no suitable tag found: fall back to the first one
                posTag = tags.get(0).value();
            }
            if(posTag.hasCategory(LexicalCategory.Noun)){
                addAbout(token); //add the token also as noun
            }
            if(posTag.hasCategory(LexicalCategory.Verb)){
                setVerb(token);
            }
        }
        this.posTag = posTag;
    }

    /**
     * Adds a Token that negates this Sentiment. Every added token is
     * retained and returned by {@link #getNegates()}; the span of this
     * sentiment is extended to include the token.
     * @param token the token
     */
    protected void addNegate(Token token){
        if(negated == null){ //most of the time a singletonList will do
            negated = Collections.singletonList(token);
        } else {
            if(negated.size() == 1){
                //the singletonList is immutable: switch to a growable list
                List<Token> l = new ArrayList<Token>(4);
                l.add(negated.get(0));
                negated = l;
            }
            //also covers the third and any further negation, which were
            //previously dropped without being stored
            negated.add(token);
        }
        checkSpan(token);
    }

    /**
     * Sets the verb of this Sentiment and extends the span of this
     * sentiment to include it.
     * @param verb the verb token
     */
    protected void setVerb(Token verb) {
        this.verb = verb;
        checkSpan(verb);
    }

    /**
     * Adds a Token this Sentiment is about and extends the span of this
     * sentiment to include it.
     * @param noun the token to add
     */
    protected void addAbout(Token noun) {
        if(aboutness == null){
            aboutness = new ArrayList<Token>(4);
        }
        aboutness.add(noun);
        checkSpan(noun);
    }

    /**
     * Checks the {@link #start} {@link #end} values against the span selected
     * by the parsed token and widens them if the token lies outside.<p>
     * This method is called by all others that do add tokens.
     * @param token the added token
     */
    private void checkSpan(Token token) {
        if(start > token.getStart()){
            start = token.getStart();
        }
        if(end < token.getEnd()){
            end = token.getEnd();
        }
    }

    /**
     * The POS tag of the Token with a sentiment.
     * @return the POS tag or <code>null</code> if the token had no
     * POS annotations
     */
    public PosTag getPosTag() {
        return posTag;
    }

    /**
     * The Sentiment value (considering possible negations). The sign of the
     * value is inverted if at least one negation was added, regardless of
     * how many negations there are.
     * @return the sentiment value
     */
    public double getValue() {
        return negated == null ? value : value*-1;
    }

    /**
     * The Token holding the sentiment
     * @return the token
     */
    public Token getToken() {
        return token;
    }

    /**
     * The Sentence parsed to the constructor
     * @return the sentence
     */
    public Sentence getSentence() {
        return sentence;
    }

    /**
     * The {@link AnalysedText Text}. This is the context of the
     * {@link #getToken() token}.
     * @return the text
     */
    public AnalysedText getAnalysedText(){
        return token.getContext();
    }

    /**
     * The tokens negating this Sentiment
     * @return the tokens or an empty list if none
     */
    public List<Token> getNegates() {
        return negated == null ? Collections.<Token>emptyList() : negated;
    }

    /**
     * The Nouns or Pronoun(s) the Sentiment is about
     * @return the tokens or an empty list if none.
     */
    public List<Token> getAboutness() {
        return aboutness == null ? Collections.<Token>emptyList() : aboutness;
    }

    /**
     * The verb used to assign Adjectives to the Nouns (or Pronouns)
     * @return the verb or <code>null</code> if none was set
     */
    public Token getVerb() {
        return verb;
    }

    /**
     * The start position of this sentiment. This is the lowest start of any
     * token linked to this sentiment
     * @return the start position
     */
    public int getStart(){
        return start;
    }

    /**
     * The end position of this sentiment. This is the highest end of any
     * token linked to this sentiment
     * @return the end position
     */
    public int getEnd(){
        return end;
    }

    @Override
    public String toString() {
        return new StringBuilder("Sentiment [").append(start).append(',').append(end).append("]:")
                .append(token).append('@')
                .append(getValue()).append(" | negations: ").append(getNegates())
                .append(" | about: ").append(getAboutness()).append(" | verb: ").append(verb).toString();
    }

    /**
     * Based on the {@link #token} only. This is consistent with
     * {@link #equals(Object)} as equal Sentiments always have equal tokens.
     */
    @Override
    public int hashCode() {
        return token.hashCode();
    }

    /**
     * Two Sentiments are equal if they have equal tokens, the same
     * (not negated) value and are either both negated or both not negated.
     */
    @Override
    public boolean equals(Object obj) {
        if(this == obj){
            //keeps equals reflexive even if the value is NaN
            return true;
        }
        if(!(obj instanceof Sentiment)){
            return false;
        }
        Sentiment other = (Sentiment)obj;
        //negated is never empty when not null, so comparing the null state
        //is sufficient to compare the negation state
        return token.equals(other.token)
                && value == other.value
                && (negated == null) == (other.negated == null);
    }
}