package edu.stanford.nlp.naturalli;
import edu.stanford.nlp.util.logging.Redwood;
import edu.stanford.nlp.ie.machinereading.structure.Span;
import edu.stanford.nlp.ling.CoreAnnotation;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.IndexedWord;
import edu.stanford.nlp.ling.tokensregex.TokenSequenceMatcher;
import edu.stanford.nlp.ling.tokensregex.TokenSequencePattern;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.SentenceAnnotator;
import edu.stanford.nlp.semgraph.SemanticGraph;
import edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations;
import edu.stanford.nlp.semgraph.SemanticGraphEdge;
import edu.stanford.nlp.semgraph.semgrex.SemgrexMatcher;
import edu.stanford.nlp.semgraph.semgrex.SemgrexPattern;
import edu.stanford.nlp.util.*;
import edu.stanford.nlp.naturalli.NaturalLogicAnnotations.*;
import java.util.*;
import java.util.function.Function;
import java.util.stream.Collectors;
/**
* An annotator marking operators with their scope.
* Look at {@link NaturalLogicAnnotator#PATTERNS} for the full list of patterns, otherwise
* {@link NaturalLogicAnnotator#doOneSentence(Annotation, CoreMap)} is the main interface for this class.
*
* TODO(gabor) annotate generics as "most"
*
* @author Gabor Angeli
*/
@SuppressWarnings("unchecked")
public class NaturalLogicAnnotator extends SentenceAnnotator {
/** Logging channel for this class; used to report sentences that failed to annotate. */
private static final Redwood.RedwoodChannels log = Redwood.channels(NaturalLogicAnnotator.class);
/**
* A regex for arcs that act as determiners.
* Lists the relation labels {@code det*}, {@code amod}, {@code advmod}, {@code neg}, {@code nummod},
* {@code compound} and {@code case}. It is spliced into the Semgrex patterns of
* {@link NaturalLogicAnnotator#PATTERNS} as the relation between a subject and its quantifier.
*/
private static final String DET = "/det.*|a(dv)?mod|neg|nummod|compound|case/";
/**
* A regex for arcs that we pretend are subject arcs.
* Lists {@code nsubj} and {@code isubj}, each optionally suffixed with {@code pass}.
*/
private static final String GEN_SUBJ = "/[ni]subj(pass)?/";
/**
* A regex for arcs that we pretend are object arcs.
* Lists {@code dobj}, {@code iobj}, {@code xcomp} and {@code advcl}.
*/
private static final String GEN_OBJ = "/[di]obj|xcomp|advcl/";
/**
* A regex for arcs that we pretend are copula.
* Lists {@code cop}, {@code aux} and {@code auxpass}.
*/
private static final String GEN_COP = "/cop|aux(pass)?/";
/**
* A regex for arcs which denote a sub-clause (e.g., "at Stanford" or "who are at Stanford").
* Lists {@code nmod} and {@code acl:relcl}.
*/
private static final String GEN_CLAUSE = "/nmod|acl:relcl/";
/**
* A regex for arcs which denote a preposition.
* Lists {@code nmod}, {@code advcl}, {@code ccomp} and {@code advmod}.
*/
private static final String GEN_PREP = "/nmod|advcl|ccomp|advmod/";
/**
 * A Semgrex fragment for matching a quantifier.
 * The fragment matches either a node whose lemma is the last token of some {@link Operator} surface form
 * (surface forms whose last token starts with an underscore are skipped), or any node tagged {@code CD}.
 * In both cases the node is bound to the name {@code quantifier}.
 */
private static final String QUANTIFIER;
static {
  // LinkedHashSet: removes duplicate head words while keeping the generated pattern text
  // in enum-declaration order, so the fragment is identical from one JVM run to the next.
  Set<String> quantifierHeads = new LinkedHashSet<>();
  for (Operator q : Operator.values()) {
    String[] tokens = q.surfaceForm.split("\\s+");
    String head = tokens[tokens.length - 1];
    if (!head.startsWith("_")) {
      // Locale.ROOT keeps lowercasing independent of the JVM's default locale.
      quantifierHeads.add("(" + head.toLowerCase(Locale.ROOT) + ")");
    }
  }
  QUANTIFIER = "[ {lemma:/" + StringUtils.join(quantifierHeads, "|") + "/}=quantifier | {pos:CD}=quantifier ]";
}
/**
 * The patterns to use for marking quantifier scopes.
 * Each pattern binds a {@code pivot} node and, depending on the pattern, some of {@code quantifier},
 * {@code subject}, {@code object}, or {@code Subject} (capitalised: a proper-noun subject that acts as
 * its own quantifier). The list is immutable and is tried in order against every sentence.
 */
private static final List<SemgrexPattern> PATTERNS = Collections.unmodifiableList(Arrays.asList(
    // { All cats eat mice,
    //   All cats want milk }
    SemgrexPattern.compile("{}=pivot >"+GEN_SUBJ+" ({}=subject >>"+DET+" "+QUANTIFIER+") >"+GEN_OBJ+" {}=object"),
    // { All cats are in boxes,
    //   All cats voted for Obama,
    //   All cats have voted for Obama }
    SemgrexPattern.compile("{pos:/V.*/}=pivot >"+GEN_SUBJ+" ({}=subject >>"+DET+" "+QUANTIFIER+") >"+GEN_PREP+" {}=object"),
    // { All cats are cute,
    //   All cats can purr }
    SemgrexPattern.compile("{}=object >"+GEN_SUBJ+" ({}=subject >>"+DET+" "+QUANTIFIER+") >"+GEN_COP+" {}=pivot"),
    // { Everyone at Stanford likes cats,
    //   Everyone who is at Stanford likes cats }
    SemgrexPattern.compile("{}=pivot >"+GEN_SUBJ+" ( "+QUANTIFIER+" >"+GEN_CLAUSE+" {}=subject ) >"+GEN_OBJ+" {}=object"),
    // { Everyone at Stanford voted for Colbert }
    SemgrexPattern.compile("{pos:/V.*/}=pivot >"+GEN_SUBJ+" ( "+QUANTIFIER+" >"+GEN_CLAUSE+" {}=subject ) >"+GEN_PREP+" {}=object"),
    // { Felix likes cat food }
    SemgrexPattern.compile("{}=pivot >"+GEN_SUBJ+" {pos:NNP}=Subject >"+GEN_OBJ+" {}=object"),
    // { Felix has spoken to Fido }
    // nmod used to be prep - problem?
    SemgrexPattern.compile("{pos:/V.*/}=pivot >"+GEN_SUBJ+" {pos:NNP}=Subject >/nmod|ccomp|[di]obj/ {}=object"),
    // { Felix is a cat,
    //   Felix is cute }
    SemgrexPattern.compile("{}=object >"+GEN_SUBJ+" {pos:NNP}=Subject >"+GEN_COP+" {}=pivot"),
    // { Some cats do n't like dogs }
    SemgrexPattern.compile("{}=pivot >neg "+QUANTIFIER+" >"+GEN_OBJ+" {}=object"),
    // { Obama was not born in Dallas }
    SemgrexPattern.compile("{}=pivot >/neg/ {}=quantifier >"+GEN_PREP+" {}=object"),
    // { All of the cats hate dogs. }
    // nmod used to be prep - problem?
    SemgrexPattern.compile("{pos:/V.*/}=pivot >"+GEN_SUBJ+" ( "+QUANTIFIER+" >/nmod.*/ {}=subject ) >"+GEN_OBJ+" {}=object"),
    // Disabled variant of the pattern above that handled a common parse error:
    //   "{pos:/V.*/}=pivot > ( "+QUANTIFIER+" >/nmod.*/ {}=subject ) >"+GEN_SUBJ+" {}=object"
    // { Either cats or dogs have tails. }
    SemgrexPattern.compile("{pos:/V.*/}=pivot > {lemma:either}=quantifier >"+GEN_SUBJ+" {}=subject >"+GEN_OBJ+" {}=object"),
    // { There are cats }
    SemgrexPattern.compile("{}=quantifier >"+GEN_SUBJ+" {}=pivot >>expl {}")
));
/**
 * A pattern for just trivial unary quantification, in case a quantifier doesn't match any of the patterns in
 * {@link edu.stanford.nlp.naturalli.NaturalLogicAnnotator#PATTERNS}.
 * Examples: { Cats eat _some_ mice, Cats eat _most_ mice }.
 * Binds the noun as {@code subject} and its quantifier as {@code quantifier}.
 */
private static final SemgrexPattern UNARY_PATTERN = SemgrexPattern.compile("{pos:/N.*/}=subject >"+DET+" "+QUANTIFIER);
/**
 * A list of words that suggest their complement has downward polarity.
 * For example, "doubt" ("I doubt that X"). The list is immutable.
 */
private static final List<String> DOUBT_WORDS = Collections.unmodifiableList(Arrays.asList("doubt", "skeptical"));
/**
 * A pattern for recognizing the words in {@link NaturalLogicAnnotator#DOUBT_WORDS}.
 * The doubt word is captured as {@code $doubt}; the following "that"/"of" plus at least one more token
 * is captured as {@code $target}.
 */
private static final TokenSequencePattern DOUBT_PATTERN
    = TokenSequencePattern.compile("(?$doubt [{ lemma:/" + StringUtils.join(DOUBT_WORDS, "|") + "/}]) (?$target [{lemma:/that|of/}] []+ )");
/**
 * A helper method for
 * {@link NaturalLogicAnnotator#getModifierSubtreeSpan(edu.stanford.nlp.semgraph.SemanticGraph, edu.stanford.nlp.ling.IndexedWord)} and
 * {@link NaturalLogicAnnotator#getSubtreeSpan(edu.stanford.nlp.semgraph.SemanticGraph, edu.stanford.nlp.ling.IndexedWord)}.
 *
 * <p>Walks the graph breadth-first from {@code root} and returns the smallest and largest
 * {@link IndexedWord#index()} reached. Edges labelled {@code punct} are never followed. The
 * {@code validArcs} restriction applies only to the edges leaving {@code root} itself; below that
 * first level every non-punctuation edge is followed.</p>
 *
 * <p>Each node is expanded at most once, so the walk terminates even if the graph contains
 * a cycle or a node with several governors.</p>
 *
 * @param tree The dependency graph to get the span from.
 * @param root The root word of the span.
 * @param validArcs The relation short names that may be followed out of {@code root},
 *                  or {@code null} to allow every relation.
 * @return The half-open span {@code (min index, max index + 1)} over the reached nodes, including {@code root}.
 */
private static Pair<Integer, Integer> getGeneralizedSubtreeSpan(SemanticGraph tree, IndexedWord root, Set<String> validArcs) {
  int min = root.index();
  int max = root.index();
  // Nodes that have already been queued; guards against revisiting and against cycles.
  Set<IndexedWord> visited = new HashSet<>();
  visited.add(root);
  Queue<IndexedWord> fringe = new ArrayDeque<>();
  // First level: honour the caller's arc restriction.
  for (SemanticGraphEdge edge : tree.outgoingEdgeIterable(root)) {
    String edgeLabel = edge.getRelation().getShortName();
    if ((validArcs == null || validArcs.contains(edgeLabel)) &&
        !"punct".equals(edgeLabel) &&
        visited.add(edge.getDependent())) {
      fringe.add(edge.getDependent());
    }
  }
  // Deeper levels: follow everything except punctuation.
  while (!fringe.isEmpty()) {
    IndexedWord node = fringe.poll();
    min = Math.min(node.index(), min);
    max = Math.max(node.index(), max);
    for (SemanticGraphEdge edge : tree.getOutEdgesSorted(node)) {
      // Check the label first so a punctuation edge never marks its dependent as visited.
      if (!"punct".equals(edge.getRelation().getShortName()) && visited.add(edge.getDependent())) {
        fringe.add(edge.getDependent());
      }
    }
  }
  return Pair.makePair(min, max + 1);
}
/** Relation short names followed out of the root by {@link NaturalLogicAnnotator#getModifierSubtreeSpan(SemanticGraph, IndexedWord)}. */
private static final Set<String> MODIFIER_ARCS = Collections.unmodifiableSet(new HashSet<>(Arrays.asList("aux", "nmod")));
/** Relation short names followed out of the root by {@link NaturalLogicAnnotator#getProperNounSubtreeSpan(SemanticGraph, IndexedWord)}. */
private static final Set<String> NOUN_COMPONENT_ARCS = Collections.singleton("compound");
/**
 * Computes the span yielded by the given word when only the relations in {@code MODIFIER_ARCS}
 * may be followed out of it.
 *
 * @param tree The dependency graph to get the span from.
 * @param root The root word of the span.
 * @return A one indexed span rooted at the given word.
 */
private static Pair<Integer, Integer> getModifierSubtreeSpan(SemanticGraph tree, IndexedWord root) {
  final Set<String> followedArcs = MODIFIER_ARCS;
  return getGeneralizedSubtreeSpan(tree, root, followedArcs);
}
/**
 * Computes the span yielded by the given word when only relations that stay inside the same
 * noun phrase ({@code NOUN_COMPONENT_ARCS}) may be followed out of it.
 *
 * @param tree The dependency graph to get the span from.
 * @param root The root word of the span.
 * @return A one indexed span rooted at the given word.
 */
private static Pair<Integer, Integer> getProperNounSubtreeSpan(SemanticGraph tree, IndexedWord root) {
  final Set<String> followedArcs = NOUN_COMPONENT_ARCS;
  return getGeneralizedSubtreeSpan(tree, root, followedArcs);
}
/**
 * Computes the full yield span of the given word, with no restriction on which relations are followed.
 * For example, in "all cats like dogs" the word "cats" yields the span (1, 3) -- "all cats".
 *
 * @param tree The dependency graph to get the span from.
 * @param root The root word of the span.
 * @return A one indexed span rooted at the given word.
 */
private static Pair<Integer, Integer> getSubtreeSpan(SemanticGraph tree, IndexedWord root) {
  final Set<String> noArcRestriction = null;  // null means "every relation is allowed"
  return getGeneralizedSubtreeSpan(tree, root, noArcRestriction);
}
/**
 * Merges two spans: the result starts at the smaller begin and ends at the larger end.
 *
 * @param span The first span.
 * @param toInclude The span that must also be covered.
 * @return The smallest span covering both arguments.
 */
private static Pair<Integer, Integer> includeInSpan(Pair<Integer, Integer> span, Pair<Integer, Integer> toInclude) {
  int begin = Math.min(span.first, toInclude.first);
  int end = Math.max(span.second, toInclude.second);
  return Pair.makePair(begin, end);
}
/**
 * Removes the second span from the first when the second sits on an edge of the first.
 * If the second span lies strictly in the middle of the first it is unclear what should be removed,
 * so the original span is returned unchanged; the same happens when the two spans do not overlap.
 *
 * @param span The span to trim.
 * @param toExclude The span to cut away.
 * @return The trimmed span, or {@code span} itself when nothing is cut.
 */
private static Pair<Integer, Integer> excludeFromSpan(Pair<Integer, Integer> span, Pair<Integer, Integer> toExclude) {
  Integer spanBegin = span.first;
  Integer spanEnd = span.second;
  Integer cutBegin = toExclude.first;
  Integer cutEnd = toExclude.second;

  // Case: toExclude is outside of the span anyways
  if (cutEnd <= spanBegin || cutBegin >= spanEnd) {
    return span;
  }
  // Case: overlap on the front
  if (cutBegin <= spanBegin && cutEnd > spanBegin) {
    return Pair.makePair(cutEnd, spanEnd);
  }
  // Case: overlap on the back
  if (cutBegin < spanEnd && cutEnd >= spanEnd) {
    return Pair.makePair(spanBegin, cutBegin);
  }
  // Case: toExclude is within the span
  if (cutBegin > spanBegin && cutEnd < spanEnd) {
    return span;
  }
  throw new IllegalStateException("This case should be impossible");
}
/**
 * Compute the scope of an operator for a matched pattern.
 * At a high level:
 *
 * <ul>
 *   <li>With both a subject and an object: the subject scope is the subject subtree minus the quantifier,
 *       and the object scope is the object subtree plus the pivot's modifiers, minus the subject subtree.</li>
 *   <li>With only an object: the object subtree (plus the pivot's {@code nmod} arcs) becomes the subject scope,
 *       and an empty object scope signifies a one-place quantifier.</li>
 *   <li>With neither: the pivot's subtree, minus the quantifier, is the subject scope and the object scope is empty.</li>
 *   <li>With a proper-noun subject: only {@code compound} arcs are followed when computing the subject subtree.</li>
 * </ul>
 *
 * All incoming spans are one-indexed; the resulting {@link OperatorSpec} is built from those values minus one.
 *
 * @param tree The dependency graph of the sentence.
 * @param operator The operator that was matched.
 * @param pivot The pivot node of the match.
 * @param quantifierSpan The span of the quantifier.
 * @param subject The subject node, or {@code null}.
 * @param isProperNounSubject Whether the subject is a proper noun acting as its own quantifier.
 * @param object The object node, or {@code null}.
 * @param sentenceLength The number of tokens in the sentence.
 * @return The operator together with its quantifier, subject and object scopes.
 */
private static OperatorSpec computeScope(SemanticGraph tree, Operator operator,
                                         IndexedWord pivot, Pair<Integer, Integer> quantifierSpan,
                                         IndexedWord subject, boolean isProperNounSubject, IndexedWord object,
                                         int sentenceLength) {
  Pair<Integer, Integer> subjSpan;
  Pair<Integer, Integer> objSpan;

  if (subject != null) {
    // Two-place case: subject and object are both scoped.
    Pair<Integer, Integer> subjectSubtree = isProperNounSubject
        ? getProperNounSubtreeSpan(tree, subject)
        : getSubtreeSpan(tree, subject);
    subjSpan = excludeFromSpan(subjectSubtree, quantifierSpan);
    Pair<Integer, Integer> objectWithModifiers =
        includeInSpan(getSubtreeSpan(tree, object), getModifierSubtreeSpan(tree, pivot));
    objSpan = excludeFromSpan(objectWithModifiers, subjectSubtree);
  } else if (object != null) {
    // One-place case with an object: the object plays the role of the subject.
    Pair<Integer, Integer> objectSubtree = getSubtreeSpan(tree, object);
    Pair<Integer, Integer> pivotPrepositions = getGeneralizedSubtreeSpan(tree, pivot, Collections.singleton("nmod"));
    subjSpan = includeInSpan(objectSubtree, pivotPrepositions);
    objSpan = Pair.makePair(subjSpan.second, subjSpan.second);
  } else {
    // One-place case with nothing but a pivot.
    Pair<Integer, Integer> pivotSubtree = getSubtreeSpan(tree, pivot);
    if (Span.fromPair(pivotSubtree).contains(Span.fromPair(quantifierSpan))) {
      // Don't consume the quantifier -- take only the part after the quantifier
      int begin = Math.max(pivotSubtree.first, quantifierSpan.second);
      int end = pivotSubtree.second;
      // Never leave the subject empty: fall back to a single token.
      subjSpan = (end <= begin) ? Pair.makePair(begin, begin + 1) : Pair.makePair(begin, end);
    } else {
      // Exclude the quantifier from the span
      subjSpan = excludeFromSpan(pivotSubtree, quantifierSpan);
    }
    objSpan = Pair.makePair(subjSpan.second, subjSpan.second);
  }

  // If the subject still straddles the end of the quantifier, start it right after the quantifier.
  boolean straddlesQuantifierEnd =
      subjSpan.first < quantifierSpan.second && subjSpan.second > quantifierSpan.second;
  if (straddlesQuantifierEnd) {
    subjSpan = Pair.makePair(quantifierSpan.second, subjSpan.second);
  }

  // Shift every boundary down by one when building the spec.
  return new OperatorSpec(operator,
      quantifierSpan.first - 1, quantifierSpan.second - 1,
      subjSpan.first - 1, subjSpan.second - 1,
      objSpan.first - 1, objSpan.second - 1,
      sentenceLength);
}
/**
 * Try to find which quantifier we matched, given that we matched the head of a quantifier at the given IndexedWord, and that
 * this whole deal is taking place in the given sentence.
 *
 * <p>Candidate token windows end at the head (or up to two tokens after it when the head is tagged {@code CD})
 * and start at most ten tokens before it. Each window is glossed as its space-joined lemmas, with every
 * {@code CD} token replaced by {@code --NUM--}, lower-cased, and compared against each operator's surface form.</p>
 *
 * <p>If the head's index does not fall inside the sentence's token list, no quantifier is reported.</p>
 *
 * @param sentence The sentence we are matching.
 * @param quantifier The word at which we matched a quantifier.
 * @return An optional triple consisting of the particular quantifier we matched, as well as the span of that quantifier
 *         in the sentence (window begin + 1, window end + 1); the end never exceeds the token count + 1.
 */
private static Optional<Triple<Operator,Integer,Integer>> validateQuantifierByHead(CoreMap sentence, IndexedWord quantifier) {
  // Some useful variables
  List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
  int quantIndex = quantifier.index();
  // The dependency graph may not line up with the token list; bail out rather than index out of range.
  if (tokens == null || quantIndex < 1 || quantIndex > tokens.size()) {
    return Optional.empty();
  }
  Function<CoreLabel, String> glossFn = (label) -> "CD".equals(label.tag()) ? "--NUM--" : label.lemma();

  // Look forward a bit too, if the head is a number.
  int[] positiveOffsetToCheck = "CD".equals(tokens.get(quantIndex - 1).tag()) ? new int[]{2, 1, 0} : new int[]{0};

  // Try searching backwards for the right quantifier
  for (int offsetEnd : positiveOffsetToCheck) {
    // Clamp the look-ahead so the reported span never runs past the end of the sentence.
    int end = Math.min(quantIndex + offsetEnd, tokens.size());
    for (int start = Math.max(0, quantIndex - 10); start < quantIndex; ++start) {
      // Locale.ROOT keeps the comparison independent of the JVM's default locale.
      String gloss = StringUtils.join(tokens, " ", glossFn, start, end).toLowerCase(Locale.ROOT);
      for (Operator q : Operator.valuesByLengthDesc) {
        if (q.surfaceForm.equals(gloss)) {
          return Optional.of(Triple.makeTriple(q, start + 1, end + 1));
        }
      }
    }
  }
  return Optional.empty();
}
/**
* Find the operators in this sentence, annotating the head word (only!) of each operator with the
* {@link edu.stanford.nlp.naturalli.NaturalLogicAnnotations.OperatorAnnotation}.
*
* Every pattern in {@link NaturalLogicAnnotator#PATTERNS} is run over the sentence's dependency graph
* (basic dependencies, falling back to enhanced dependencies when the basic graph is absent).
* For each match the quantifier is validated, its scope is computed, and the resulting
* {@link OperatorSpec} is stored on the token at the quantifier's index. A final pass removes
* annotations from tokens that lie inside another operator's quantifier span without being its head.
*
* @param sentence As in {@link edu.stanford.nlp.naturalli.NaturalLogicAnnotator#doOneSentence(edu.stanford.nlp.pipeline.Annotation, edu.stanford.nlp.util.CoreMap)}
*/
private void annotateOperators(CoreMap sentence) {
SemanticGraph tree = sentence.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class);
List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
if (tree == null) {
tree = sentence.get(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation.class);
}
// NOTE(review): if neither graph is present, tree is still null here and is handed to the matcher below -- confirm callers guarantee one.
for (SemgrexPattern pattern : PATTERNS) {
SemgrexMatcher matcher = pattern.matcher(tree);
while (matcher.find()) {
// Get terms
// A node named "Subject" (capital S) is only bound by the proper-noun patterns;
// in that case the same node serves as both quantifier and subject.
IndexedWord properSubject = matcher.getNode("Subject");
IndexedWord quantifier, subject;
boolean namedEntityQuantifier = false;
if (properSubject != null) {
quantifier = subject = properSubject;
namedEntityQuantifier = true;
} else {
quantifier = matcher.getNode("quantifier");
subject = matcher.getNode("subject");
}
// Validate quantifier
// At the end of this step, quantifierInfo holds the operator and its span, or is empty if no operator was recognized.
Optional<Triple<Operator,Integer,Integer>> quantifierInfo;
if (namedEntityQuantifier) {
// named entities have the "all" semantics by default.
// Skip the match entirely unless the neQuantifiers option is enabled.
if (!neQuantifiers) {
continue;
}
quantifierInfo = Optional.of(Triple.makeTriple(Operator.IMPLICIT_NAMED_ENTITY, quantifier.index(), quantifier.index())); // note: empty quantifier span given
} else {
// find the quantifier, and return some info about it.
quantifierInfo = validateQuantifierByHead(sentence, quantifier);
}
// Awful hacks to regularize the subject of things like "one of" and "there are"
// (fix up 'there are')
// When the matched subject is a form of "be" with an expl dependent, use its nsubj dependent
// as the subject instead (this may leave the subject null if there is no nsubj).
if ("be".equals(subject == null ? null : subject.lemma())) {
boolean hasExpl = false;
IndexedWord newSubject = null;
for (SemanticGraphEdge outgoingEdge : tree.outgoingEdgeIterable(subject)) {
if ("nsubj".equals(outgoingEdge.getRelation().toString())) {
newSubject = outgoingEdge.getDependent();
} else if ("expl".equals(outgoingEdge.getRelation().toString())) {
hasExpl = true;
}
}
if (hasExpl) {
subject = newSubject;
}
}
// (fix up '$n$ of')
// When the subject is a number, replace it with a dependent reached through an nmod* relation.
// The loop keeps iterating after a replacement, now over the edges it was already walking.
if ("CD".equals(subject == null ? null : subject.tag())) {
for (SemanticGraphEdge outgoingEdge : tree.outgoingEdgeIterable(subject)) {
String rel = outgoingEdge.getRelation().toString();
if (rel.startsWith("nmod")) {
subject = outgoingEdge.getDependent();
}
}
}
// Set tokens
if (quantifierInfo.isPresent()) {
// Compute span
OperatorSpec scope = computeScope(tree, quantifierInfo.get().first,
matcher.getNode("pivot"), Pair.makePair(quantifierInfo.get().second, quantifierInfo.get().third),
subject, namedEntityQuantifier, matcher.getNode("object"), tokens.size());
// Set annotation
// The annotation lives on the token at the quantifier's index (shifted down by one).
CoreLabel token = sentence.get(CoreAnnotations.TokensAnnotation.class).get(quantifier.index() - 1);
OperatorSpec oldScope = token.get(OperatorAnnotation.class);
// Replace the existing annotation when there is none, when the new quantifier is longer,
// or when it is a different operator; otherwise merge the two scopes.
if (oldScope == null || oldScope.quantifierLength() < scope.quantifierLength() ||
oldScope.instance != scope.instance) {
token.set(OperatorAnnotation.class, scope);
} else {
token.set(OperatorAnnotation.class, OperatorSpec.merge(oldScope, scope));
}
}
}
}
// Ensure we didn't select overlapping quantifiers. For example, "a" and "a few" can often overlap.
// In these cases, take the longer quantifier match.
List<OperatorSpec> quantifiers = new ArrayList<>();
sentence.get(CoreAnnotations.TokensAnnotation.class).stream()
.filter(token -> token.containsKey(OperatorAnnotation.class))
.forEach(token -> quantifiers.add(token.get(OperatorAnnotation.class)));
// Longest quantifier first.
quantifiers.sort( (x, y) -> y.quantifierLength() - x.quantifierLength());
for (OperatorSpec quantifier : quantifiers) {
// Strip the annotation from every token covered by this quantifier except its head.
for (int i = quantifier.quantifierBegin; i < quantifier.quantifierEnd; ++i) {
if (i != quantifier.quantifierHead) {
tokens.get(i).remove(OperatorAnnotation.class);
}
}
}
}
/**
 * Annotate any unary quantifiers that weren't found in the main {@link NaturalLogicAnnotator#annotateOperators(CoreMap)} method.
 * Two passes are made: one over the dependency graph using the unary Semgrex pattern, and one over the
 * token sequence looking for "doubt"-style words, whose complement is marked with
 * {@code Operator.GENERAL_NEG_POLARITY}.
 *
 * @param sentence The sentence to annotate.
 */
private static void annotateUnaries(CoreMap sentence) {
  // Prefer the basic dependencies; use the enhanced graph only when the basic one is missing.
  SemanticGraph basicGraph = sentence.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class);
  SemanticGraph tree = (basicGraph != null)
      ? basicGraph
      : sentence.get(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation.class);
  List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class);

  // Mark every token position already covered by some operator's quantifier span.
  boolean[] isOperator = new boolean[tokens.size()];
  for (CoreLabel token : tokens) {
    OperatorSpec existing = token.get(OperatorAnnotation.class);
    if (existing == null) {
      continue;
    }
    for (int k = existing.quantifierBegin; k < existing.quantifierEnd; ++k) {
      isOperator[k] = true;
    }
  }

  // Pass 1: Semgrex match for simple "quantifier + noun" constructions.
  SemgrexMatcher matcher = UNARY_PATTERN.matcher(tree);
  while (matcher.find()) {
    IndexedWord quantifier = matcher.getNode("quantifier");
    String surface = quantifier.word().toLowerCase();
    boolean isArticle = surface.equals("a") || surface.equals("an") || surface.equals("the");
    if (isArticle || "CD".equals(quantifier.tag())) {
      // These are absurdly common, and uninformative, and we're just going to shoot ourselves
      // in the foot from parsing errors and idiomatic expressions.
      continue;
    }
    int headPosition = quantifier.index() - 1;
    if (isOperator[headPosition]) {
      continue;  // an operator was already found here
    }
    Optional<Triple<Operator, Integer, Integer>> quantifierInfo = validateQuantifierByHead(sentence, quantifier);
    if (!quantifierInfo.isPresent()) {
      continue;  // no known quantifier ends at this head
    }
    // Then add the unary operator!
    Triple<Operator, Integer, Integer> match = quantifierInfo.get();
    IndexedWord subject = matcher.getNode("subject");
    OperatorSpec scope = computeScope(tree, match.first,
        subject, Pair.makePair(match.second, match.third),
        null, false, null, tokens.size());
    tokens.get(headPosition).set(OperatorAnnotation.class, scope);
  }

  // Pass 2: TokensRegex match for words such as "doubt".
  TokenSequenceMatcher tokenMatcher = DOUBT_PATTERN.matcher(tokens);
  while (tokenMatcher.find()) {
    List<CoreLabel> doubt = (List<CoreLabel>) tokenMatcher.groupNodes("$doubt");
    List<CoreLabel> target = (List<CoreLabel>) tokenMatcher.groupNodes("$target");
    for (CoreLabel word : doubt) {
      // The doubt word is the operator; the whole target group is its subject scope; the object scope is empty.
      int operatorBegin = word.index() - 1;
      int operatorEnd = word.index();
      int scopeBegin = target.get(0).index() - 1;
      int scopeEnd = target.get(target.size() - 1).index();
      OperatorSpec spec = new OperatorSpec(Operator.GENERAL_NEG_POLARITY,
          operatorBegin, operatorEnd,
          scopeBegin, scopeEnd,
          0, 0,
          tokens.size());
      word.set(OperatorAnnotation.class, spec);
    }
  }
}
/**
 * Annotate every token for its polarity, based on the operators found. This function will set the
 * {@link edu.stanford.nlp.naturalli.NaturalLogicAnnotations.PolarityAnnotation} for every token.
 *
 * <p>For each token, every operator whose subject or object scope covers the token contributes its
 * monotonicity for that argument; contributions are ordered from the widest scope to the narrowest
 * before the {@link Polarity} is built.</p>
 *
 * @param sentence As in {@link edu.stanford.nlp.naturalli.NaturalLogicAnnotator#doOneSentence(edu.stanford.nlp.pipeline.Annotation, edu.stanford.nlp.util.CoreMap)}
 */
private static void annotatePolarity(CoreMap sentence) {
  // Collect all the operators in this sentence
  List<OperatorSpec> operators = new ArrayList<>();
  List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
  for (CoreLabel token : tokens) {
    OperatorSpec specOrNull = token.get(OperatorAnnotation.class);
    if (specOrNull != null) {
      operators.add(specOrNull);
    }
  }

  // Make sure every node of the dependency tree has a polarity.
  // This is separate from the code below in case the tokens in the dependency
  // tree don't correspond to the tokens in the sentence. This happens at least
  // when the constituency parser fails on a long sentence, and the
  // dependency tree is put together haphazardly.
  setDefaultPolarity(sentence.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class));
  setDefaultPolarity(sentence.get(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation.class));
  setDefaultPolarity(sentence.get(SemanticGraphCoreAnnotations.EnhancedPlusPlusDependenciesAnnotation.class));

  // Set polarity for each token
  for (int i = 0; i < tokens.size(); ++i) {
    CoreLabel token = tokens.get(i);
    // Get operators in scope
    List<Triple<Integer, Monotonicity, MonotonicityType>> inScope = new ArrayList<>(4);
    for (OperatorSpec operator : operators) {
      if (i >= operator.subjectBegin && i < operator.subjectEnd) {
        inScope.add(Triple.makeTriple(operator.subjectEnd - operator.subjectBegin, operator.instance.subjMono, operator.instance.subjType));
      } else if (i >= operator.objectBegin && i < operator.objectEnd) {
        inScope.add(Triple.makeTriple(operator.objectEnd - operator.objectBegin, operator.instance.objMono, operator.instance.objType));
      }
    }
    // Sort the operators by their scope (approximated by the size of their argument span), widest first.
    inScope.sort((x, y) -> Integer.compare(y.first, x.first));
    // Create polarity
    List<Pair<Monotonicity, MonotonicityType>> info = new ArrayList<>(inScope.size());
    for (Triple<Integer, Monotonicity, MonotonicityType> term : inScope) {
      info.add(Pair.makePair(term.second, term.third));
    }
    // Set polarity
    token.set(PolarityAnnotation.class, new Polarity(info));
  }
}

/** Sets {@code Polarity.DEFAULT} on every vertex of the given graph; does nothing when the graph is {@code null}. */
private static void setDefaultPolarity(SemanticGraph graph) {
  if (graph == null) {
    return;
  }
  for (IndexedWord vertex : graph.vertexSet()) {
    vertex.set(PolarityAnnotation.class, Polarity.DEFAULT);
  }
}
/**
* If false, don't annotate tokens for polarity but only find the operators and their scopes.
* Defaults to true; populated from the "doPolarity" option by the constructor's call to
* {@code ArgumentParser.fillOptions}, which is why the field is not final.
*/
@ArgumentParser.Option(name="doPolarity", gloss="Mark polarity in addition to quantifier scopes")
private boolean doPolarity = true;
/**
* If true, proper-noun subjects are treated as implicit quantifiers; if false, matches of the
* proper-noun patterns are skipped. Defaults to false; populated from the "neQuantifiers" option.
*/
@ArgumentParser.Option(name="neQuantifiers", gloss="If true, mark named entities as quantifiers.")
private boolean neQuantifiers = false;
/**
* Create a new annotator.
* The option fields of this instance are filled in from the given properties via
* {@code ArgumentParser.fillOptions}.
* @param annotatorName The prefix for the properties for this annotator.
* @param props The properties to configure this annotator with.
*/
public NaturalLogicAnnotator(String annotatorName, Properties props) {
ArgumentParser.fillOptions(this, annotatorName, props);
}
/**
* Create a new annotator, using {@code STANFORD_NATLOG} as the annotator name.
* @param props The properties to configure this annotator with.
* @see edu.stanford.nlp.naturalli.NaturalLogicAnnotator#NaturalLogicAnnotator(String, java.util.Properties)
*/
public NaturalLogicAnnotator(Properties props) {
this(STANFORD_NATLOG, props);
}
/**
* The default constructor. Delegates with an empty set of properties and a placeholder
* annotator name, so the option fields are filled from nothing but that empty set.
*/
public NaturalLogicAnnotator() {
this("__irrelevant__", new Properties());
}
/**
 * {@inheritDoc}
 * Marks operators first, then leftover unary quantifiers, and finally -- only when polarity
 * marking is enabled -- the polarity of every token.
 */
@Override
protected void doOneSentence(Annotation annotation, CoreMap sentence) {
  annotateOperators(sentence);
  annotateUnaries(sentence);
  if (!doPolarity) {
    return;  // scopes only; polarity marking is switched off
  }
  annotatePolarity(sentence);
}
/**
* {@inheritDoc}
* This annotator always reports a thread count of 1.
*/
@Override
protected int nThreads() {
return 1;
}
/**
* {@inheritDoc}
* This annotator always returns -1.
* NOTE(review): presumably -1 means "no time limit" -- confirm against the superclass.
*/
@Override
protected long maxTime() {
return -1;
}
/**
* {@inheritDoc}
* This implementation only logs the text of the sentence; it attaches no annotations to it.
*/
@Override
protected void doOneFailedSentence(Annotation annotation, CoreMap sentence) {
log.info("Failed to annotate: " + sentence.get(CoreAnnotations.TextAnnotation.class));
}
/**
 * {@inheritDoc}
 * The operator annotation is always provided; the polarity annotation is listed only when polarity
 * marking is enabled. The returned set is unmodifiable and never contains {@code null}.
 */
@Override
public Set<Class<? extends CoreAnnotation>> requirementsSatisfied() {
  List<Class<? extends CoreAnnotation>> satisfied = new ArrayList<>(2);
  if (doPolarity) {
    satisfied.add(NaturalLogicAnnotations.PolarityAnnotation.class);
  }
  satisfied.add(NaturalLogicAnnotations.OperatorAnnotation.class);
  return Collections.unmodifiableSet(new ArraySet<>(satisfied));
}
/**
* {@inheritDoc}
* This annotator declares that it needs text, tokens, token and sentence indices, sentences,
* part-of-speech tags, lemmas, and the basic, enhanced and enhanced++ dependency graphs.
* The returned set is unmodifiable.
*/
@Override
public Set<Class<? extends CoreAnnotation>> requires() {
return Collections.unmodifiableSet(new ArraySet<>(Arrays.asList(
CoreAnnotations.TextAnnotation.class,
CoreAnnotations.TokensAnnotation.class,
CoreAnnotations.IndexAnnotation.class,
CoreAnnotations.SentencesAnnotation.class,
CoreAnnotations.SentenceIndexAnnotation.class,
CoreAnnotations.PartOfSpeechAnnotation.class,
CoreAnnotations.LemmaAnnotation.class,
SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class,
SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation.class,
SemanticGraphCoreAnnotations.EnhancedPlusPlusDependenciesAnnotation.class
)));
}
}