package edu.stanford.nlp.ling.tokensregex;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.util.*;
import java.util.*;
import java.util.function.Function;
/**
 * Trigger for CoreMap Node Patterns.  Allows for fast identification of which patterns
 * may match for one node.
 *
 * <p>At construction time, one string trigger is selected per pattern (when any candidate
 * is found) and the pattern is indexed under that trigger's annotation key and value.
 * Patterns for which no string trigger candidate is found are always returned by
 * {@link #apply(CoreMap)}.
 *
 * <p>Case-insensitive triggers are indexed and looked up by their lowercased value using
 * {@link Locale#ROOT}, so the result does not depend on the JVM default locale.
 *
 * @author Angel Chang
 */
public class CoreMapNodePatternTrigger implements MultiPatternMatcher.NodePatternTrigger<CoreMap> {
  /** The patterns this trigger was built from (kept as passed to the constructor). */
  Collection<? extends SequencePattern<CoreMap>> patterns;
  /** Patterns with no usable string trigger; returned for every node. */
  Collection<SequencePattern<CoreMap>> alwaysTriggered = new LinkedHashSet<>();
  /** Case-sensitive index: annotation key -> exact annotation value -> patterns. */
  TwoDimensionalCollectionValuedMap<Class, Object, SequencePattern<CoreMap>> annotationTriggers =
          new TwoDimensionalCollectionValuedMap<>();
  /** Case-insensitive index: annotation key -> lowercased string value -> patterns. */
  TwoDimensionalCollectionValuedMap<Class, String, SequencePattern<CoreMap>> lowercaseStringTriggers =
          new TwoDimensionalCollectionValuedMap<>();

  /**
   * Creates a trigger for the given patterns.
   *
   * @param patterns patterns to index
   */
  @SafeVarargs  // the array is only wrapped in a list and read, never written to
  public CoreMapNodePatternTrigger(SequencePattern<CoreMap>... patterns) {
    this(Arrays.asList(patterns));
  }

  /**
   * Creates a trigger for the given patterns.  For each pattern, the string trigger
   * candidates are collected and the highest ranked one (see
   * {@code STRING_TRIGGER_CANDIDATE_COMPARATOR}) is used to index the pattern.
   * Patterns without any candidate are added to the always-triggered set.
   *
   * @param patterns patterns to index
   */
  public CoreMapNodePatternTrigger(Collection<? extends SequencePattern<CoreMap>> patterns) {
    this.patterns = patterns;

    // Maps a node pattern to a trigger candidate built from its first string annotation
    // pattern, or null when the node pattern has none.
    Function<NodePattern<CoreMap>, StringTriggerCandidate> stringTriggerFilter = in -> {
      if (in instanceof CoreMapNodePattern) {
        CoreMapNodePattern p = (CoreMapNodePattern) in;
        for (Pair<Class, NodePattern> v : p.getAnnotationPatterns()) {
          if (v.second instanceof CoreMapNodePattern.StringAnnotationPattern) {
            CoreMapNodePattern.StringAnnotationPattern stringPattern =
                    (CoreMapNodePattern.StringAnnotationPattern) v.second;
            return new StringTriggerCandidate(v.first, stringPattern.target, stringPattern.ignoreCase());
          }
        }
      }
      return null;
    };

    for (SequencePattern<CoreMap> pattern : patterns) {
      // Look for first string...
      Collection<StringTriggerCandidate> triggerCandidates =
              pattern.findNodePatterns(stringTriggerFilter, false, true);
      // TODO: Select most unlikely to trigger trigger from the triggerCandidates
      //  (if we had some statistics on most frequent annotation values...., then pick least frequent)
      // For now, just pick the longest: going from (text or lemma) to rest
      StringTriggerCandidate trigger =
              triggerCandidates.stream().max(STRING_TRIGGER_CANDIDATE_COMPARATOR).orElse(null);
      if (trigger == null) {
        // No candidate at all: this pattern has to be considered for every node.
        alwaysTriggered.add(pattern);
      } else if (trigger.ignoreCase) {
        // Locale.ROOT keeps the index key independent of the default locale
        // (e.g. Turkish dotted/dotless i) and consistent with the lookup in apply().
        lowercaseStringTriggers.add(trigger.key, trigger.value.toLowerCase(Locale.ROOT), pattern);
      } else {
        annotationTriggers.add(trigger.key, trigger.value, pattern);
      }
    }
  }

  /** A possible string trigger for a pattern, together with the values used to rank it. */
  private static class StringTriggerCandidate {
    /** Annotation key whose value is tested. */
    final Class key;
    /** Target string the annotation value is compared against. */
    final String value;
    /** Whether the comparison ignores case. */
    final boolean ignoreCase;
    /** 1 for text and lemma annotations, 0 for everything else. */
    final int keyLevel;
    /** Length of {@link #value}, except that -LRB- and -RRB- count as length 1. */
    final int effectiveValueLength;

    public StringTriggerCandidate(Class key, String value, boolean ignoreCase) {
      this.key = key;
      this.value = value;
      this.ignoreCase = ignoreCase;
      // Favor text and lemma (more likely to be unique)
      boolean textOrLemma = CoreAnnotations.TextAnnotation.class.equals(key)
              || CoreAnnotations.LemmaAnnotation.class.equals(key);
      this.keyLevel = textOrLemma ? 1 : 0;
      // Special case for -LRB- ( and -RRB- )
      boolean bracket = "-LRB-".equals(value) || "-RRB-".equals(value);
      this.effectiveValueLength = bracket ? 1 : value.length();
    }
  }

  /**
   * Ranks trigger candidates: higher key level first, then longer effective value length.
   * The greatest candidate under this ordering is the one used as the trigger.
   */
  private static final Comparator<StringTriggerCandidate> STRING_TRIGGER_CANDIDATE_COMPARATOR =
          Comparator.comparingInt((StringTriggerCandidate c) -> c.keyLevel)
                  .thenComparingInt(c -> c.effectiveValueLength);

  /**
   * Returns the patterns that may match the given node: all always-triggered patterns,
   * plus those indexed under one of the node's annotation values (exact match), plus
   * those indexed under the lowercased form of one of the node's string annotation values.
   *
   * @param in node to look up
   * @return triggered patterns, without duplicates, in insertion order
   */
  @Override
  public Collection<SequencePattern<CoreMap>> apply(CoreMap in) {
    Set<SequencePattern<CoreMap>> triggeredPatterns = new LinkedHashSet<>(alwaysTriggered);

    for (Class key : annotationTriggers.firstKeySet()) {
      Object value = in.get(key);
      if (value != null) {
        Collection<SequencePattern<CoreMap>> triggered = annotationTriggers.get(key, value);
        if (triggered != null) {
          triggeredPatterns.addAll(triggered);
        }
      }
    }

    for (Class key : lowercaseStringTriggers.firstKeySet()) {
      Object value = in.get(key);
      if (value instanceof String) {
        // Must lowercase exactly as the constructor did when building the index.
        String lowercased = ((String) value).toLowerCase(Locale.ROOT);
        Collection<SequencePattern<CoreMap>> triggered = lowercaseStringTriggers.get(key, lowercased);
        if (triggered != null) {
          triggeredPatterns.addAll(triggered);
        }
      }
    }
    // TODO: triggers for normalized patterns...
    return triggeredPatterns;
  }
}