package edu.stanford.nlp.ling.tokensregex;
import edu.stanford.nlp.pipeline.ChunkAnnotationUtils;
import edu.stanford.nlp.pipeline.CoreMapAttributeAggregator;
import edu.stanford.nlp.util.CoreMap;
import edu.stanford.nlp.util.Interval;
import java.util.*;
/**
* Pattern for matching across multiple core maps.
*
* <p>
* This class allows for string matches across tokens. It is not implemented efficiently
* (it basically creates one big pretend token and tries to do a string match on that),
* so it can be expensive to use. Whenever possible, <code>SequencePattern</code> should be used instead.
* </p>
*
* @author Angel Chang
*/
public class MultiCoreMapNodePattern extends MultiNodePattern<CoreMap> {
Map<Class, CoreMapAttributeAggregator> aggregators = CoreMapAttributeAggregator.getDefaultAggregators();
NodePattern nodePattern;
public MultiCoreMapNodePattern() {}
public MultiCoreMapNodePattern(NodePattern nodePattern) {
this.nodePattern = nodePattern;
}
public MultiCoreMapNodePattern(NodePattern nodePattern, Map<Class, CoreMapAttributeAggregator> aggregators) {
this.nodePattern = nodePattern;
this.aggregators = aggregators;
}
protected Collection<Interval<Integer>> match(List<? extends CoreMap> nodes, int start)
{
List<Interval<Integer>> matched = new ArrayList<>();
int minEnd = start + minNodes;
int maxEnd = nodes.size();
if (maxNodes >= 0 && maxNodes + start < nodes.size()) {
maxEnd = maxNodes + start;
}
for (int end = minEnd; end <= maxEnd; end++) {
CoreMap chunk = ChunkAnnotationUtils.getMergedChunk(nodes, start, end, aggregators, null);
if (nodePattern.match(chunk)) {
matched.add(Interval.toInterval(start, end));
}
}
return matched;
}
public static class StringSequenceAnnotationPattern extends MultiNodePattern<CoreMap> {
Class textKey;
PhraseTable phraseTable;
public StringSequenceAnnotationPattern(Class textKey, Set<List<String>> targets, boolean ignoreCase) {
this.textKey = textKey;
phraseTable = new PhraseTable(false, ignoreCase, false);
for (List<String> target:targets) {
phraseTable.addPhrase(target);
if (maxNodes < 0 || target.size() > maxNodes) maxNodes = target.size();
}
}
public StringSequenceAnnotationPattern(Class textKey, Set<List<String>> targets) {
this(textKey, targets, false);
}
public StringSequenceAnnotationPattern(Class textKey, Map<List<String>, Object> targets, boolean ignoreCase) {
this.textKey = textKey;
phraseTable = new PhraseTable(false, ignoreCase, false);
for (List<String> target:targets.keySet()) {
phraseTable.addPhrase(target, null, targets.get(target));
if (maxNodes < 0 || target.size() > maxNodes) maxNodes = target.size();
}
}
public StringSequenceAnnotationPattern(Class textKey, Map<List<String>, Object> targets) {
this(textKey, targets, false);
}
protected Collection<Interval<Integer>> match(List<? extends CoreMap> nodes, int start) {
PhraseTable.WordList words = new PhraseTable.TokenList(nodes, textKey);
List<PhraseTable.PhraseMatch> matches = phraseTable.findMatches(words, start, nodes.size(), false);
Collection<Interval<Integer>> intervals = new ArrayList<>(matches.size());
for (PhraseTable.PhraseMatch match:matches) {
intervals.add(match.getInterval());
}
return intervals;
}
public String toString() {
return ":" + phraseTable;
}
}
}