package edu.stanford.nlp.ling.tokensregex.matcher;
import edu.stanford.nlp.util.*;
import java.util.*;
import java.util.function.Function;
/**
* The <code>TrieMapMatcher</code> provides functions to match against a trie.
* It can be used to:
* - Find matches in a document (findAllMatches and findNonOverlapping)
* - Find approximate matches in a document (findClosestMatches)
* - Segment a sequence based on entries in the trie (segment)
*
* TODO: Have TrieMapMatcher implement a matcher interface
*
* @author Angel Chang
*/
public class TrieMapMatcher<K,V> {
// Trie that is searched for exact and approximate matches.
TrieMap<K,V> root;
// Trie used to restart matching after one entry has been completed in multimatch mode:
// either root itself, or a trie whose only path is the delimiter leading to root.
TrieMap<K,V> rootWithDelimiter;
// Key sequence expected between consecutive entries in multimatch mode (null when none was given).
List<K> multimatchDelimiter;
/**
 * Creates a matcher over the given trie without a multimatch delimiter.
 * The trie used to restart multimatches is the root itself.
 * @param root Trie to match against
 */
public TrieMapMatcher(TrieMap<K, V> root) {
  this(root, null);
}
public TrieMapMatcher(TrieMap<K, V> root, List<K> multimatchDelimiter) {
this.root = root;
this.multimatchDelimiter = multimatchDelimiter;
if (multimatchDelimiter != null && !multimatchDelimiter.isEmpty()) {
// Create a new root that always starts with the delimiter
rootWithDelimiter = new TrieMap<>();
rootWithDelimiter.putChildTrie(multimatchDelimiter, root);
} else {
rootWithDelimiter = root;
}
}
/**
 * Given a target sequence, returns the n closest matches from the trie.
 * The cost function used is an exact match cost function.
 * The array is viewed as a list and handed to {@link #findClosestMatches(List, int)}.
 * @param target Target sequence to match
 * @param n Number of matches to return. The actual number of matches may be less.
 * @return List of approximate matches
 */
public List<ApproxMatch<K,V>> findClosestMatches(K[] target, int n) {
  List<K> targetAsList = Arrays.asList(target);
  return findClosestMatches(targetAsList, n);
}
/**
 * Given a target sequence, returns the n closest matches (or sequences of matches) from the trie.
 * The cost function used is an exact match cost function.
 * The array is viewed as a list and handed to the list-based overload.
 * @param target Target sequence to match
 * @param n Number of matches to return. The actual number of matches may be less.
 * @param multimatch If true, attempt to return matches with sequences of elements from the trie.
 *        Otherwise, each match will contain only one element from the trie.
 * @param keepAlignments If true, alignment information is returned
 * @return List of approximate matches
 */
public List<ApproxMatch<K,V>> findClosestMatches(K[] target, int n, boolean multimatch, boolean keepAlignments) {
  List<K> targetAsList = Arrays.asList(target);
  return findClosestMatches(targetAsList, n, multimatch, keepAlignments);
}
/**
 * Given a target sequence, returns the n closest matches (or sequences of matches) from the trie
 * based on the cost function (lower cost means better match).
 * The array is viewed as a list and handed to the list-based overload.
 * @param target Target sequence to match
 * @param costFunction Cost function to use
 * @param maxCost Matches with a cost higher than this are discarded (unboxed when delegating, so must not be null)
 * @param n Number of matches to return. The actual number of matches may be less.
 * @param multimatch If true, attempt to return matches with sequences of elements from the trie.
 *        Otherwise, each match will contain only one element from the trie.
 * @param keepAlignments If true, alignment information is returned
 * @return List of approximate matches
 */
public List<ApproxMatch<K,V>> findClosestMatches(K[] target, MatchCostFunction<K,V> costFunction,
    Double maxCost, int n, boolean multimatch, boolean keepAlignments) {
  List<K> targetAsList = Arrays.asList(target);
  return findClosestMatches(targetAsList, costFunction, maxCost, n, multimatch, keepAlignments);
}
/**
 * Given a target sequence, returns the n closest matches from the trie.
 * Uses the default (exact match) cost function, no cost limit ({@code Double.MAX_VALUE}),
 * no multimatching and no alignment tracking.
 * @param target Target sequence to match
 * @param n Number of matches to return. The actual number of matches may be less.
 * @return List of approximate matches
 */
public List<ApproxMatch<K,V>> findClosestMatches(List<K> target, int n) {
  MatchCostFunction<K,V> exactMatchCost = defaultCost();
  return findClosestMatches(target, exactMatchCost, Double.MAX_VALUE, n, false, false);
}
/**
 * Given a target sequence, returns the n closest matches (or sequences of matches) from the trie.
 * Uses the default (exact match) cost function and no cost limit ({@code Double.MAX_VALUE}).
 * @param target Target sequence to match
 * @param n Number of matches to return. The actual number of matches may be less.
 * @param multimatch If true, attempt to return matches with sequences of elements from the trie.
 *        Otherwise, each match will contain only one element from the trie.
 * @param keepAlignments If true, alignment information is returned
 * @return List of approximate matches
 */
public List<ApproxMatch<K,V>> findClosestMatches(List<K> target, int n, boolean multimatch, boolean keepAlignments) {
  MatchCostFunction<K,V> exactMatchCost = defaultCost();
  return findClosestMatches(target, exactMatchCost, Double.MAX_VALUE, n, multimatch, keepAlignments);
}
/**
 * Given a target sequence, returns the n closest matches (or sequences of matches) from the trie
 * based on the cost function (lower cost means better match).
 * <p>
 * The search fills a table row by row: row i covers the first i target elements, column j
 * covers partial matches that used j keys from the trie. Only the previous and the current
 * row are kept. Each cell keeps at most n candidates whose cost does not exceed maxCost.
 * @param target Target sequence to match
 * @param costFunction Cost function to use
 * @param maxCost Matches with a cost higher than this are discarded
 * @param n Number of matches to return. The actual number of matches may be less.
 * @param multimatch If true, attempt to return matches with sequences of elements from the trie.
 *        Otherwise, each match will contain only one element from the trie.
 * @param keepAlignments If true, alignment information is returned
 * @return List of approximate matches, or null if the trie is empty
 */
public List<ApproxMatch<K,V>> findClosestMatches(List<K> target, MatchCostFunction<K,V> costFunction,
    double maxCost, int n, boolean multimatch, boolean keepAlignments) {
  if (root.isEmpty()) return null;
  // Number of columns beyond target.size(): candidates may use this many more trie keys
  // than there are target elements
  int extra = 3;
  // Completed candidates (all target elements consumed and a trie value reached)
  MatchQueue<K,V> best = new MatchQueue<>(n, maxCost);
  List<PartialApproxMatch<K,V>>[] prevMatches = null;
  List<PartialApproxMatch<K,V>>[] curMatches;
  for (int i = 0; i <= target.size(); i++) {
    curMatches = new List[target.size()+1+extra];
    // Target element consumed when stepping from row i-1 to row i (none for the first row).
    // Every row i > 0 consumes element i-1, including the last row.
    K t = (i > 0)? target.get(i-1):null;
    // On the last row the whole target has been consumed
    boolean complete = (i == target.size());
    for (int j = 0; j <= target.size()+extra; j++) {
      if (j > 0) {
        // Look at the top n choices we saved away and pick n new options
        MatchQueue<K,V> queue = (multimatch)? new MultiMatchQueue<>(n, maxCost): new MatchQueue<>(n, maxCost);
        if (i > 0) {
          // Consume target element t together with a trie key k
          for (PartialApproxMatch<K,V> pam:prevMatches[j-1]) {
            if (pam.trie != null && pam.trie.children != null) {
              for (K k:pam.trie.children.keySet()) {
                addToQueue(queue, best, costFunction, pam, t, k, multimatch, complete);
              }
            }
          }
        }
        // Consume a trie key k without consuming a target element
        for (PartialApproxMatch<K,V> pam:curMatches[j-1]) {
          if (pam.trie != null && pam.trie.children != null) {
            for (K k:pam.trie.children.keySet()) {
              addToQueue(queue, best, costFunction, pam, null, k, multimatch, complete);
            }
          }
        }
        if (i > 0) {
          // Consume target element t without consuming a trie key
          for (PartialApproxMatch<K,V> pam:prevMatches[j]) {
            addToQueue(queue, best, costFunction, pam, t, null, multimatch, complete);
          }
        }
        curMatches[j] = queue.toSortedList();
      } else {
        // Column 0: no trie key used yet, the first i target elements were consumed unmatched
        curMatches[0] = new ArrayList<>();
        if (i > 0) {
          for (PartialApproxMatch<K,V> pam:prevMatches[0]) {
            PartialApproxMatch<K,V> npam = pam.withMatch(costFunction, costFunction.cost(t, null, pam.getMatchedLength()), t, null);
            if (npam.cost <= maxCost) {
              curMatches[0].add(npam);
            }
          }
        } else {
          // Starting point: cost 0 at the trie root, nothing consumed
          curMatches[0].add(new PartialApproxMatch<>(0, root, keepAlignments ? target.size() : 0));
        }
      }
    }
    prevMatches = curMatches;
  }
  // Get the best matches
  List<ApproxMatch<K,V>> res = new ArrayList<>();
  for (PartialApproxMatch<K,V> m:best.toSortedList()) {
    res.add(m.toApproxMatch());
  }
  return res;
}
/**
 * Given a sequence to search through (e.g. a piece of text would be a sequence of words),
 * finds all sub-sequences that match entries in the trie.
 * The arguments are viewed as a list and handed to {@link #findAllMatches(List)}.
 * @param list Sequence to search through
 * @return List of matches
 */
public List<Match<K,V>> findAllMatches(K ... list) {
  List<K> sequence = Arrays.asList(list);
  return findAllMatches(sequence);
}
/**
 * Given a sequence to search through (e.g. a piece of text would be a sequence of words),
 * finds all sub-sequences that match entries in the trie.
 * Searches the whole list, i.e. the range from 0 to {@code list.size()}.
 * @param list Sequence to search through
 * @return List of matches
 */
public List<Match<K,V>> findAllMatches(List<K> list) {
  int searchEnd = list.size();
  return findAllMatches(list, 0, searchEnd);
}
/**
 * Given a sequence to search through (e.g. a piece of text would be a sequence of words),
 * finds all sub-sequences that match entries in the trie.
 * @param list Sequence to search through
 * @param start start index to start search at
 * @param end end index (exclusive) to end search at
 * @return List of matches (a fresh list filled by {@code updateAllMatches})
 */
public List<Match<K,V>> findAllMatches(List<K> list, int start, int end) {
  List<Match<K,V>> found = new ArrayList<>();
  // Every match attempt starts at the trie root with nothing matched so far
  List<K> nothingMatchedYet = new ArrayList<>();
  updateAllMatches(root, found, nothingMatchedYet, list, start, end);
  return found;
}
/**
 * Given a sequence to search through (e.g. a piece of text would be a sequence of words),
 * finds all non-overlapping sub-sequences that match entries in the trie.
 * Sub-sequences that are longer are preferred, then sub-sequences that start earlier.
 * The arguments are viewed as a list and handed to {@link #findNonOverlapping(List)}.
 * @param list Sequence to search through
 * @return List of matches sorted by start position
 */
public List<Match<K,V>> findNonOverlapping(K ... list) {
  List<K> sequence = Arrays.asList(list);
  return findNonOverlapping(sequence);
}
/**
 * Given a sequence to search through (e.g. a piece of text would be a sequence of words),
 * finds all non-overlapping sub-sequences that match entries in the trie.
 * Sub-sequences that are longer are preferred, then sub-sequences that start earlier.
 * Searches the whole list, i.e. the range from 0 to {@code list.size()}.
 * @param list Sequence to search through
 * @return List of matches sorted by start position
 */
public List<Match<K,V>> findNonOverlapping(List<K> list) {
  int searchEnd = list.size();
  return findNonOverlapping(list, 0, searchEnd);
}
/**
 * Default comparator for choosing between overlapping matches, obtained from
 * {@code Interval.lengthEndpointsComparator()}.
 * Presumably prefers longer matches, then earlier endpoints (see that method to confirm).
 */
public static final Comparator<Match> MATCH_LENGTH_ENDPOINTS_COMPARATOR =
    Interval.<Match>lengthEndpointsComparator();
/**
 * Default scoring function for segmentation, obtained from {@code Interval.lengthScorer()}.
 * Presumably scores a match by its length (see that method to confirm).
 */
public static final Function<Match, Double> MATCH_LENGTH_SCORER =
    Interval.<Match>lengthScorer();
/**
 * Given a sequence to search through (e.g. a piece of text would be a sequence of words),
 * finds all non-overlapping sub-sequences that match entries in the trie.
 * Sub-sequences that are longer are preferred, then sub-sequences that start earlier
 * (overlaps are resolved with {@code MATCH_LENGTH_ENDPOINTS_COMPARATOR}).
 * @param list Sequence to search through
 * @param start start index to start search at
 * @param end end index (exclusive) to end search at
 * @return List of matches sorted by start position
 */
public List<Match<K,V>> findNonOverlapping(List<K> list, int start, int end) {
  Comparator<? super Match<K,V>> defaultPreference = MATCH_LENGTH_ENDPOINTS_COMPARATOR;
  return findNonOverlapping(list, start, end, defaultPreference);
}
/**
 * Given a sequence to search through (e.g. a piece of text would be a sequence of words),
 * finds all non-overlapping sub-sequences that match entries in the trie.
 * All matches in the range are collected first, then overlaps are resolved.
 * @param list Sequence to search through
 * @param start start index to start search at
 * @param end end index (exclusive) to end search at
 * @param compareFunc Comparison function to use for evaluating which overlapping sub-sequence to keep.
 *        Earlier sub-sequences based on the comparison function are favored.
 * @return List of matches sorted by start position
 */
public List<Match<K,V>> findNonOverlapping(List<K> list, int start, int end, Comparator<? super Match<K,V>> compareFunc) {
  return getNonOverlapping(findAllMatches(list, start, end), compareFunc);
}
/**
 * Given a sequence to search through (e.g. a piece of text would be a sequence of words),
 * finds all non-overlapping sub-sequences that match entries in the trie while attempting
 * to maximize the scoreFunc.
 * All matches in the range are collected first, then overlaps are resolved.
 * @param list Sequence to search through
 * @param start start index to start search at
 * @param end end index (exclusive) to end search at
 * @param scoreFunc Scoring function indicating how good the match is
 * @return List of matches sorted by start position
 */
public List<Match<K,V>> findNonOverlapping(List<K> list, int start, int end, Function<? super Match<K,V>, Double> scoreFunc) {
  return getNonOverlapping(findAllMatches(list, start, end), scoreFunc);
}
/**
 * Segments a sequence into sub-sequences by attempting to find the longest non-overlapping
 * sub-sequences. Non-matched parts will be included as a match with a null value.
 * The arguments are viewed as a list and handed to {@link #segment(List)}.
 * @param list Sequence to search through
 * @return List of segments (as matches) sorted by start position
 */
public List<Match<K,V>> segment(K ... list) {
  List<K> sequence = Arrays.asList(list);
  return segment(sequence);
}
/**
 * Segments a sequence into sub-sequences by attempting to find the longest non-overlapping
 * sub-sequences. Non-matched parts will be included as a match with a null value.
 * Searches the whole list, i.e. the range from 0 to {@code list.size()}.
 * @param list Sequence to search through
 * @return List of segments (as matches) sorted by start position
 */
public List<Match<K,V>> segment(List<K> list) {
  int searchEnd = list.size();
  return segment(list, 0, searchEnd);
}
/**
 * Segments a sequence into sub-sequences by attempting to find the longest non-overlapping
 * sub-sequences (scored with {@code MATCH_LENGTH_SCORER}).
 * Non-matched parts will be included as a match with a null value.
 * @param list Sequence to search through
 * @param start start index to start search at
 * @param end end index (exclusive) to end search at
 * @return List of segments (as matches) sorted by start position
 */
public List<Match<K,V>> segment(List<K> list, int start, int end) {
  Function<? super Match<K,V>, Double> defaultScorer = MATCH_LENGTH_SCORER;
  return segment(list, start, end, defaultScorer);
}
/**
 * Segments a sequence into sub-sequences by attempting to find the non-overlapping
 * sub-sequences that come earlier using the compareFunc.
 * Non-matched parts will be included as a match with a null value.
 * Matches are only searched for in [start, end), but the non-matched filler segments
 * are laid out from index 0 up to {@code list.size()}.
 * @param list Sequence to search through
 * @param start start index to start search at
 * @param end end index (exclusive) to end search at
 * @param compareFunc Comparison function to use for evaluating which overlapping sub-sequence to keep.
 *        Earlier sub-sequences based on the comparison function are favored.
 * @return List of segments (as matches) sorted by start position
 */
public List<Match<K,V>> segment(List<K> list, int start, int end, Comparator<? super Match<K,V>> compareFunc) {
  List<Match<K,V>> matches = findNonOverlapping(list, start, end, compareFunc);
  List<Match<K,V>> result = new ArrayList<>(matches.size());
  // Index of the first element not yet covered by any emitted segment
  int covered = 0;
  Iterator<Match<K,V>> it = matches.iterator();
  while (it.hasNext()) {
    Match<K,V> current = it.next();
    if (covered < current.begin) {
      // Fill the gap before this match with a null-valued segment
      Match<K,V> gap = new Match<>(list.subList(covered, current.begin), null, covered, current.begin);
      result.add(gap);
    }
    result.add(current);
    covered = current.end;
  }
  int total = list.size();
  if (covered < total) {
    // Trailing elements after the last match
    Match<K,V> tail = new Match<>(list.subList(covered, total), null, covered, total);
    result.add(tail);
  }
  return result;
}
/**
 * Segments a sequence into sub-sequences by attempting to maximize the total score.
 * Non-matched parts will be included as a match with a null value.
 * Matches are only searched for in [start, end), but the non-matched filler segments
 * are laid out from index 0 up to {@code list.size()}.
 * @param list Sequence to search through
 * @param start start index to start search at
 * @param end end index (exclusive) to end search at
 * @param scoreFunc Scoring function indicating how good the match is
 * @return List of segments (as matches) sorted by start position
 */
public List<Match<K,V>> segment(List<K> list, int start, int end, Function<? super Match<K,V>, Double> scoreFunc) {
  List<Match<K,V>> matches = findNonOverlapping(list, start, end, scoreFunc);
  List<Match<K,V>> result = new ArrayList<>(matches.size());
  // Index of the first element not yet covered by any emitted segment
  int covered = 0;
  Iterator<Match<K,V>> it = matches.iterator();
  while (it.hasNext()) {
    Match<K,V> current = it.next();
    if (covered < current.begin) {
      // Fill the gap before this match with a null-valued segment
      Match<K,V> gap = new Match<>(list.subList(covered, current.begin), null, covered, current.begin);
      result.add(gap);
    }
    result.add(current);
    covered = current.end;
  }
  int total = list.size();
  if (covered < total) {
    // Trailing elements after the last match
    Match<K,V> tail = new Match<>(list.subList(covered, total), null, covered, total);
    result.add(tail);
  }
  return result;
}
/**
 * Segments the whole sequence (range 0 to {@code list.size()}) by attempting to maximize
 * the total score. Non-matched parts will be included as a match with a null value.
 * @param list Sequence to search through
 * @param scoreFunc Scoring function indicating how good the match is
 * @return List of segments (as matches) sorted by start position
 */
public List<Match<K,V>> segment(List<K> list, Function<? super Match<K,V>, Double> scoreFunc) {
  int searchEnd = list.size();
  return segment(list, 0, searchEnd, scoreFunc);
}
/**
 * Given a list of matches, returns all non-overlapping matches.
 * Matches that are longer are preferred, then matches that start earlier
 * (overlaps are resolved with {@code MATCH_LENGTH_ENDPOINTS_COMPARATOR}).
 * @param allMatches List of matches
 * @return List of matches sorted by start position
 */
public List<Match<K,V>> getNonOverlapping(List<Match<K,V>> allMatches) {
  Comparator<? super Match<K,V>> defaultPreference = MATCH_LENGTH_ENDPOINTS_COMPARATOR;
  return getNonOverlapping(allMatches, defaultPreference);
}
/**
 * Given a list of matches, returns all non-overlapping matches.
 * A list with fewer than two matches cannot overlap and is returned as is (the same list instance).
 * @param allMatches List of matches
 * @param compareFunc Comparison function to use for evaluating which overlapping sub-sequence to keep.
 *        Earlier sub-sequences based on the comparison function are favored.
 * @return List of matches sorted by start position
 */
public List<Match<K,V>> getNonOverlapping(List<Match<K,V>> allMatches, Comparator<? super Match<K,V>> compareFunc) {
  if (allMatches.size() <= 1) {
    return allMatches;
  }
  List<Match<K,V>> kept = IntervalTree.getNonOverlapping(allMatches, compareFunc);
  // Put the surviving matches back into endpoint order
  Collections.sort(kept, HasInterval.ENDPOINTS_COMPARATOR);
  return kept;
}
/**
 * Given a list of matches, returns non-overlapping matches chosen by
 * {@code IntervalTree.getNonOverlappingMaxScore} using the scoring function.
 * @param allMatches List of matches
 * @param scoreFunc Scoring function indicating how good the match is
 * @return List of non-overlapping matches
 */
public List<Match<K,V>> getNonOverlapping(List<Match<K,V>> allMatches, Function<? super Match<K,V>, Double> scoreFunc) {
  List<Match<K,V>> bestScoring = IntervalTree.getNonOverlappingMaxScore(allMatches, scoreFunc);
  return bestScoring;
}
/**
 * Collects all matches that begin at any position in [start, end).
 * @param trie Trie node to start matching from
 * @param matches Output list that found matches are appended to
 * @param matched Keys already matched on the way to {@code trie}
 * @param list Sequence to search through
 * @param start first start position to try
 * @param end end index (exclusive) of the search range
 */
protected void updateAllMatches(TrieMap<K,V> trie, List<Match<K,V>> matches, List<K> matched, List<K> list, int start, int end) {
  int position = start;
  while (position < end) {
    updateAllMatchesWithStart(trie, matches, matched, list, position, end);
    position++;
  }
}
/**
 * Follows the trie along the elements of {@code list} beginning at {@code start} and
 * appends a match for every leaf node reached. Deeper (longer) matches are appended
 * before the match for the current node.
 * @param trie Trie node reached so far
 * @param matches Output list that found matches are appended to
 * @param matched Keys matched on the way to {@code trie}
 * @param list Sequence to search through
 * @param start index of the next element of {@code list} to consume
 * @param end end index (exclusive) of the search range
 */
protected void updateAllMatchesWithStart(TrieMap<K,V> trie, List<Match<K,V>> matches, List<K> matched, List<K> list, int start, int end) {
  if (start > end) {
    return;
  }
  boolean canDescend = trie.children != null && start < end;
  if (canDescend) {
    K next = list.get(start);
    TrieMap<K,V> subtrie = trie.children.get(next);
    if (subtrie != null) {
      // Copy the path so sibling branches do not share a mutable list
      List<K> extended = new ArrayList<>(matched.size() + 1);
      extended.addAll(matched);
      extended.add(next);
      updateAllMatchesWithStart(subtrie, matches, extended, list, start + 1, end);
    }
  }
  if (trie.isLeaf()) {
    int matchBegin = start - matched.size();
    matches.add(new Match<>(matched, trie.value, matchBegin, start));
  }
}
// Helper class for keeping track of partial matches with TrieMatcher
// Each instance records the keys matched so far, the accumulated cost and the trie node
// reached. Instances are never modified by the withMatch methods; those return new objects.
private static class PartialApproxMatch<K,V> extends ApproxMatch<K,V> {
// Trie node reached by the keys matched so far (set back to the restart root after a
// completed entry in multimatch mode)
TrieMap<K,V> trie;
// Index into matched at which the entry currently being matched starts (multimatch mode)
int lastMultimatchedMatchedStartIndex = 0;
// Index into the target sequence at which the entry currently being matched starts (multimatch mode)
int lastMultimatchedOriginalStartIndex = 0;
// Used by withMatch, which fills in all fields itself
private PartialApproxMatch() {}
// Creates a starting point at the given trie node with the given cost.
// An alignment array is only allocated when alignmentLength is positive.
private PartialApproxMatch(double cost, TrieMap<K,V> trie, int alignmentLength) {
this.trie = trie;
this.cost = cost;
this.value = (trie != null)? this.trie.value:null;
if (alignmentLength > 0) {
this.alignments = new Interval[alignmentLength];
}
}
// Returns a new partial match extended by one step:
//  - t != null: one more target element is consumed (end advances by one)
//  - k != null: trie key k is appended to matched and the trie node moves to the child for k
// deltaCost is added to the accumulated cost. costFunction is not used in this overload.
private PartialApproxMatch<K,V> withMatch(MatchCostFunction<K,V> costFunction, double deltaCost, K t, K k) {
PartialApproxMatch<K,V> res = new PartialApproxMatch<>();
// Share the matched list unless a key is appended, in which case it is copied first
res.matched = matched;
if (k != null) {
if (res.matched == null) {
res.matched = new ArrayList<>(1);
} else {
res.matched = new ArrayList<>(matched.size() + 1);
res.matched.addAll(matched);
}
res.matched.add(k);
}
res.begin = begin;
res.end = (t != null)? end + 1: end;
res.cost = cost + deltaCost;
res.trie = (k != null)? trie.getChildTrie(k):trie;
res.value = (res.trie != null)? res.trie.value:null;
res.multimatches = multimatches;
res.lastMultimatchedMatchedStartIndex = lastMultimatchedMatchedStartIndex;
res.lastMultimatchedOriginalStartIndex = lastMultimatchedOriginalStartIndex;
// A target element consumed without a trie key, right where the current entry would
// start, moves the entry's start past that element
if (res.lastMultimatchedOriginalStartIndex == end && k == null && t != null) {
res.lastMultimatchedOriginalStartIndex++;
}
// Update alignments
if (alignments != null) {
res.alignments = new Interval[alignments.length];
System.arraycopy(alignments, 0, res.alignments, 0, alignments.length);
if (k != null && res.end > 0) {
// Attribute the new key to the most recently consumed target element
int p = res.end-1;
if (res.alignments[p] == null) {
res.alignments[p] = Interval.toInterval(res.matched.size()-1, res.matched.size());
} else {
// Widen the existing interval by one matched key
res.alignments[p] = Interval.toInterval(res.alignments[p].getBegin(), res.alignments[p].getEnd() + 1);
}
}
}
return res;
}
private ApproxMatch<K,V> toApproxMatch() {
// Makes a copy of this partial approx match that can be returned to the caller
return new ApproxMatch<>(matched, value, begin, end, multimatches, cost, alignments);
}
// Same as the four-argument withMatch, but in multimatch mode, when the step reaches a
// trie node with a value, the completed entry is recorded in multimatches, the cost
// function's multiMatchDeltaCost is added, and matching restarts at the given root.
private PartialApproxMatch<K,V> withMatch(MatchCostFunction<K,V> costFunction, double deltaCost,
K t, K k, boolean multimatch, TrieMap<K,V> root) {
PartialApproxMatch<K,V> res = withMatch(costFunction, deltaCost, t, k);
if (multimatch && res.matched != null && res.value != null) {
// Update tracking of matched keys and values for multiple entry matches
if (res.multimatches == null) {
res.multimatches = new ArrayList<>(1);
} else {
// Copy so that the list shared with this (the parent) is not modified
res.multimatches = new ArrayList<>(multimatches.size() + 1);
res.multimatches.addAll(multimatches);
}
// Keys matched since the previous completed entry
List<K> newlyMatched = res.matched.subList(lastMultimatchedMatchedStartIndex, res.matched.size());
res.multimatches.add(new Match<>(
newlyMatched,
res.value,
lastMultimatchedOriginalStartIndex, res.end
));
res.cost += costFunction.multiMatchDeltaCost(newlyMatched, res.value, multimatches, res.multimatches);
res.lastMultimatchedMatchedStartIndex = res.matched.size();
res.lastMultimatchedOriginalStartIndex = res.end;
// Reset current value/key being matched
res.trie = root;
}
return res;
}
// Equal when the superclass state, both multimatch start indices and the trie node are equal
@Override
public boolean equals(Object o) {
if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false;
if (!super.equals(o)) return false;
PartialApproxMatch that = (PartialApproxMatch) o;
if (lastMultimatchedMatchedStartIndex != that.lastMultimatchedMatchedStartIndex) return false;
if (lastMultimatchedOriginalStartIndex != that.lastMultimatchedOriginalStartIndex) return false;
if (trie != null ? !trie.equals(that.trie) : that.trie != null) return false;
return true;
}
// Combines the superclass hash with both start indices; the trie node is not hashed
@Override
public int hashCode() {
int result = super.hashCode();
result = 31 * result + lastMultimatchedMatchedStartIndex;
result = 31 * result + lastMultimatchedOriginalStartIndex;
return result;
}
}
// Collection of partial matches backed by a BoundedCostOrderedMap that is configured
// with a size limit (maxSize) and a cost limit (maxCost). Entries are keyed by a
// position-independent view of their multimatches, so candidates that differ only in
// the offsets of their multimatch entries share one key.
private static class MatchQueue<K,V> {
  private final BoundedCostOrderedMap<Match<K,V>, PartialApproxMatch<K,V>> queue;
  protected final int maxSize;
  protected final double maxCost;
  // Reads the accumulated cost of a partial match
  public final Function<PartialApproxMatch<K,V>, Double> MATCH_COST_FUNCTION = partial -> partial.cost;

  public MatchQueue(int maxSize, double maxCost) {
    this.maxSize = maxSize;
    this.maxCost = maxCost;
    this.queue = new BoundedCostOrderedMap<>(MATCH_COST_FUNCTION, maxSize, maxCost);
  }

  // Adds the partial match under a key whose multimatch entries have their offsets zeroed
  public void add(PartialApproxMatch<K,V> pam) {
    queue.put(keyFor(pam), pam);
  }

  // Builds the map key for a partial match: same keys, value and span, but with the
  // begin/end of every multimatch entry replaced by 0
  private Match<K,V> keyFor(PartialApproxMatch<K,V> pam) {
    List<Match<K,V>> strippedEntries = null;
    if (pam.multimatches != null) {
      strippedEntries = new ArrayList<>(pam.multimatches.size());
      for (Match<K,V> entry : pam.multimatches) {
        strippedEntries.add(new Match<>(entry.matched, entry.value, 0, 0));
      }
    }
    Match<K,V> key = new MultiMatch<>(pam.matched, pam.value, pam.begin, pam.end, strippedEntries);
    return key;
  }

  public double topCost() {
    return queue.topCost();
  }

  public int size() {
    return queue.size();
  }

  public boolean isEmpty() {
    return queue.isEmpty();
  }

  // Returns the stored partial matches ordered by the partial match comparator
  public List<PartialApproxMatch<K,V>> toSortedList() {
    List<PartialApproxMatch<K,V>> ordered = queue.valuesList();
    Comparator<PartialApproxMatch<K,V>> order = TrieMapMatcher.<K,V>partialMatchComparator();
    Collections.sort(ordered, order);
    return ordered;
  }
}
private static class MultiMatchQueue<K,V> extends MatchQueue<K,V> {
private final Map<Integer, BoundedCostOrderedMap<Match<K,V>, PartialApproxMatch<K,V>>> multimatchQueues;
public MultiMatchQueue(int maxSize, double maxCost) {
super(maxSize, maxCost);
this.multimatchQueues = new HashMap<>();
}
public void add(PartialApproxMatch<K,V> pam) {
Match<K,V> m = new MultiMatch<>(
pam.matched, pam.value, pam.begin, pam.end, pam.multimatches);
Integer key = (pam.multimatches != null)? pam.multimatches.size():0;
if (pam.value == null) key = key + 1;
BoundedCostOrderedMap<Match<K,V>, PartialApproxMatch<K,V>> mq = multimatchQueues.get(key);
if (mq == null) {
multimatchQueues.put(key, mq = new BoundedCostOrderedMap<>(
MATCH_COST_FUNCTION, maxSize, maxCost));
}
mq.put(m, pam);
}
public double topCost() {
double cost = Double.MIN_VALUE;
for (BoundedCostOrderedMap<Match<K,V>, PartialApproxMatch<K,V>> q:multimatchQueues.values()) {
if (q.topCost() > cost) cost = q.topCost();
}
return cost;
}
public int size() {
int sz = 0;
for (BoundedCostOrderedMap<Match<K,V>, PartialApproxMatch<K,V>> q:multimatchQueues.values()) {
sz += q.size();
}
return sz;
}
public List<PartialApproxMatch<K,V>> toSortedList() {
List<PartialApproxMatch<K,V>> all = new ArrayList<>(size());
for (BoundedCostOrderedMap<Match<K,V>, PartialApproxMatch<K,V>> q:multimatchQueues.values()) {
all.addAll(q.valuesList());
}
Collections.sort(all, TrieMapMatcher.<K,V>partialMatchComparator());
return all;
}
}
/**
 * Extends a partial match by one step (target element {@code a} and/or trie key {@code b})
 * and adds the result to the cell queue, and to {@code best} when it is a complete match.
 * @param queue Queue of the table cell being filled
 * @param best Queue of complete matches found so far
 * @param costFunction Cost function used to price the step
 * @param pam Partial match to extend
 * @param a Target element consumed by the step (null if none)
 * @param b Trie key consumed by the step (null if none)
 * @param multimatch Whether sequences of trie entries may be matched
 * @param complete Whether the whole target has been consumed after this step
 * @return false if the step was rejected because of its cost, true otherwise
 */
private boolean addToQueue(MatchQueue<K,V> queue,
                           MatchQueue<K,V> best,
                           MatchCostFunction<K,V> costFunction,
                           PartialApproxMatch<K,V> pam, K a, K b,
                           boolean multimatch, boolean complete) {
  double stepCost = costFunction.cost(a, b, pam.getMatchedLength());
  double totalCost = pam.cost + stepCost;
  // Over the cost limit (a limit of Double.MAX_VALUE means no limit)
  if (queue.maxCost != Double.MAX_VALUE && totalCost > queue.maxCost) {
    return false;
  }
  // Enough complete matches exist already and this one is worse than the top cost of best
  if (best.size() >= queue.maxSize && totalCost > best.topCost()) {
    return false;
  }
  PartialApproxMatch<K,V> extended = pam.withMatch(costFunction, stepCost, a, b);
  if (!multimatch) {
    if (complete && extended.value != null) {
      best.add(extended);
    }
    queue.add(extended);
    return true;
  }
  // Multimatch: keep the plain extension only if it can still grow within the current entry
  if (extended.trie != null && extended.trie.children != null) {
    queue.add(extended);
  }
  if (extended.value != null) {
    // An entry was completed: record it and restart matching from the delimiter root
    PartialApproxMatch<K,V> restarted =
        pam.withMatch(costFunction, stepCost, a, b, multimatch, rootWithDelimiter);
    if (complete && restarted.value != null) {
      best.add(restarted);
    }
    queue.add(restarted);
  }
  return true;
}
/**
 * Returns the shared default cost function ({@code DEFAULT_COST}, an {@code ExactMatchCost})
 * cast to the requested type parameters.
 * @return the default cost function
 */
public static <K,V> MatchCostFunction<K,V> defaultCost() {
  MatchCostFunction<K,V> typed = ErasureUtils.uncheckedCast(DEFAULT_COST);
  return typed;
}
/**
 * Returns the shared comparator for partial matches ({@code PARTIAL_MATCH_COMPARATOR})
 * cast to the requested type parameters.
 * @return the partial match comparator
 */
public static <K,V> Comparator<PartialApproxMatch<K,V>> partialMatchComparator() {
  Comparator<PartialApproxMatch<K,V>> typed = ErasureUtils.uncheckedCast(PARTIAL_MATCH_COMPARATOR);
  return typed;
}
// Shared cost function returned by defaultCost(); kept as a raw type so it can be cast to any <K,V>
private static final MatchCostFunction DEFAULT_COST = new ExactMatchCost();
// Orders partial matches by, in turn: cost (lower first, NaN before any number),
// number of matched keys, number of multimatch entries, begin, end, the matched keys
// themselves (where they are Comparable), and finally the intervals of the multimatch entries.
private static final Comparator<PartialApproxMatch> PARTIAL_MATCH_COMPARATOR = (o1, o2) -> {
  boolean nan1 = Double.isNaN(o1.cost);
  boolean nan2 = Double.isNaN(o2.cost);
  // Two NaN costs are treated as a tie so that the ordering stays consistent in both directions
  if (o1.cost != o2.cost && !(nan1 && nan2)) {
    if (nan1) return -1;
    if (nan2) return 1;
    return (o1.cost < o2.cost)? -1:1;
  }
  if (o1.matched.size() != o2.matched.size()) {
    return (o1.matched.size() < o2.matched.size())? -1:1;
  }
  int m1 = (o1.multimatches != null)? o1.multimatches.size():0;
  int m2 = (o2.multimatches != null)? o2.multimatches.size():0;
  if (m1 != m2) {
    return (m1 < m2)? -1:1;
  }
  if (o1.begin != o2.begin) {
    return (o1.begin < o2.begin)? -1:1;
  }
  if (o1.end != o2.end) {
    return (o1.end < o2.end)? -1:1;
  }
  // Same size was established above, so both lists can be walked with one index
  for (int i = 0; i < o1.matched.size(); i++) {
    Object x1 = o1.matched.get(i);
    Object x2 = o2.matched.get(i);
    if (x1 != null && x2 != null) {
      if (x1 instanceof Comparable) {
        int comp = ((Comparable) x1).compareTo(x2);
        if (comp != 0) return comp;
      }
    }
  }
  if (o1.multimatches != null && o2.multimatches != null) {
    // Compare entry by entry and only stop at the first pair of intervals that differ
    for (int i = 0; i < o1.multimatches.size(); i++) {
      Match mm1 = (Match) o1.multimatches.get(i);
      Match mm2 = (Match) o2.multimatches.get(i);
      int comp = mm1.getInterval().compareTo(mm2.getInterval());
      if (comp != 0) return comp;
    }
  }
  return 0;
};
}