package edu.stanford.nlp.ling.tokensregex;
import edu.stanford.nlp.pipeline.ChunkAnnotationUtils;
import edu.stanford.nlp.pipeline.CoreMapAggregator;
import edu.stanford.nlp.util.CollectionUtils;
import edu.stanford.nlp.util.CoreMap;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
/**
* Performs a action on a matched sequence
*
* @author Angel Chang
*/
public abstract class CoreMapSequenceMatchAction<T extends CoreMap> implements SequenceMatchAction<T> {
public final static class AnnotateAction<T extends CoreMap> extends CoreMapSequenceMatchAction<T> {
Map<String,String> attributes; // TODO: Preconvert, handle when to overwrite existing attributes
public AnnotateAction(Map<String, String> attributes) {
this.attributes = attributes;
}
public SequenceMatchResult<T> apply(SequenceMatchResult<T> matchResult, int... groups) {
for (int group:groups) {
int groupStart = matchResult.start(group);
if (groupStart >=0) {
int groupEnd = matchResult.end(group);
ChunkAnnotationUtils.annotateChunks(matchResult.elements(), groupStart, groupEnd, attributes);
}
}
return matchResult;
}
}
public final static MergeAction DEFAULT_MERGE_ACTION = new MergeAction();
public final static class MergeAction extends CoreMapSequenceMatchAction<CoreMap> {
CoreMapAggregator aggregator = CoreMapAggregator.getDefaultAggregator();
public MergeAction() {}
public MergeAction(CoreMapAggregator aggregator) {
this.aggregator = aggregator;
}
public SequenceMatchResult<CoreMap> apply(SequenceMatchResult<CoreMap> matchResult, int... groups)
{
BasicSequenceMatchResult<CoreMap> res = matchResult.toBasicSequenceMatchResult();
List<? extends CoreMap> elements = matchResult.elements();
List<CoreMap> mergedElements = new ArrayList<>();
res.elements = mergedElements;
int last = 0;
int mergedGroup = 0;
int offset = 0;
List<Integer> orderedGroups = CollectionUtils.asList(groups);
Collections.sort(orderedGroups);
for (int group:orderedGroups) {
int groupStart = matchResult.start(group);
if (groupStart >= last) {
// Add elements from last to start of group to merged elements
mergedElements.addAll(elements.subList(last,groupStart));
// Fiddle with matched group indices
for (; mergedGroup < group; mergedGroup++) {
if (res.matchedGroups[mergedGroup] != null) {
res.matchedGroups[mergedGroup].matchBegin -= offset;
res.matchedGroups[mergedGroup].matchEnd -= offset;
}
}
// Get merged element
int groupEnd = matchResult.end(group);
if (groupEnd - groupStart >= 1) {
CoreMap merged = aggregator.merge(elements, groupStart, groupEnd);
mergedElements.add(merged);
last = groupEnd;
// Fiddle with matched group indices
res.matchedGroups[mergedGroup].matchBegin = mergedElements.size()-1;
res.matchedGroups[mergedGroup].matchEnd = mergedElements.size();
mergedGroup++;
while (mergedGroup < res.matchedGroups.length) {
if (res.matchedGroups[mergedGroup] != null) {
if (res.matchedGroups[mergedGroup].matchBegin == matchResult.start(group) &&
res.matchedGroups[mergedGroup].matchEnd == matchResult.end(group)) {
res.matchedGroups[mergedGroup].matchBegin = res.matchedGroups[group].matchBegin;
res.matchedGroups[mergedGroup].matchEnd = res.matchedGroups[group].matchEnd;
} else if (res.matchedGroups[mergedGroup].matchEnd <= matchResult.end(group)) {
res.matchedGroups[mergedGroup] = null;
} else {
break;
}
}
mergedGroup++;
}
offset = matchResult.end(group) - res.matchedGroups[group].matchEnd;
}
}
}
// Add rest of elements
mergedElements.addAll(elements.subList(last, elements.size()));
// Fiddle with matched group indices
for (; mergedGroup < res.matchedGroups.length; mergedGroup++) {
if (res.matchedGroups[mergedGroup] != null) {
res.matchedGroups[mergedGroup].matchBegin -= offset;
res.matchedGroups[mergedGroup].matchEnd -= offset;
}
}
return res;
}
}
}