package edu.stanford.nlp.ling.tokensregex;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.function.Function;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.pipeline.ChunkAnnotationUtils;
import edu.stanford.nlp.pipeline.CoreMapAggregator;
import edu.stanford.nlp.util.CollectionUtils;
import edu.stanford.nlp.util.CoreMap;
/**
* CoreMap Sequence Matcher for regular expressions for sequences over CoreMaps.
*
* @author Angel Chang
*/
public class CoreMapSequenceMatcher<T extends CoreMap> extends SequenceMatcher<T> {
protected static final Function<List<? extends CoreMap>, String> COREMAP_LIST_TO_STRING_CONVERTER =
in -> (in != null)? ChunkAnnotationUtils.getTokenText(in, CoreAnnotations.TextAnnotation.class): null;
public CoreMapSequenceMatcher(SequencePattern<T> pattern, List<? extends T> tokens) {
super(pattern, tokens);
// this.nodesToStringConverter = COREMAP_LIST_TO_STRING_CONVERTER;
}
public static class BasicCoreMapSequenceMatcher extends CoreMapSequenceMatcher<CoreMap> {
CoreMap annotation;
public BasicCoreMapSequenceMatcher(SequencePattern<CoreMap> pattern, CoreMap annotation) {
super(pattern, annotation.get(CoreAnnotations.TokensAnnotation.class));
this.annotation = annotation;
this.nodesToStringConverter = COREMAP_LIST_TO_STRING_CONVERTER;
}
}
public void annotateGroup(Map<String,String> attributes)
{
annotateGroup(0, attributes);
}
public void annotateGroup(int group, Map<String,String> attributes) {
int groupStart = start(group);
if (groupStart >=0) {
int groupEnd = end(group);
ChunkAnnotationUtils.annotateChunks(elements, groupStart, groupEnd, attributes);
}
}
public List<CoreMap> getMergedList()
{
return getMergedList(0);
}
public List<CoreMap> getMergedList(int... groups) {
List<CoreMap> res = new ArrayList<>();
int last = 0;
List<Integer> orderedGroups = CollectionUtils.asList(groups);
Collections.sort(orderedGroups);
for (int group:orderedGroups) {
int groupStart = start(group);
if (groupStart >= last) {
res.addAll(elements.subList(last,groupStart));
int groupEnd = end(group);
if (groupEnd - groupStart >= 1) {
CoreMap merged = createMergedChunk(groupStart, groupEnd);
res.add(merged);
last = groupEnd;
}
}
}
res.addAll(elements.subList(last, elements.size()));
return res;
}
public CoreMap mergeGroup()
{
return mergeGroup(0);
}
private CoreMap createMergedChunk(int groupStart, int groupEnd) {
CoreMap merged = null;
/* if (annotation != null) {
// Take start and end
merged = ChunkAnnotationUtils.getMergedChunk(elements, annotation.get(CoreAnnotations.TextAnnotation.class), groupStart, groupEnd);
} */
if (merged == null) {
// Okay, have to go through these one by one and merge them
merged = CoreMapAggregator.getDefaultAggregator().merge(elements, groupStart, groupEnd);
}
return merged;
}
public CoreMap mergeGroup(int group) {
int groupStart = start(group);
if (groupStart >=0) {
int groupEnd = end(group);
if (groupEnd - groupStart >= 1) {
return createMergedChunk(groupStart, groupEnd);
}
}
return null;
}
}