package edu.stanford.nlp.ling.tokensregex;
import edu.stanford.nlp.ling.tokensregex.types.Expressions;
import edu.stanford.nlp.ling.tokensregex.types.Tags;
import edu.stanford.nlp.pipeline.CoreMapAggregator;
import edu.stanford.nlp.pipeline.CoreMapAttributeAggregator;
import java.util.function.Function;
import edu.stanford.nlp.process.CoreLabelTokenFactory;
import edu.stanford.nlp.util.Pair;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Holds environment variables to be used for compiling string into a pattern.
 * Use {@link EnvLookup} to perform actual lookup (it will provide reasonable defaults).
 *
 * <p>
 * Some of the types of variables to bind are:
 * </p>
 * <ul>
 * <li>{@code SequencePattern} (compiled pattern)</li>
 * <li>{@code PatternExpr} (sequence pattern expression - precompiled)</li>
 * <li>{@code NodePattern} (pattern for matching one element)</li>
 * <li>{@code Class} (binding of CoreMap attribute to java Class)</li>
 * </ul>
 *
 * @author Angel Chang
 */
// Various of the public variables in this class are instantiated by reflection from TokensRegex rules
@SuppressWarnings({"WeakerAccess", "unused"})
public class Env {

  /**
   * Parser that converts a string into a SequencePattern.
   * @see edu.stanford.nlp.ling.tokensregex.parser.TokenSequenceParser
   */
  SequencePattern.Parser parser;

  /**
   * Mapping of variable names to their values.
   */
  private Map<String, Object> variables = new HashMap<>();//Generics.newHashMap();

  /**
   * Per-thread temporary variables: each name maps to a stack of values
   * (see {@link #push}, {@link #pop} and {@link #peek}).
   * The map for a thread is created lazily on the first {@link #push}.
   */
  private final ThreadLocal<Map<String, Stack<Object>>> threadLocalVariables = new ThreadLocal<>();

  /**
   * Mapping of variables that can be expanded in a regular expression for strings,
   * to their regular expressions.
   * The variable name must start with "$" and include only the alphanumeric characters
   * (it should follow the pattern {@code $[A-Za-z0-9_]+}).
   * Each variable is mapped to a pair, consisting of the {@code Pattern} representing
   * the name of the variable to be replaced, and a {@code String} representing the
   * regular expression (escaped) that is used to replace the name of the variable.
   */
  private final Map<String, Pair<Pattern, String>> stringRegexVariables = new HashMap<>(); //Generics.newHashMap();

  /**
   * Default parameters (used when reading in rules for {@link SequenceMatchRules}).
   */
  public Map<String, Object> defaults = new HashMap<>();//Generics.newHashMap();

  /**
   * Default flags to use for string regular expressions match
   * @see java.util.regex.Pattern#compile(String,int)
   */
  public int defaultStringPatternFlags = 0;

  /**
   * Default flags to use for string literal match
   * @see NodePattern#CASE_INSENSITIVE
   */
  public int defaultStringMatchFlags = 0;

  public Class sequenceMatchResultExtractor;
  public Class stringMatchResultExtractor;

  /**
   * Annotation key to use for getting tokens (default is CoreAnnotations.TokensAnnotation.class)
   */
  public Class defaultTokensAnnotationKey;

  /**
   * Annotation key to use for getting text (default is CoreAnnotations.TextAnnotation.class)
   */
  public Class defaultTextAnnotationKey;

  /**
   * List of keys indicating the per-token annotations (default is null).
   * If specified, each token will be annotated with the extracted results from the
   * {@link #defaultResultsAnnotationExtractor}.
   * If null, then individual tokens that are matched are not annotated.
   */
  public List<Class> defaultTokensResultAnnotationKey;

  /**
   * List of keys indicating what fields should be annotated for the aggregated CoreMap.
   * If specified, the aggregated CoreMap is annotated with the extracted results from the
   * {@link #defaultResultsAnnotationExtractor}.
   * If null, then the aggregated CoreMap is not annotated.
   */
  public List<Class> defaultResultAnnotationKey;

  /**
   * Annotation key to use during composite phase for storing matched sequences and to match against.
   */
  public Class defaultNestedResultsAnnotationKey;

  /**
   * How should the tokens be aggregated when collapsing a sequence of tokens into one CoreMap
   */
  public Map<Class, CoreMapAttributeAggregator> defaultTokensAggregators;

  /**
   * Aggregator lazily built by {@link #getDefaultTokensAggregator()} from
   * {@link #defaultTokensAggregators} and {@link #aggregateToTokens}.
   */
  private CoreMapAggregator defaultTokensAggregator;

  /**
   * Whether we should merge and output CoreLabels or not.
   */
  public boolean aggregateToTokens;

  /**
   * How annotations are extracted from the MatchedExpression.
   * If the result type is a List and more than one annotation key is specified,
   * then the result is paired with the annotation key.
   * Example: If annotation key is [ner,normalized] and result is [CITY,San Francisco]
   * then the final CoreMap will have ner=CITY, normalized=San Francisco.
   * Otherwise, the result is treated as one object (all keys will be assigned that value).
   */
  Function<MatchedExpression,?> defaultResultsAnnotationExtractor;

  /**
   * Interface for performing custom binding of values to the environment
   */
  public interface Binder {
    void init(String prefix, Properties props);
    void bind(Env env);
  }

  /**
   * Creates an environment that uses the given parser to turn string bindings into patterns.
   *
   * @param p parser used by {@link #getNodePattern} and {@link #getSequencePatternExpr}
   *          when a variable is bound to a {@code String}
   */
  public Env(SequencePattern.Parser p) {
    this.parser = p;
  }

  /**
   * Binds the standard names {@code FALSE}, {@code TRUE}, {@code NIL},
   * {@code ENV} (this environment) and {@code tags}.
   */
  public void initDefaultBindings() {
    bind("FALSE", Expressions.FALSE);
    bind("TRUE", Expressions.TRUE);
    bind("NIL", Expressions.NIL);
    bind("ENV", this);
    bind("tags", Tags.TagsAnnotation.class);
  }

  public Map<String, Object> getDefaults() {
    return defaults;
  }

  public void setDefaults(Map<String, Object> defaults) {
    this.defaults = defaults;
  }

  public Map<Class, CoreMapAttributeAggregator> getDefaultTokensAggregators() {
    return defaultTokensAggregators;
  }

  /**
   * Sets the per-attribute aggregators and discards any aggregator previously built by
   * {@link #getDefaultTokensAggregator()}, so that the next call rebuilds it from the new map.
   *
   * @param defaultTokensAggregators new aggregators (may be null)
   */
  public void setDefaultTokensAggregators(Map<Class, CoreMapAttributeAggregator> defaultTokensAggregators) {
    this.defaultTokensAggregators = defaultTokensAggregators;
    // The cached aggregator was derived from the previous map; without this reset
    // the getter would keep returning the stale instance.
    this.defaultTokensAggregator = null;
  }

  /**
   * Returns the aggregator used to collapse a sequence of tokens into one CoreMap,
   * building and caching it on first use.
   * An aggregator is only built when {@link #defaultTokensAggregators} is set or
   * {@link #aggregateToTokens} is true; otherwise the cached value (initially null) is returned.
   *
   * @return the cached or newly built aggregator, possibly null
   */
  public CoreMapAggregator getDefaultTokensAggregator() {
    boolean configured = defaultTokensAggregators != null || aggregateToTokens;
    if (defaultTokensAggregator == null && configured) {
      // A token factory is only supplied when results should be merged into CoreLabels
      CoreLabelTokenFactory tokenFactory = aggregateToTokens ? new CoreLabelTokenFactory() : null;
      Map<Class, CoreMapAttributeAggregator> aggregators = (defaultTokensAggregators != null)
          ? defaultTokensAggregators
          : CoreMapAttributeAggregator.DEFAULT_NUMERIC_TOKENS_AGGREGATORS;
      defaultTokensAggregator = CoreMapAggregator.getAggregator(aggregators, null, tokenFactory);
    }
    return defaultTokensAggregator;
  }

  public Class getDefaultTextAnnotationKey() {
    return defaultTextAnnotationKey;
  }

  public void setDefaultTextAnnotationKey(Class defaultTextAnnotationKey) {
    this.defaultTextAnnotationKey = defaultTextAnnotationKey;
  }

  public Class getDefaultTokensAnnotationKey() {
    return defaultTokensAnnotationKey;
  }

  public void setDefaultTokensAnnotationKey(Class defaultTokensAnnotationKey) {
    this.defaultTokensAnnotationKey = defaultTokensAnnotationKey;
  }

  public List<Class> getDefaultTokensResultAnnotationKey() {
    return defaultTokensResultAnnotationKey;
  }

  public void setDefaultTokensResultAnnotationKey(Class... defaultTokensResultAnnotationKey) {
    this.defaultTokensResultAnnotationKey = Arrays.asList(defaultTokensResultAnnotationKey);
  }

  public void setDefaultTokensResultAnnotationKey(List<Class> defaultTokensResultAnnotationKey) {
    this.defaultTokensResultAnnotationKey = defaultTokensResultAnnotationKey;
  }

  public List<Class> getDefaultResultAnnotationKey() {
    return defaultResultAnnotationKey;
  }

  public void setDefaultResultAnnotationKey(Class... defaultResultAnnotationKey) {
    this.defaultResultAnnotationKey = Arrays.asList(defaultResultAnnotationKey);
  }

  public void setDefaultResultAnnotationKey(List<Class> defaultResultAnnotationKey) {
    this.defaultResultAnnotationKey = defaultResultAnnotationKey;
  }

  public Class getDefaultNestedResultsAnnotationKey() {
    return defaultNestedResultsAnnotationKey;
  }

  public void setDefaultNestedResultsAnnotationKey(Class defaultNestedResultsAnnotationKey) {
    this.defaultNestedResultsAnnotationKey = defaultNestedResultsAnnotationKey;
  }

  public Function<MatchedExpression, ?> getDefaultResultsAnnotationExtractor() {
    return defaultResultsAnnotationExtractor;
  }

  public void setDefaultResultsAnnotationExtractor(Function<MatchedExpression, ?> defaultResultsAnnotationExtractor) {
    this.defaultResultsAnnotationExtractor = defaultResultsAnnotationExtractor;
  }

  public Class getSequenceMatchResultExtractor() {
    return sequenceMatchResultExtractor;
  }

  public void setSequenceMatchResultExtractor(Class sequenceMatchResultExtractor) {
    this.sequenceMatchResultExtractor = sequenceMatchResultExtractor;
  }

  public Class getStringMatchResultExtractor() {
    return stringMatchResultExtractor;
  }

  public void setStringMatchResultExtractor(Class stringMatchResultExtractor) {
    this.stringMatchResultExtractor = stringMatchResultExtractor;
  }

  /**
   * Returns the live map of variable bindings (not a copy).
   */
  public Map<String, Object> getVariables() {
    return variables;
  }

  public void setVariables(Map<String, Object> variables) {
    this.variables = variables;
  }

  public void clearVariables() {
    this.variables.clear();
  }

  public int getDefaultStringPatternFlags() {
    return defaultStringPatternFlags;
  }

  public void setDefaultStringPatternFlags(int defaultStringPatternFlags) {
    this.defaultStringPatternFlags = defaultStringPatternFlags;
  }

  public int getDefaultStringMatchFlags() {
    return defaultStringMatchFlags;
  }

  public void setDefaultStringMatchFlags(int defaultStringMatchFlags) {
    this.defaultStringMatchFlags = defaultStringMatchFlags;
  }

  /** Legal names for string regex variables: "$" followed by letters, digits or underscores. */
  private static final Pattern STRING_REGEX_VAR_NAME_PATTERN = Pattern.compile("\\$[A-Za-z0-9_]+");

  /**
   * Registers a variable that {@link #expandStringRegex} will replace with the given regex.
   * Both the variable name and the regex are quoted, so the name is matched literally
   * and the regex is inserted verbatim.
   *
   * @param var variable name; must match {@code \$[A-Za-z0-9_]+}
   * @param regex regular expression text substituted for the variable
   * @throws IllegalArgumentException if {@code var} is not a valid variable name
   */
  public void bindStringRegex(String var, String regex) {
    // Enforce requirements on variable names ($alphanumeric_)
    if (!STRING_REGEX_VAR_NAME_PATTERN.matcher(var).matches()) {
      throw new IllegalArgumentException("StringRegex binding error: Invalid variable name " + var);
    }
    Pattern varPattern = Pattern.compile(Pattern.quote(var));
    String replace = Matcher.quoteReplacement(regex);
    stringRegexVariables.put(var, new Pair<>(varPattern, replace));
  }

  /**
   * Replaces every registered string regex variable occurring in {@code regex}.
   *
   * <p>
   * Variables are substituted one at a time in the iteration order of the underlying map.
   * As a consequence, a variable whose name is a prefix of another (e.g. {@code $A} and
   * {@code $AB}) may be substituted inside the longer name, and whether variables contained
   * in a replacement are themselves expanded depends on that order.
   * </p>
   *
   * @param regex regular expression possibly containing variable names
   * @return the regex with the registered variables substituted
   */
  public String expandStringRegex(String regex) {
    String expanded = regex;
    for (Pair<Pattern, String> substitution : stringRegexVariables.values()) {
      Pattern namePattern = substitution.first();
      String replacement = substitution.second();
      expanded = namePattern.matcher(expanded).replaceAll(replacement);
    }
    return expanded;
  }

  /**
   * Expands variables in {@code regex} and compiles the result using
   * {@link #defaultStringPatternFlags}.
   *
   * @param regex regular expression possibly containing variable names
   * @return compiled pattern
   */
  public Pattern getStringPattern(String regex) {
    String expanded = expandStringRegex(regex);
    return Pattern.compile(expanded, defaultStringPatternFlags);
  }

  /**
   * Binds {@code name} to {@code obj}, or removes the binding when {@code obj} is null.
   *
   * @param name variable name
   * @param obj value to bind; null removes any existing binding
   */
  public void bind(String name, Object obj) {
    if (obj != null) {
      variables.put(name, obj);
    } else {
      variables.remove(name);
    }
  }

  /**
   * Binds {@code name} to the pattern expression of {@code pattern}.
   * A null {@code pattern} removes the binding; a call such as {@code bind(name, null)}
   * resolves to this overload (it is the most specific one), so null must be handled here.
   *
   * @param name variable name
   * @param pattern compiled pattern whose expression is bound; null removes any existing binding
   */
  public void bind(String name, SequencePattern pattern) {
    if (pattern == null) {
      variables.remove(name);
    } else {
      // Explicit cast keeps overload resolution on the Object variant
      bind(name, (Object) pattern.getPatternExpr());
    }
  }

  /**
   * Removes the binding for {@code name}, if any.
   *
   * @param name variable name
   */
  public void unbind(String name) {
    // Remove directly: bind(name, null) would select the SequencePattern overload
    variables.remove(name);
  }

  /**
   * Looks up {@code name} and converts the bound value to a {@code NodePattern}.
   * Accepted values are a {@code SequencePattern} whose expression is a
   * {@code NodePatternExpr}, a {@code NodePatternExpr}, a {@code NodePattern},
   * or a {@code String} (parsed with {@link #parser}).
   *
   * @param name variable name
   * @return the node pattern, or null if the variable is not bound
   * @throws Error if the bound value has an unsupported type
   * @throws RuntimeException if a bound string cannot be parsed to a node pattern
   */
  public NodePattern getNodePattern(String name) {
    Object obj = variables.get(name);
    if (obj == null) {
      return null;
    }
    if (obj instanceof SequencePattern) {
      SequencePattern.PatternExpr expr = ((SequencePattern) obj).getPatternExpr();
      if (expr instanceof SequencePattern.NodePatternExpr) {
        return ((SequencePattern.NodePatternExpr) expr).nodePattern;
      }
      throw new Error("Invalid node pattern class: " + expr.getClass() + " for variable " + name);
    }
    if (obj instanceof SequencePattern.NodePatternExpr) {
      return ((SequencePattern.NodePatternExpr) obj).nodePattern;
    }
    if (obj instanceof NodePattern) {
      return (NodePattern) obj;
    }
    if (obj instanceof String) {
      try {
        SequencePattern.NodePatternExpr pe = (SequencePattern.NodePatternExpr) parser.parseNode(this, (String) obj);
        return pe.nodePattern;
      } catch (Exception pex) {
        throw new RuntimeException("Error parsing " + obj + " to node pattern", pex);
      }
    }
    throw new Error("Invalid node pattern variable class: " + obj.getClass() + " for variable " + name);
  }

  /**
   * Looks up {@code name} and converts the bound value to a sequence pattern expression.
   * Accepted values are a {@code SequencePattern}, a {@code PatternExpr}, a
   * {@code NodePattern} (wrapped in a new {@code NodePatternExpr}), or a {@code String}
   * (parsed with {@link #parser}).
   *
   * @param name variable name
   * @param copy whether to return a copy; only applied when the bound value is itself a
   *             {@code PatternExpr}
   * @return the pattern expression, or null if the variable is not bound
   * @throws Error if the bound value has an unsupported type
   * @throws RuntimeException if a bound string cannot be parsed to a sequence pattern
   */
  public SequencePattern.PatternExpr getSequencePatternExpr(String name, boolean copy) {
    Object obj = variables.get(name);
    if (obj == null) {
      return null;
    }
    if (obj instanceof SequencePattern) {
      return ((SequencePattern) obj).getPatternExpr();
    }
    if (obj instanceof SequencePattern.PatternExpr) {
      SequencePattern.PatternExpr pe = (SequencePattern.PatternExpr) obj;
      return copy ? pe.copy() : pe;
    }
    if (obj instanceof NodePattern) {
      return new SequencePattern.NodePatternExpr((NodePattern) obj);
    }
    if (obj instanceof String) {
      try {
        return parser.parseSequence(this, (String) obj);
      } catch (Exception pex) {
        throw new RuntimeException("Error parsing " + obj + " to sequence pattern", pex);
      }
    }
    throw new Error("Invalid sequence pattern variable class: " + obj.getClass());
  }

  /**
   * Returns the raw value bound to {@code name}.
   *
   * @param name variable name
   * @return the bound value, or null if the variable is not bound
   */
  public Object get(String name) {
    return variables.get(name);
  }

  // Functions for storing temporary thread specific variables
  // that are used when running tokensregex

  /**
   * Pushes {@code value} onto the calling thread's stack for {@code name}.
   *
   * @param name variable name
   * @param value value to push (may be null)
   */
  public void push(String name, Object value) {
    Map<String, Stack<Object>> vars = threadLocalVariables.get();
    if (vars == null) {
      vars = new HashMap<>(); //Generics.newHashMap());
      threadLocalVariables.set(vars);
    }
    vars.computeIfAbsent(name, key -> new Stack<>()).push(value);
  }

  /**
   * Removes and returns the top value of the calling thread's stack for {@code name}.
   *
   * @param name variable name
   * @return the popped value, or null if nothing has been pushed for {@code name} on this thread
   */
  public Object pop(String name) {
    Stack<Object> stack = threadLocalStack(name);
    return (stack == null || stack.isEmpty()) ? null : stack.pop();
  }

  /**
   * Returns, without removing it, the top value of the calling thread's stack for {@code name}.
   *
   * @param name variable name
   * @return the top value, or null if nothing has been pushed for {@code name} on this thread
   */
  public Object peek(String name) {
    Stack<Object> stack = threadLocalStack(name);
    return (stack == null || stack.isEmpty()) ? null : stack.peek();
  }

  /** Returns the calling thread's stack for {@code name}, or null if none exists yet. */
  private Stack<Object> threadLocalStack(String name) {
    Map<String, Stack<Object>> vars = threadLocalVariables.get();
    return (vars == null) ? null : vars.get(name);
  }

}