SequenceMatchRules.java example

Explorer
CoreNLP-master
package edu.stanford.nlp.ling.tokensregex;

import edu.stanford.nlp.ling.tokensregex.types.AssignableExpression;
import edu.stanford.nlp.ling.tokensregex.types.Expression;
import edu.stanford.nlp.ling.tokensregex.types.Expressions;
import edu.stanford.nlp.ling.tokensregex.types.Value;
import edu.stanford.nlp.util.*;

import java.io.Serializable;
import java.util.*;
import java.util.function.Function;
import java.util.function.Predicate;
import java.util.regex.MatchResult;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Rules for matching sequences using regular expressions
 * <p>
 * There are 2 types of rules:
 * <ol>
 * <li><b>Assignment rules</b> which assign a value to a variable for later use.
 * </li>
 * <li><b>Extraction rules</b> which specify how regular expression patterns are to be matched against text,
 *   which matched text expressions are to be extracted, and what value to assign to the matched expression.</li>
 * </ol>
 * </p>
 *
 * NOTE: {@code #} or {@code //} can be used to indicate one-line comments
 *
 * <p><b>Assignment Rules</b> are used to assign values to variables.
 *     The basic format is: {@code variable = value}
 * </p>
 * <p>
 * <em>Variable Names</em>:
 *   <ul>
 *     <li>Variable names should follow the pattern [A-Za-z_][A-Za-z0-9_]*</li>
 *     <li>Variable names for use in regular expressions (to be expanded later) must start with {@code $}</li>
 *   </ul>
 * </p>
 * <p>
 * <em>Value Types</em>:
 * <table>
 *   <tr><th>Type</th><th>Format</th><th>Example</th><th>Description</th></tr>
 *   <tr><td>{@code BOOLEAN}</td><td>{@code TRUE | FALSE}</td><td>{@code TRUE}</td><td></td></tr>
 *   <tr><td>{@code STRING}</td><td>{@code "..."}</td><td>{@code "red"}</td><td></td></tr>
 *   <tr><td>{@code INTEGER}</td><td>{@code [+-]\d+}</td><td>{@code 1500}</td><td></td></tr>
 *   <tr><td>{@code LONG}</td><td>{@code [+-]\d+L}</td><td>{@code 1500000000000L}</td><td></td></tr>
 *   <tr><td>{@code DOUBLE}</td><td>{@code [+-]\d*\.\d+}</td><td>{@code 6.98}</td><td></td></tr>
 *   <tr><td>{@code REGEX}</td><td>{@code /.../}</td><td>{@code /[Aa]pril/}</td>
 *       <td>String regular expression {@link Pattern}</td></tr>
 *   <tr><td>{@code TOKENS_REGEX}</td><td>{@code ( [...] [...] ... ) }</td><td>{@code ( /up/ /to/ /4/ /months/ )}</td>
 *       <td>Tokens regular expression {@link TokenSequencePattern}</td></tr>
 *   <tr><td>{@code LIST}</td><td>{@code ( [item1] , [item2], ... )}</td><td>{@code ("red", "blue", "yellow" )}</td>
 *       <td></td></tr>
 * </table>
 * </p>
 * <p>
 * Some typical uses and examples for assignment rules include:
 * <ol>
 *  <li>Assignment of value to variables for use in later rules</li>
 *  <li>Binding of text key to annotation key (as {@code Class}).
 *    <pre>
 *      tokens = { type: "CLASS", value: "edu.stanford.nlp.ling.CoreAnnotations$TokensAnnotation" }
 *    </pre>
 *  </li>
 *  <li>Defining regular expressions macros to be embedded in other regular expressions
 *    <pre>
 *      $SEASON = "/spring|summer|fall|autumn|winter/"
 *      $NUM = ( [ { numcomptype:NUMBER } ] )
 *    </pre>
 *  </li>
 *  <li>Setting default environment variables.
 *      Rules are applied with respect to an environment ({@link Env}), which can be accessed using the variable {@code ENV}.
 *      Members of the Environment can be set as needed.
 *    <pre>
 *      # Set default parameters to be used when reading rules
 *      ENV.defaults["ruleType"] = "tokens"
 *      # Set default string pattern flags (to case-insensitive)
 *      ENV.defaultStringPatternFlags = 2
 *      # Specifies that the result should go into the {@code tokens}  key (as defined above).
 *      ENV.defaultResultAnnotationKey = tokens
 *    </pre>
 *  </li>
 *  <li>Defining options</li>
 * </ol>
 * </p>
 *
 * Predefined values are:
 * <table>
 *   <tr><th>Variable</th><th>Type</th><th>Description</th></tr>
 *   <tr><td>{@code ENV}</td><td>{@link Env}</td><td>The environment with respect to which the rules are applied.</td></tr>
 *   <tr><td>{@code TRUE}</td><td>{@code BOOLEAN}</td><td>The {@code Boolean}  value {@code true}.</td></tr>
 *   <tr><td>{@code FALSE}</td><td>{@code BOOLEAN}</td><td>The {@code Boolean} value {@code false}.</td></tr>
 *   <tr><td>{@code NIL}</td><td>{@code}</td><td>The {@code null} value.</td></tr>
 *   <tr><td>{@code tags}</td><td>{@code Class}</td><td>The annotation key {@link edu.stanford.nlp.ling.tokensregex.types.Tags.TagsAnnotation}.</td></tr>
 * </table>
 * </p>
 *
 * <p><b>Extraction Rules</b> specify how regular expression patterns are to be matched against text.
 * See {@link CoreMapExpressionExtractor} for more information on the types of the rules, and in what sequence the rules are applied.
 * A basic rule can be specified using the following template:
 * <pre>{
 *        # Type of the rule
 *        ruleType: "tokens" | "text" | "composite" | "filter",
 *        # Pattern to match against
 *        pattern: ( <TokenSequencePattern> ) | /<TextPattern>/,
 *        # Resulting value to go into the resulting annotation
 *        result: ...
 *
 *        # More fields following...
 *      }
 * </pre>
 * Example:
 * <pre>
 *   {
 *     ruleType: "tokens",
 *     pattern: ( /one/ ),
 *     result: 1
 *   }
 * </pre>
 * </p>
 * Extraction rule fields (most fields are optional):
 * <table>
 *   <tr><th>Field</th><th>Values</th><th>Example</th><th>Description</th></tr>
 *   <tr><td>{@code ruleType}</td><td>{@code "tokens" | "text" | "composite" | "filter" }</td>
 *      <td>{@code tokens}</td><td>Type of the rule (required).</td></tr>
 *   <tr><td>{@code pattern}</td><td>{@code <Token Sequence Pattern> = (...) | <Text Pattern> = /.../}</td>
 *      <td>{@code ( /winter/ /of/ $YEAR )}</td><td>Pattern to match against.
 *      See {@link TokenSequencePattern} and {@link Pattern} for
 *      how to specify patterns over tokens and strings (required).</td></tr>
 *   <tr><td>{@code action}</td><td>{@code <Action List> = (...)}</td>
 *      <td>{@code ( Annotate($0, ner, "DATE") )}</td><td>List of actions to apply when the pattern is triggered.
 *      Each action is a {@link Expressions TokensRegex Expression}</td></tr>
 *   <tr><td>{@code result}</td><td>{@code <Expression>}</td>
 *      <td>{@code}</td><td>Resulting value to go into the resulting annotation.  See {@link Expressions} for how to specify the result.</td></tr>
 *   <tr><td>{@code name}</td><td>{@code STRING}</td>
 *      <td>{@code}</td><td>Name to identify the extraction rule.</td></tr>
 *   <tr><td>{@code stage}</td><td>{@code INTEGER}</td>
 *      <td>{@code}</td><td>Stage at which the rule is to be applied.  Rules are grouped in stages, which are applied from lowest to highest.</td></tr>
 *   <tr><td>{@code active}</td><td>{@code Boolean}</td>
 *      <td>{@code}</td><td>Whether this rule is enabled (active) or not (default true).</td></tr>
 *   <tr><td>{@code priority}</td><td>{@code DOUBLE}</td>
 *      <td>{@code}</td><td>Priority of rule.  Within a stage, matches from higher priority rules are preferred.</td></tr>
 *   <tr><td>{@code weight}</td><td>{@code DOUBLE}</td>
 *      <td>{@code}</td><td>Weight of rule (not currently used).</td></tr>
 *   <tr><td>{@code over}</td><td>{@code CLASS}</td>
 *      <td>{@code}</td><td>Annotation field to check pattern against.</td></tr>
 *   <tr><td>{@code matchFindType}</td><td>{@code FIND_NONOVERLAPPING | FIND_ALL}</td>
 *      <td>{@code}</td><td>Whether to find all matched expressions or just the nonoverlapping ones (default {@code FIND_NONOVERLAPPING}).</td></tr>
 *   <tr><td>{@code matchWithResults}</td><td>{@code Boolean}</td>
 *      <td>{@code}</td><td>Whether results of the matches should be returned (default false).
 *        Set to true to access captured groups of embedded regular expressions.</td></tr>
 *   <tr><td>{@code matchedExpressionGroup}</td><td>{@code Integer}</td>
 *      <td>{@code 2}</td><td>What group should be treated as the matched expression group (default 0).</td></tr>
 * </table>
 *
 * @author Angel Chang
 * @see CoreMapExpressionExtractor
 * @see TokenSequencePattern
 */
public class SequenceMatchRules {

  /** Private so the class cannot be instantiated from outside; it is used as a holder for static members and nested classes. */
  private SequenceMatchRules() { } // static class with inner classes

  /**
   * Marker type for a sequence match rule.
   * Declares no methods; it only gives the rule classes in this file a common supertype.
   */
  public interface Rule { }

  /**
   * Rule describing which value is to be assigned to a variable.
   * <p>
   * The assignment expression is built once, in the constructor, and is run
   * every time {@link #evaluate(Env)} is called.
   */
  public static class AssignmentRule implements Rule {
    /** Expression returned by {@code varExpr.assign(value)} at construction time. */
    Expression expr;

    /**
     * @param varExpr assignable expression that is the target of the assignment
     * @param value   expression to be assigned to {@code varExpr}
     */
    public AssignmentRule(AssignableExpression varExpr, Expression value) {
      this.expr = varExpr.assign(value);
    }

    /**
     * Evaluates the stored assignment expression.
     *
     * @param env environment the expression is evaluated against
     */
    public void evaluate(Env env) {
      this.expr.evaluate(env);
    }
  }

  /**
   * Rule that specifies how to extract sequence of MatchedExpression from an annotation (CoreMap).
   * <p>
   * The rule itself is a configurable holder: {@link #extract} delegates to {@link #extractRule}
   * and {@link #test} delegates to {@link #filterRule}. Both delegates are plain public fields
   * that must be installed before those methods are called.
   *
   * @param <S> Input type passed to {@link #extract}
   * @param <T> Output type (MatchedExpression)
   */
  public static class AnnotationExtractRule<S, T extends MatchedExpression> implements Rule, ExtractRule<S,T>, Predicate<T>, Serializable {

    private static final long serialVersionUID = -2148125332223720424L;

    /** Name of the rule */
    public String name;
    /** Stage in which this rule should be applied with respect to others (initially 1) */
    public int stage = 1;
    /** Priority in which this rule should be applied with respect to others */
    public double priority;
    /** Weight given to the rule (how likely is this rule to fire) */
    public double weight;
    /** Annotation field to apply rule over: text or tokens or numerizedtokens */
    public Class annotationField;
    public Class tokensAnnotationField;
    /**  Annotation field(s) on individual tokens to put new annotation */
    public List<Class> tokensResultAnnotationField;
    /**  Annotation field(s) to put new annotation */
    public List<Class> resultAnnotationField;
    /** Annotation field for child/nested annotations */
    public Class resultNestedAnnotationField;
    public SequenceMatcher.FindType matchFindType;
    /** Which group to take as the matched expression - default is 0 */
    public int matchedExpressionGroup;
    public boolean matchWithResults;
    // TODO: Combine ruleType and isComposite
    /** Type of rule to apply: token string match, pattern string match */
    public String ruleType;
    public boolean isComposite;
    public boolean includeNested = true;  // TODO: Get parameter from somewhere....
    /** Whether the rule is enabled (initially true) */
    public boolean active = true;
    /** Actual rule performing the extraction (converting annotation to MatchedExpression) */
    public ExtractRule<S, T> extractRule;
    /** Predicate that {@link #test} delegates to */
    public Predicate<T> filterRule;
    /** Pattern - the type of which is dependent on the rule type */
    public Object pattern;
    public Expression result;

    /**
     * Copies the recognized entries of {@code attributes} into the fields of this rule.
     * <p>
     * Each value is first resolved with {@code Expressions.asObject(env, value)} and then cast to
     * the type the field needs, so a value of the wrong type results in a
     * {@link ClassCastException}. Keys that are not listed in the switch below are ignored.
     *
     * @param env environment used to resolve attribute values
     * @param attributes attribute name to (unresolved) value
     * @throws IllegalArgumentException if the {@code over} attribute resolves to something that
     *         is neither a {@code Class} nor a {@code String} while an annotation field is already set
     */
    public void update(Env env, Map<String, Object> attributes) {
      for (Map.Entry<String, Object> entry : attributes.entrySet()) {
        String key = entry.getKey();
        Object obj = entry.getValue();
        switch (key) {
          case "name":
            name = (String) Expressions.asObject(env, obj);
            break;
          case "priority":
            priority = ((Number) Expressions.asObject(env, obj)).doubleValue();
            break;
          case "stage":
            stage = ((Number) Expressions.asObject(env, obj)).intValue();
            break;
          case "weight":
            weight = ((Number) Expressions.asObject(env, obj)).doubleValue();
            break;
          case "over":
            Object annoKey = Expressions.asObject(env, obj);
            if (annoKey instanceof Class) {
              annotationField = (Class) annoKey;
            } else if (annoKey instanceof String) {
              annotationField = EnvLookup.lookupAnnotationKeyWithClassname(env, (String) annoKey);
            } else if (annotationField == null) {
              // NOTE(review): this branch tests the *field*, not annoKey, so any unusable key
              // (including null) silently falls back to CoreMap.class when no field is set yet.
              // Kept as-is because callers may rely on it - confirm whether annoKey == null was intended.
              annotationField = CoreMap.class;
            } else {
              throw new IllegalArgumentException("Invalid annotation key " + annoKey);
            }
            break;
          case "active":
            active = (Boolean) Expressions.asObject(env, obj);
            break;
          case "ruleType":
            ruleType = (String) Expressions.asObject(env, obj);
            break;
          case "matchFindType":
            matchFindType = SequenceMatcher.FindType.valueOf((String) Expressions.asObject(env, obj));
            break;
          case "matchWithResults":
            matchWithResults = (Boolean) Expressions.asObject(env, obj);
            break;
          case "matchedExpressionGroup":
            matchedExpressionGroup = ((Number) Expressions.asObject(env, obj)).intValue();
            break;
        }
      }
    }

    /** Delegates to {@link #extractRule}. */
    @Override
    public boolean extract(S in, List<T> out) {
      return extractRule.extract(in, out);
    }

    /** Delegates to {@link #filterRule}. */
    @Override
    public boolean test(T obj) {
      return filterRule.test(obj);
    }

    /**
     * Compares the configuration of this rule with that of {@code aer}.
     * <p>
     * Returns true only when both rules use matched expression group 0 and agree on stage,
     * annotation field, tokens annotation field, {@code matchWithResults}, rule type,
     * {@code isComposite}, {@code active} and result expression.
     *
     * @param aer rule to compare against
     * @return whether the two rules agree on all of the fields listed above
     */
    public boolean isMostlyCompatible(AnnotationExtractRule<S, T> aer) {
      // TODO: Check tokensResultAnnotationField, resultAnnotationField, resultNestedAnnotationField?
      return (stage == aer.stage
        && Objects.equals(annotationField, aer.annotationField)
        && Objects.equals(tokensAnnotationField, aer.tokensAnnotationField)
        && matchedExpressionGroup == 0
        && aer.matchedExpressionGroup == 0
        && matchWithResults == aer.matchWithResults
        && Objects.equals(ruleType, aer.ruleType)
        && isComposite == aer.isComposite
        && active == aer.active
        && Objects.equals(result, aer.result));
    }

    /** @return whether {@link #pattern} is a {@link TokenSequencePattern} */
    public boolean hasTokensRegexPattern() {
      // instanceof is already false for null, so no separate null check is needed
      return pattern instanceof TokenSequencePattern;
    }

    /**
     * @return the simple class name followed by the pattern in square brackets; a rule whose
     *         pattern has not been set yet prints as {@code [null]} instead of throwing
     */
    @Override
    public String toString() {
      // String concatenation converts a null pattern to "null"; calling pattern.toString() would throw
      return getClass().getSimpleName() + '[' + pattern + ']';
    }

  } // end static class AnnotationExtractRule


  /**
   * Builds an {@link AssignmentRule} for {@code var = result} and evaluates it
   * against {@code env} right away, before handing it back.
   *
   * @param env environment the new rule is evaluated against
   * @param var target of the assignment
   * @param result expression to assign
   * @return the rule, already evaluated once
   */
  public static AssignmentRule createAssignmentRule(Env env, AssignableExpression var, Expression result) {
    final AssignmentRule rule = new AssignmentRule(var, result);
    rule.evaluate(env);
    return rule;
  }

  /**
   * Turns a composite value into an extraction rule.
   * <p>
   * The value is simplified with {@code simplifyNoTypeConversion(env)}, each of its attributes is
   * copied (name to expression) into a fresh map, and the map is handed to
   * {@link #createExtractionRule(Env, Map)}.
   *
   * @param env environment used for simplification and rule creation
   * @param cv composite value describing the rule
   * @return the rule created from the attributes of {@code cv}
   */
  public static Rule createRule(Env env, Expressions.CompositeValue cv) {
    Expressions.CompositeValue simplified = cv.simplifyNoTypeConversion(env);
    Map<String, Object> attributesByName = new HashMap<>();
    for (String attributeName : simplified.getAttributes()) {
      attributesByName.put(attributeName, simplified.getExpression(attributeName));
    }
    return createExtractionRule(env, attributesByName);
  }

  /**
   * Creates an extraction rule from a map of attributes.
   * <p>
   * The rule type is read from the {@code "ruleType"} attribute; when that resolves to null and an
   * environment is given, the environment's default {@code "ruleType"} is used instead. The
   * matching creator is then asked to build the rule.
   *
   * @param env environment (may be null, in which case no environment default is consulted)
   * @param attributes attribute name to value
   * @return the rule built by the creator registered for the rule type
   * @throws IllegalArgumentException if no creator is found for the rule type
   */
  protected static AnnotationExtractRule createExtractionRule(Env env, Map<String,Object> attributes) {
    String type = (String) Expressions.asObject(env, attributes.get("ruleType"));
    if (type == null && env != null) {
      type = (String) env.getDefaults().get("ruleType");
    }
    final AnnotationExtractRuleCreator creator = lookupExtractRuleCreator(env, type);
    if (creator == null) {
      throw new IllegalArgumentException("Unknown rule type: " + type);
    }
    return creator.create(env, attributes);
  }

  /**
   * Creates an extraction rule from an explicit rule type, pattern and result.
   * <p>
   * A null {@code ruleType} is replaced by the environment's default {@code "ruleType"} when an
   * environment is given. The three values are packed into an attribute map under the keys
   * {@code "ruleType"}, {@code "pattern"} and {@code "result"} and passed to the matching creator.
   *
   * @param env environment (may be null, in which case no environment default is consulted)
   * @param ruleType rule type name, or null to use the default
   * @param pattern pattern for the rule
   * @param result result expression for the rule
   * @return the rule built by the creator found for the rule type
   * @throws IllegalArgumentException if no creator is found for the rule type
   */
  public static AnnotationExtractRule createExtractionRule(Env env, String ruleType, Object pattern, Expression result) {
    String effectiveType = ruleType;
    if (effectiveType == null && env != null) {
      effectiveType = (String) env.getDefaults().get("ruleType");
    }
    final AnnotationExtractRuleCreator creator = lookupExtractRuleCreator(env, effectiveType);
    if (creator == null) {
      throw new IllegalArgumentException("Unknown rule type: " + effectiveType);
    }
    final Map<String,Object> ruleAttributes = new HashMap<>();
    ruleAttributes.put("ruleType", effectiveType);
    ruleAttributes.put("pattern", pattern);
    ruleAttributes.put("result", result);
    return creator.create(env, ruleAttributes);
  }

  /** Rule type name registered for {@link #COMPOSITE_EXTRACT_RULE_CREATOR}. */
  public static final String COMPOSITE_RULE_TYPE = "composite";
  /** Rule type name registered for {@link #TOKEN_PATTERN_EXTRACT_RULE_CREATOR}. */
  public static final String TOKEN_PATTERN_RULE_TYPE = "tokens";
  /** Rule type name registered for {@link #TEXT_PATTERN_EXTRACT_RULE_CREATOR}. */
  public static final String TEXT_PATTERN_RULE_TYPE = "text";
  /** Rule type name for filter rules; registered below with the token pattern creator. */
  public static final String FILTER_RULE_TYPE = "filter";
  public static final TokenPatternExtractRuleCreator TOKEN_PATTERN_EXTRACT_RULE_CREATOR = new TokenPatternExtractRuleCreator();
  public static final CompositeExtractRuleCreator COMPOSITE_EXTRACT_RULE_CREATOR = new CompositeExtractRuleCreator();
  public static final TextPatternExtractRuleCreator TEXT_PATTERN_EXTRACT_RULE_CREATOR = new TextPatternExtractRuleCreator();
  /** Not placed in the registry below; it is used directly by {@code createMultiTokenPatternRule}. */
  public static final MultiTokenPatternExtractRuleCreator MULTI_TOKEN_PATTERN_EXTRACT_RULE_CREATOR = new MultiTokenPatternExtractRuleCreator();
  /** Creator returned by {@code lookupExtractRuleCreator} when the rule type is null. */
  public static final AnnotationExtractRuleCreator DEFAULT_EXTRACT_RULE_CREATOR = TOKEN_PATTERN_EXTRACT_RULE_CREATOR;
  /** Built-in rule type name to creator; filled once by the static initializer that follows. */
  private static final Map<String, AnnotationExtractRuleCreator> registeredRuleTypes = new HashMap<>();//Generics.newHashMap();
  static {
    // Must run after the creator constants above have been initialized (static initialization is textual order).
    registeredRuleTypes.put(TOKEN_PATTERN_RULE_TYPE, TOKEN_PATTERN_EXTRACT_RULE_CREATOR);
    registeredRuleTypes.put(COMPOSITE_RULE_TYPE, COMPOSITE_EXTRACT_RULE_CREATOR);
    registeredRuleTypes.put(TEXT_PATTERN_RULE_TYPE, TEXT_PATTERN_EXTRACT_RULE_CREATOR);
    // "filter" rules share the token pattern creator
    registeredRuleTypes.put(FILTER_RULE_TYPE, TOKEN_PATTERN_EXTRACT_RULE_CREATOR);
  }

  /**
   * Finds the creator responsible for a rule type.
   * <p>
   * An {@link AnnotationExtractRuleCreator} bound to {@code ruleType} in the environment wins.
   * Otherwise a null rule type yields {@link #DEFAULT_EXTRACT_RULE_CREATOR} and any other rule
   * type is looked up in the built-in registry.
   *
   * @param env environment to consult first (may be null)
   * @param ruleType rule type name (may be null)
   * @return the creator, or null if the rule type is not null and not registered
   */
  private static AnnotationExtractRuleCreator lookupExtractRuleCreator(Env env, String ruleType) {
    if (env != null) {
      Object bound = env.get(ruleType);
      if (bound instanceof AnnotationExtractRuleCreator) {
        return (AnnotationExtractRuleCreator) bound;
      }
    }
    return (ruleType == null) ? DEFAULT_EXTRACT_RULE_CREATOR : registeredRuleTypes.get(ruleType);
  }

  /**
   * Creates an extraction rule for a token pattern expression by delegating to
   * {@link #TOKEN_PATTERN_EXTRACT_RULE_CREATOR}.
   *
   * @param env environment passed through to the creator
   * @param expr pattern expression passed through to the creator
   * @param result result expression passed through to the creator
   * @return the rule built by the creator
   */
  public static AnnotationExtractRule createTokenPatternRule(Env env, SequencePattern.PatternExpr expr, Expression result) {
    return TOKEN_PATTERN_EXTRACT_RULE_CREATOR.create(env, expr, result);
  }

  /**
   * Creates an extraction rule for a text (string) pattern by delegating to
   * {@link #TEXT_PATTERN_EXTRACT_RULE_CREATOR}.
   *
   * @param env environment passed through to the creator
   * @param expr pattern string passed through to the creator
   * @param result result expression passed through to the creator
   * @return the rule built by the creator
   */
  public static AnnotationExtractRule createTextPatternRule(Env env, String expr, Expression result) {
    return TEXT_PATTERN_EXTRACT_RULE_CREATOR.create(env, expr, result);
  }

  /**
   * Creates a single extraction rule that matches several token sequence patterns, using
   * {@code template} as the source of the rule's settings.
   *
   * @param env environment passed through to the creator
   * @param template rule whose settings are copied into the new rule
   * @param patterns patterns the new rule matches
   * @return the rule built by {@link MultiTokenPatternExtractRuleCreator}
   */
  public static AnnotationExtractRule createMultiTokenPatternRule(Env env, AnnotationExtractRule template, List<TokenSequencePattern> patterns) {
    // The three-argument create is static, so it is invoked on the class rather than through an instance
    return MultiTokenPatternExtractRuleCreator.create(env, template, patterns);
  }

  /**
   * Base factory for {@link AnnotationExtractRule}s.
   * It produces rules that carry only default settings plus whatever the given attributes
   * override; subclasses in this file add the pattern-specific wiring.
   */
  public static class AnnotationExtractRuleCreator {

    /**
     * Creates a rule initialized from defaults.
     * The four annotation key fields are taken from {@code EnvLookup}; if an environment is
     * given, its defaults map is then applied through {@code update}.
     *
     * @param env environment supplying the defaults (may be null)
     * @return the new rule
     */
    public AnnotationExtractRule create(Env env) {
      final AnnotationExtractRule rule = new AnnotationExtractRule();
      rule.resultAnnotationField = EnvLookup.getDefaultResultAnnotationKey(env);
      rule.resultNestedAnnotationField = EnvLookup.getDefaultNestedResultsAnnotationKey(env);
      rule.tokensAnnotationField = EnvLookup.getDefaultTokensAnnotationKey(env);
      rule.tokensResultAnnotationField = EnvLookup.getDefaultTokensResultAnnotationKey(env);
      if (env == null) {
        return rule;
      }
      rule.update(env, env.getDefaults());
      return rule;
    }

    /**
     * Creates a rule from defaults (see {@link #create(Env)}) and then applies
     * {@code attributes} on top, when they are not null.
     *
     * @param env environment supplying the defaults
     * @param attributes attribute name to value, or null for defaults only
     * @return the new rule
     */
    public AnnotationExtractRule create(Env env, Map<String,Object> attributes) {
      final AnnotationExtractRule rule = create(env);
      if (attributes == null) {
        return rule;
      }
      rule.update(env, attributes);
      return rule;
    }
  }

  /**
   * Builds a {@code SingleAnnotationExtractor} configured from a rule.
   * <p>
   * Name, the annotation key fields, priority, weight and {@code includeNested} are copied from
   * {@code r}; the result annotation extractor and tokens aggregator come from {@code EnvLookup}.
   *
   * @param env environment passed to {@code EnvLookup}
   * @param r rule to copy settings from
   * @return the configured extractor
   */
  public static MatchedExpression.SingleAnnotationExtractor createAnnotationExtractor(Env env, AnnotationExtractRule r) {
    final MatchedExpression.SingleAnnotationExtractor sae = new MatchedExpression.SingleAnnotationExtractor();

    // identification and ranking
    sae.name = r.name;
    sae.priority = r.priority;
    sae.weight = r.weight;
    sae.includeNested = r.includeNested;

    // annotation keys
    sae.tokensAnnotationField = r.tokensAnnotationField;
    sae.tokensResultAnnotationField = r.tokensResultAnnotationField;
    sae.resultAnnotationField = r.resultAnnotationField;
    sae.resultNestedAnnotationField = r.resultNestedAnnotationField;

    // environment-provided helpers
    sae.resultAnnotationExtractor = EnvLookup.getDefaultResultAnnotationExtractor(env);
    sae.tokensAggregator = EnvLookup.getDefaultTokensAggregator(env);
    return sae;
  }


  /**
   * Creator for composite rules.
   * Rules built here have {@code isComposite} set and are matched over the rule's annotation
   * field, which falls back to the nested result annotation field when not set explicitly.
   */
  public static class CompositeExtractRuleCreator extends AnnotationExtractRuleCreator {

    /** Compiles {@code expr} into a {@link TokenSequencePattern} and wires it into {@code r}. */
    protected static void updateExtractRule(AnnotationExtractRule r,
                                     Env env,
                                     SequencePattern.PatternExpr expr,
                                     Expression action,
                                     Expression result) {
      TokenSequencePattern compiled = TokenSequencePattern.compile(expr);
      updateExtractRule(r, env, compiled, action, result);
    }

    /**
     * Installs extract rule, filter rule, pattern and result on {@code r}, and copies the rule's
     * weight and priority onto {@code pattern}.
     */
    protected static void updateExtractRule(AnnotationExtractRule r,
                                     Env env,
                                     TokenSequencePattern pattern,
                                     Expression action,
                                     Expression result) {
      MatchedExpression.SingleAnnotationExtractor sae = createAnnotationExtractor(env, r);
      SequenceMatchResultExtractor<CoreMap> toValue = new SequenceMatchResultExtractor<>(env, action, result);
      SequencePatternExtractRule<CoreMap,Value> valueRule = new SequencePatternExtractRule<>(pattern, toValue, r.matchFindType, r.matchWithResults);
      SequenceMatchedExpressionExtractor toExpression = new SequenceMatchedExpressionExtractor( sae, r.matchedExpressionGroup );
      SequencePatternExtractRule<CoreMap, MatchedExpression> expressionRule =
        new SequencePatternExtractRule<>(pattern, toExpression, r.matchFindType, r.matchWithResults);

      // A value can only be recomputed when the match result was kept as the expression's context
      sae.expressionToValue = me -> {
        if (me == null || !(me.context instanceof SequenceMatchResult)) {
          return null;
        }
        return toValue.apply( (SequenceMatchResult<CoreMap>) me.context);
      };
      sae.valueExtractor = new CoreMapFunctionApplier<>(env, r.annotationField, valueRule);

      r.pattern = pattern;
      r.result = result;
      r.extractRule = expressionRule;
      r.filterRule = new AnnotationMatchedFilter(sae);
      pattern.weight = r.weight;
      pattern.priority = r.priority;
    }

    /**
     * Marks {@code rule} as composite and makes sure it has an annotation field, falling back to
     * the nested result annotation field.
     */
    private static void markComposite(AnnotationExtractRule rule) {
      rule.isComposite = true;
      if (rule.annotationField == null) {
        rule.annotationField = rule.resultNestedAnnotationField;
        if (rule.annotationField == null) {
          throw new IllegalArgumentException("Error creating composite rule: no annotation field");
        }
      }
    }

    /** Creates a composite rule with default settings for the given pattern expression and result. */
    protected AnnotationExtractRule create(Env env, SequencePattern.PatternExpr expr, Expression result) {
      AnnotationExtractRule rule = super.create(env, null);
      markComposite(rule);
      rule.ruleType = TOKEN_PATTERN_RULE_TYPE;
      updateExtractRule(rule, env, expr, null, result);
      return rule;
    }

    /**
     * Creates a composite rule from attributes; {@code "pattern"}, {@code "action"} and
     * {@code "result"} are read from the map in addition to the common settings.
     */
    @Override
    public AnnotationExtractRule create(Env env, Map<String,Object> attributes) {
      AnnotationExtractRule rule = super.create(env, attributes);
      markComposite(rule);
      if (rule.ruleType == null) {
        rule.ruleType = TOKEN_PATTERN_RULE_TYPE;
      }
      TokenSequencePattern tokenPattern = (TokenSequencePattern) Expressions.asObject(env, attributes.get("pattern"));
      Expression actionExpr = Expressions.asExpression(env, attributes.get("action"));
      Expression resultExpr = Expressions.asExpression(env, attributes.get("result"));
      updateExtractRule(rule, env, tokenPattern, actionExpr, resultExpr);
      return rule;
    }

  }


  /**
   * Creator for rules that match a {@link TokenSequencePattern}.
   * The annotation field defaults to the rule's tokens annotation field.
   */
  public static class TokenPatternExtractRuleCreator extends AnnotationExtractRuleCreator {

    /** Compiles {@code expr} into a {@link TokenSequencePattern} and wires it into {@code r}. */
    protected static void updateExtractRule(AnnotationExtractRule r,
                                     Env env,
                                     SequencePattern.PatternExpr expr,
                                     Expression action,
                                     Expression result)
    {
      TokenSequencePattern compiled = TokenSequencePattern.compile(expr);
      updateExtractRule(r, env, compiled, action, result);
    }

    /**
     * Installs extract rule, filter rule, pattern and result on {@code r}, and copies the rule's
     * weight and priority onto {@code pattern}.
     * <p>
     * Which wrappers are used depends on the annotation field: a field other than null or
     * {@code CoreMap.class} uses the field-based applier/extract rule, otherwise the
     * "to list" variants are used.
     */
    protected static void updateExtractRule(AnnotationExtractRule r,
                                     Env env,
                                     TokenSequencePattern pattern,
                                     Expression action,
                                     Expression result) {
      MatchedExpression.SingleAnnotationExtractor sae = createAnnotationExtractor(env, r);
      SequenceMatchResultExtractor<CoreMap> toValue = new SequenceMatchResultExtractor<>(env, action, result);
      SequencePatternExtractRule<CoreMap,Value> valueRule = new SequencePatternExtractRule<>(pattern, toValue, r.matchFindType, r.matchWithResults);
      SequenceMatchedExpressionExtractor toExpression = new SequenceMatchedExpressionExtractor( sae, r.matchedExpressionGroup );
      SequencePatternExtractRule<CoreMap, MatchedExpression> expressionRule =
        new SequencePatternExtractRule<>(pattern, toExpression, r.matchFindType, r.matchWithResults);

      // A value can only be recomputed when the match result was kept as the expression's context
      sae.expressionToValue = me -> {
        if (me == null || !(me.context instanceof SequenceMatchResult)) {
          return null;
        }
        return toValue.apply( (SequenceMatchResult<CoreMap>) me.context);
      };

      boolean overWholeCoreMap = (r.annotationField == null || r.annotationField == CoreMap.class);
      if (overWholeCoreMap) {
        sae.valueExtractor = new CoreMapToListFunctionApplier<>(env, valueRule);
        r.extractRule = new CoreMapToListExtractRule<>(expressionRule);
      } else {
        sae.valueExtractor = new CoreMapFunctionApplier<>(env, r.annotationField, valueRule);
        r.extractRule = new CoreMapExtractRule<>(env, r.annotationField, expressionRule);
      }

      r.pattern = pattern;
      r.result = result;
      r.filterRule = new AnnotationMatchedFilter(sae);
      pattern.weight = r.weight;
      pattern.priority = r.priority;
    }

    /** Creates a token pattern rule with default settings for the given pattern expression and result. */
    protected AnnotationExtractRule create(Env env, SequencePattern.PatternExpr expr, Expression result) {
      AnnotationExtractRule rule = super.create(env, null);
      if (rule.annotationField == null) {
        rule.annotationField = rule.tokensAnnotationField;
      }
      rule.ruleType = TOKEN_PATTERN_RULE_TYPE;
      updateExtractRule(rule, env, expr, null, result);
      return rule;
    }

    /**
     * Creates a token pattern rule from attributes; {@code "pattern"}, {@code "action"} and
     * {@code "result"} are read from the map in addition to the common settings.
     */
    @Override
    public AnnotationExtractRule create(Env env, Map<String,Object> attributes) {
      AnnotationExtractRule rule = super.create(env, attributes);
      if (rule.annotationField == null) {
        rule.annotationField = rule.tokensAnnotationField;
      }
      if (rule.ruleType == null) {
        rule.ruleType = TOKEN_PATTERN_RULE_TYPE;
      }
      TokenSequencePattern tokenPattern = (TokenSequencePattern) Expressions.asObject(env, attributes.get("pattern"));
      Expression actionExpr = Expressions.asExpression(env, attributes.get("action"));
      Expression resultExpr = Expressions.asExpression(env, attributes.get("result"));
      updateExtractRule(rule, env, tokenPattern, actionExpr, resultExpr);
      return rule;
    }
  }


  /**
   * Creator for rules that match several {@link TokenSequencePattern}s at once through a
   * {@link MultiPatternMatcher}.
   * Rules can only be created from a template rule plus a list of patterns; creation from an
   * attribute map is not supported.
   */
  public static class MultiTokenPatternExtractRuleCreator extends AnnotationExtractRuleCreator {

    /**
     * Installs extract rule, filter rule, pattern and result on {@code r}.
     * <p>
     * Which wrappers are used depends on the annotation field: a field other than null or
     * {@code CoreMap.class} uses the field-based applier/extract rule, otherwise the
     * "to list" variants are used.
     *
     * @param r rule to update
     * @param env environment passed to the extractors
     * @param pattern multi-pattern matcher stored as the rule's pattern
     * @param action action expression (may be null)
     * @param result result expression
     */
    protected static void updateExtractRule(AnnotationExtractRule r,
                                     Env env,
                                     MultiPatternMatcher<CoreMap> pattern,
                                     Expression action,
                                     Expression result) {
      MatchedExpression.SingleAnnotationExtractor annotationExtractor = createAnnotationExtractor(env, r);
      SequenceMatchResultExtractor<CoreMap> valueExtractor = new SequenceMatchResultExtractor<>(env, action, result);
      MultiSequencePatternExtractRule<CoreMap,Value> valueExtractRule = new MultiSequencePatternExtractRule<>(pattern, valueExtractor);
      SequenceMatchedExpressionExtractor exprExtractor = new SequenceMatchedExpressionExtractor( annotationExtractor, r.matchedExpressionGroup );
      MultiSequencePatternExtractRule<CoreMap, MatchedExpression> exprExtractRule =
        new MultiSequencePatternExtractRule<>(pattern, exprExtractor);

      // A value can only be recomputed when the match result was kept as the expression's context
      annotationExtractor.expressionToValue = matched -> {
        if (matched != null && matched.context instanceof SequenceMatchResult ) {
          return valueExtractor.apply( (SequenceMatchResult<CoreMap>) matched.context);
        } else {
          return null;
        }
      };
      if (r.annotationField != null && r.annotationField != CoreMap.class) {
        annotationExtractor.valueExtractor = new CoreMapFunctionApplier<>(env, r.annotationField, valueExtractRule);
        r.extractRule = new CoreMapExtractRule<>(env, r.annotationField, exprExtractRule);
      } else {
        annotationExtractor.valueExtractor = new CoreMapToListFunctionApplier<>(env, valueExtractRule);
        r.extractRule = new CoreMapToListExtractRule<>(exprExtractRule);
      }
      r.filterRule = new AnnotationMatchedFilter(annotationExtractor);
      r.pattern = pattern;
      r.result = result;
    }

    /**
     * Creates a rule that matches all of {@code patterns}, copying its settings from
     * {@code aerTemplate}.
     * <p>
     * Priority and weight are set to NaN rather than copied, the rule's name is not copied, and
     * the rule type is always {@code TOKEN_PATTERN_RULE_TYPE} regardless of the template's type.
     *
     * @param env environment passed to the extractors
     * @param aerTemplate rule whose settings are copied
     * @param patterns patterns to combine into one multi-pattern matcher
     * @return the new rule
     */
    protected static AnnotationExtractRule create(Env env, SequenceMatchRules.AnnotationExtractRule aerTemplate, List<TokenSequencePattern> patterns) {
      AnnotationExtractRule r = new AnnotationExtractRule();
      r.stage = aerTemplate.stage;
      r.active = aerTemplate.active;
      r.priority = Double.NaN; // Priority from patterns?
      r.weight = Double.NaN;  // weight from patterns?
      r.annotationField = aerTemplate.annotationField;
      r.tokensAnnotationField = aerTemplate.tokensAnnotationField;
      r.tokensResultAnnotationField = aerTemplate.tokensResultAnnotationField;
      r.resultAnnotationField = aerTemplate.resultAnnotationField;
      r.resultNestedAnnotationField = aerTemplate.resultNestedAnnotationField;
      r.matchFindType = aerTemplate.matchFindType;
      r.matchedExpressionGroup = aerTemplate.matchedExpressionGroup;
      r.matchWithResults = aerTemplate.matchWithResults;
      r.isComposite = aerTemplate.isComposite;
      r.includeNested = aerTemplate.includeNested;
      r.result = aerTemplate.result;

      if (r.annotationField == null) { r.annotationField = r.tokensAnnotationField;  }
      // The template's rule type is deliberately not copied: it was always overwritten here
      r.ruleType = TOKEN_PATTERN_RULE_TYPE;
      MultiPatternMatcher<CoreMap> multiPatternMatcher = TokenSequencePattern.getMultiPatternMatcher(patterns);
      multiPatternMatcher.setMatchWithResult(r.matchWithResults);
      updateExtractRule(r, env, multiPatternMatcher, null, r.result);
      return r;
    }

    /**
     * Not supported for multi-pattern rules.
     *
     * @throws UnsupportedOperationException always
     */
    @Override
    public AnnotationExtractRule create(Env env, Map<String,Object> attributes) {
      throw new UnsupportedOperationException();
    }
  }


  /**
   * Creator for rules that match a string {@link Pattern} against a text annotation.
   * The annotation field defaults to {@code EnvLookup.getDefaultTextAnnotationKey(env)}.
   */
  public static class TextPatternExtractRuleCreator extends AnnotationExtractRuleCreator {

    /**
     * Obtains the string pattern for {@code expr} from the environment and installs extract
     * rule, filter rule, pattern and result on {@code r}.
     * {@code env} is dereferenced here, so it must not be null.
     */
    protected static void updateExtractRule(AnnotationExtractRule r,
                                     Env env,
                                     String expr,
                                     Expression action,
                                     Expression result) {
      final MatchedExpression.SingleAnnotationExtractor sae = createAnnotationExtractor(env, r);
      final Pattern stringPattern = env.getStringPattern(expr);
      final StringMatchResultExtractor toValue = new StringMatchResultExtractor(env, action, result);
      final StringPatternExtractRule<Value> valueRule = new StringPatternExtractRule<>(stringPattern, toValue);
      final StringMatchedExpressionExtractor toExpression = new StringMatchedExpressionExtractor( sae, r.matchedExpressionGroup );
      final StringPatternExtractRule<MatchedExpression> expressionRule = new StringPatternExtractRule<>(stringPattern, toExpression);

      sae.valueExtractor = new CoreMapFunctionApplier<>(env, r.annotationField, valueRule);

      r.pattern = stringPattern;
      r.result = result;
      r.extractRule = new CoreMapExtractRule<>(env, r.annotationField, expressionRule);
      r.filterRule = new AnnotationMatchedFilter(sae);
    }

    /** Creates a text pattern rule with default settings for the given pattern string and result. */
    protected AnnotationExtractRule create(Env env, String expr, Expression result) {
      AnnotationExtractRule rule = super.create(env, null);
      if (rule.annotationField == null) {
        rule.annotationField = EnvLookup.getDefaultTextAnnotationKey(env);
      }
      rule.ruleType = TEXT_PATTERN_RULE_TYPE;
      updateExtractRule(rule, env, expr, null, result);
      return rule;
    }

    /**
     * Creates a text pattern rule from attributes; {@code "pattern"}, {@code "action"} and
     * {@code "result"} are read from the map in addition to the common settings.
     */
    @Override
    public AnnotationExtractRule create(Env env, Map<String,Object> attributes) {
      AnnotationExtractRule rule = super.create(env, attributes);
      if (rule.annotationField == null) {
        rule.annotationField = EnvLookup.getDefaultTextAnnotationKey(env);
      }
      if (rule.ruleType == null) {
        rule.ruleType = TEXT_PATTERN_RULE_TYPE;
      }
      String patternText = (String) Expressions.asObject(env, attributes.get("pattern"));
      Expression actionExpr = Expressions.asExpression(env, attributes.get("action"));
      Expression resultExpr = Expressions.asExpression(env, attributes.get("result"));
      updateExtractRule(rule, env, patternText, actionExpr, resultExpr);
      return rule;
    }
  }

  /**
   * Predicate over matched expressions backed by a single-annotation extractor.
   * <p>
   * The extractor is applied to the annotation of the matched expression.
   * The predicate is {@code true} only when the extractor yields a non-null
   * {@link Value} that wraps {@code null}. When the wrapped object is non-null
   * the matched expression is annotated through the extractor (a side effect)
   * and the predicate is {@code false}.
   */
  public static class AnnotationMatchedFilter implements Predicate<MatchedExpression>, Serializable {

    MatchedExpression.SingleAnnotationExtractor extractor;

    public AnnotationMatchedFilter(MatchedExpression.SingleAnnotationExtractor extractor) {
      this.extractor = extractor;
    }

    @Override
    public boolean test(MatchedExpression me) {
      Value extracted = extractor.apply(me.getAnnotation());
      if (extracted == null) {
        // Extractor produced nothing at all.
        return false;
      }
      if (extracted.get() == null) {
        // A value holder without content.
        return true;
      }
      // Real content: record it on the matched expression.
      extractor.annotate(me);
      return false;
    }
  }

  /**
   * Computes a {@link Value} from a string regex {@link MatchResult} by
   * evaluating an optional action expression followed by an optional
   * result expression.
   */
  public static class StringMatchResultExtractor implements Function<MatchResult,Value> {
    Env env;
    Expression action;
    Expression result;

    /**
     * @param env environment passed to the expressions
     * @param action expression evaluated first, its value is discarded (may be null)
     * @param result expression whose value is returned (may be null)
     */
    public StringMatchResultExtractor(Env env, Expression action, Expression result) {
      this.env = env;
      this.action = action;
      this.result = result;
    }

    /** Same as the three-argument constructor with no action expression. */
    public StringMatchResultExtractor(Env env, Expression result) {
      this(env, null, result);
    }

    /**
     * @param matchResult the regex match to evaluate against
     * @return value of the result expression, or null when there is no result expression
     */
    @Override
    public Value apply(MatchResult matchResult) {
      if (action != null) {
        action.evaluate(env, matchResult);
      }
      return (result == null) ? null : result.evaluate(env, matchResult);
    }
  }

  /**
   * Computes a {@link Value} from a {@link SequenceMatchResult} by evaluating
   * an optional action expression followed by an optional result expression.
   *
   * @param <T> element type of the matched sequence
   */
  public static class SequenceMatchResultExtractor<T> implements Function<SequenceMatchResult<T>,Value> {
    Env env;
    Expression action;
    Expression result;

    /**
     * @param env environment passed to the expressions
     * @param action expression evaluated first, its value is discarded (may be null)
     * @param result expression whose value is returned (may be null)
     */
    public SequenceMatchResultExtractor(Env env, Expression action, Expression result) {
      this.env = env;
      this.action = action;
      this.result = result;
    }

    /** Same as the three-argument constructor with no action expression. */
    public SequenceMatchResultExtractor(Env env, Expression result) {
      this(env, null, result);
    }

    /**
     * @param matchResult the sequence match to evaluate against
     * @return value of the result expression, or null when there is no result expression
     */
    @Override
    public Value apply(SequenceMatchResult<T> matchResult) {
      if (action != null) {
        action.evaluate(env, matchResult);
      }
      return (result == null) ? null : result.evaluate(env, matchResult);
    }
  }

  /**
   * Interface for a rule that extracts a list of matched items from an input.
   *
   * @param <I> input type
   * @param <O> output type
   */
  public interface ExtractRule<I,O> {

    /**
     * Runs the rule on {@code in}.
     * <p>
     * The implementations in this file add each extracted item to {@code out}
     * and return {@code true} when at least one item was extracted.
     *
     * @param in input to extract from
     * @param out list that receives the extracted items
     * @return whether anything was extracted
     */
    boolean extract(I in, List<O> out);

  }

  /**
   * Extraction rule guarded by a predicate: the wrapped rule only sees inputs
   * that the filter accepts.
   *
   * @param <I> input type
   * @param <O> output type
   */
  public static class FilterExtractRule<I,O> implements ExtractRule<I,O> {

    Predicate<I> filter;
    ExtractRule<I,O> rule;

    /**
     * @param filter predicate deciding whether an input is processed
     * @param rule rule applied to accepted inputs
     */
    public FilterExtractRule(Predicate<I> filter, ExtractRule<I,O> rule) {
      this.filter = filter;
      this.rule = rule;
    }

    /**
     * @param filter predicate deciding whether an input is processed
     * @param rules rules applied (as a {@link ListExtractRule}) to accepted inputs
     */
    @SafeVarargs
    public FilterExtractRule(Predicate<I> filter, ExtractRule<I,O>... rules) {
      this.filter = filter;
      this.rule = new ListExtractRule<I,O>(rules);
    }

    /**
     * @return false when the filter rejects {@code in}; otherwise the result of the wrapped rule
     */
    @Override
    public boolean extract(I in, List<O> out) {
      // Short-circuit keeps the wrapped rule from running on rejected input.
      return filter.test(in) && rule.extract(in, out);
    }
  }

  /**
   * Extraction rule that applies a list of rules in sequence and aggregates
   * all matches found.
   *
   * @param <I> input type
   * @param <O> output type
   */
  public static class ListExtractRule<I,O> implements ExtractRule<I,O> {

    List<ExtractRule<I,O>> rules;

    /**
     * @param rules rules to apply, copied into an internal list in iteration order
     */
    public ListExtractRule(Collection<ExtractRule<I,O>> rules) {
      this.rules = new ArrayList<>(rules);
    }

    /**
     * @param rules rules to apply, in the given order
     */
    @SafeVarargs
    public ListExtractRule(ExtractRule<I,O>... rules) {
      this.rules = new ArrayList<>(rules.length);
      Collections.addAll(this.rules, rules);
    }

    /**
     * Applies every rule to {@code in}, letting each one add to {@code out}.
     *
     * @return true if at least one rule reported an extraction
     */
    @Override
    public boolean extract(I in, List<O> out) {
      boolean extracted = false;
      for (ExtractRule<I,O> rule : rules) {
        // No short-circuit: every rule must run even after one has matched.
        if (rule.extract(in, out)) {
          extracted = true;
        }
      }
      return extracted;
    }

    /** Appends the given rules to the end of the rule list. */
    @SafeVarargs
    public final void addRules(ExtractRule<I, O>... rules) {
      Collections.addAll(this.rules, rules);
    }

    /** Appends the given rules to the end of the rule list. */
    public void addRules(Collection<ExtractRule<I,O>> rules) {
      this.rules.addAll(rules);
    }

    /**
     * Builds a printable list with one entry per rule.
     * For an {@link AnnotationExtractRule} the entry is the string form of the first
     * non-null of its pattern, extract rule, filter rule, or else the rule itself;
     * for any other rule it is the rule's class name.
     *
     * @return string form of the list of rule descriptions
     */
    public String ruleList() {
      List<String> names = new ArrayList<>(rules.size());
      for (ExtractRule<I,O> rule : rules) {
        names.add(describe(rule));
      }
      return names.toString();
    }

    /** Returns the description of a single rule as used by {@link #ruleList()}. */
    private static String describe(ExtractRule<?,?> rule) {
      if (!(rule instanceof AnnotationExtractRule)) {
        return rule.getClass().getName();
      }
      AnnotationExtractRule aer = (AnnotationExtractRule) rule;
      if (aer.pattern != null) {
        return aer.pattern.toString();
      } else if (aer.extractRule != null) {
        return aer.extractRule.toString();
      } else if (aer.filterRule != null) {
        return aer.filterRule.toString();
      } else {
        return aer.toString();
      }
    }

    @Override
    public String toString() {
      return "ListExtractRule[" + ruleList() + "]";
    }

  }

  /**
   * Extraction rule that applies another extraction rule to one field of a CoreMap.
   * Input is of type CoreMap, output is templated type O.
   * <p>
   * While the wrapped rule runs, the CoreMap is bound to
   * {@code Expressions.VAR_SELF} in the environment (when an environment is set).
   *
   * @param <T> type of the annotation field
   * @param <O> output type
   */
  public static class CoreMapExtractRule<T,O> implements ExtractRule<CoreMap, O>
  {
    Env env;
    Class annotationField;
    ExtractRule<T,O> extractRule;

    /**
     * @param env environment in which {@code VAR_SELF} is bound during extraction (may be null)
     * @param annotationField key of the CoreMap field handed to the wrapped rule
     * @param extractRule rule applied to the field value
     */
    public CoreMapExtractRule(Env env, Class annotationField, ExtractRule<T,O> extractRule) {
      this.annotationField = annotationField;
      this.extractRule = extractRule;
      this.env = env;
    }

    /**
     * Looks up the annotation field on {@code cm} and runs the wrapped rule on it.
     *
     * @param cm CoreMap to extract from
     * @param out list that receives the extracted items
     * @return result of the wrapped rule
     */
    @Override
    public boolean extract(CoreMap cm, List<O> out) {
      // Use one reference for push and pop so they always hit the same environment.
      final Env scope = env;
      if (scope != null) { scope.push(Expressions.VAR_SELF, cm); }
      try {
        T field = (T) cm.get(annotationField);
        return extractRule.extract(field, out);
      } finally {
        // Always undo the binding, even when the wrapped rule throws;
        // otherwise VAR_SELF would stay pushed for later evaluations.
        if (scope != null) { scope.pop(Expressions.VAR_SELF); }
      }
    }

  }

  /**
   * Adapter that lets a rule working on a list of CoreMaps be used on a single
   * CoreMap, by presenting that CoreMap as a one-element list
   * (convenience class, for use with BasicSequenceExtractRule).
   * Input is of type CoreMap, output is templated type O.
   *
   * @param <O> output type
   */
  public static class CoreMapToListExtractRule<O> implements ExtractRule<CoreMap, O>
  {
    ExtractRule<List<? extends CoreMap>,O> extractRule;

    /**
     * @param extractRule list-based rule to delegate to
     */
    public CoreMapToListExtractRule(ExtractRule<List<? extends CoreMap>,O> extractRule) {
      this.extractRule = extractRule;
    }

    /**
     * @param cm the single CoreMap to wrap
     * @param out list that receives the extracted items
     * @return result of the wrapped rule on the one-element list
     */
    @Override
    public boolean extract(CoreMap cm, List<O> out) {
      List<CoreMap> wrapped = Arrays.asList(cm);
      return extractRule.extract(wrapped, out);
    }
  }

  /**
   * Extraction rule that tests every element of a sequence on its own.
   * Input is a list of CoreMaps, output is MatchedExpression: one matched
   * expression of length one is produced for each element on which the
   * extractor yields a non-null value.
   */
  public static class BasicSequenceExtractRule implements ExtractRule< List<? extends CoreMap>, MatchedExpression>
  {
    MatchedExpression.SingleAnnotationExtractor extractor;

    public BasicSequenceExtractRule(MatchedExpression.SingleAnnotationExtractor extractor) {
      this.extractor = extractor;
    }

    /**
     * @param seq sequence to scan
     * @param out list that receives one matched expression per accepted element
     * @return true if at least one element was accepted
     */
    @Override
    public boolean extract(List<? extends CoreMap> seq, List<MatchedExpression> out) {
      boolean foundAny = false;
      for (int pos = 0; pos < seq.size(); pos++) {
        if (extractor.apply(seq.get(pos)) == null) {
          continue;
        }
        // Interval covers exactly this element: [pos, pos + 1).
        out.add(extractor.createMatchedExpression(
            Interval.toInterval(pos, pos + 1, Interval.INTERVAL_OPEN_END), null));
        foundAny = true;
      }
      return foundAny;
    }
  }


  /**
   * Rule backed by a single {@link SequencePattern}.
   * As an {@link ExtractRule} it collects one output per match found in the
   * sequence; as a {@link Function} it produces an output only when the
   * pattern matches the whole sequence.
   *
   * @param <T> element type of the sequence
   * @param <O> output type
   */
  public static class SequencePatternExtractRule<T,O> implements ExtractRule< List<? extends T>, O>, Function<List<? extends T>, O> {

    SequencePattern<T> pattern;
    Function<SequenceMatchResult<T>, O> extractor;
    SequenceMatcher.FindType findType = null;
    boolean matchWithResult = false;

    /**
     * @param env environment used to compile the pattern
     * @param regex sequence pattern source
     * @param extractor converts a match into an output
     */
    public SequencePatternExtractRule(Env env, String regex, Function<SequenceMatchResult<T>, O> extractor) {
      this.extractor = extractor;
      this.pattern = SequencePattern.compile(env, regex);
    }

    /**
     * @param p compiled sequence pattern
     * @param extractor converts a match into an output
     */
    public SequencePatternExtractRule(SequencePattern<T> p, Function<SequenceMatchResult<T>, O> extractor) {
      this.extractor = extractor;
      this.pattern = p;
    }

    /**
     * @param p compiled sequence pattern
     * @param extractor converts a match into an output
     * @param findType find type set on the matcher during {@link #extract} (null leaves the matcher's setting untouched)
     * @param matchWithResult flag forwarded to the matcher
     */
    public SequencePatternExtractRule(SequencePattern<T> p, Function<SequenceMatchResult<T>, O> extractor,
                                      SequenceMatcher.FindType findType, boolean matchWithResult) {
      this.extractor = extractor;
      this.pattern = p;
      this.findType = findType;
      this.matchWithResult = matchWithResult;
    }

    /**
     * Adds one output to {@code out} for every match found in {@code seq}.
     *
     * @return true if at least one match was found; false for a null sequence
     */
    @Override
    public boolean extract(List<? extends T> seq, List<O> out) {
      if (seq == null) {
        return false;
      }
      SequenceMatcher<T> seqMatcher = pattern.getMatcher(seq);
      if (findType != null) {
        seqMatcher.setFindType(findType);
      }
      seqMatcher.setMatchWithResult(matchWithResult);

      int found = 0;
      while (seqMatcher.find()) {
        out.add(extractor.apply(seqMatcher));
        found++;
      }
      return found > 0;
    }

    /**
     * @return the extractor's output if the pattern matches the entire sequence,
     *         otherwise null (also for a null sequence)
     */
    @Override
    public O apply(List<? extends T> seq) {
      if (seq == null) {
        return null;
      }
      SequenceMatcher<T> seqMatcher = pattern.getMatcher(seq);
      seqMatcher.setMatchWithResult(matchWithResult);
      return seqMatcher.matches() ? extractor.apply(seqMatcher) : null;
    }

  } // end static class


  /**
   * Rule backed by a {@link MultiPatternMatcher}; matches are obtained through
   * {@code findNonOverlappingMaxScore}.
   * As an {@link ExtractRule} it collects one output per returned match; as a
   * {@link Function} it produces the output for the first returned match.
   *
   * @param <T> element type of the sequence
   * @param <O> output type
   */
  public static class MultiSequencePatternExtractRule<T,O> implements ExtractRule< List<? extends T>, O>, Function<List<? extends T>, O> {
    MultiPatternMatcher<T> matcher;
    Function<SequenceMatchResult<T>, O> extractor;

    /**
     * @param matcher matcher that finds the matches
     * @param extractor converts a match into an output
     */
    public MultiSequencePatternExtractRule(MultiPatternMatcher<T> matcher, Function<SequenceMatchResult<T>, O> extractor) {
      this.extractor = extractor;
      this.matcher = matcher;
    }

    /**
     * @return true if the matcher returned at least one match; false for a null sequence
     */
    @Override
    public boolean extract(List<? extends T> seq, List<O> out) {
      if (seq == null) {
        return false;
      }
      boolean foundAny = false;
      for (SequenceMatchResult<T> hit : matcher.findNonOverlappingMaxScore(seq)) {
        out.add(extractor.apply(hit));
        foundAny = true;
      }
      return foundAny;
    }

    /**
     * @return output for the first match returned by the matcher, or null when
     *         there is none (also for a null sequence)
     */
    @Override
    public O apply(List<? extends T> seq) {
      if (seq == null) {
        return null;
      }
      List<SequenceMatchResult<T>> hits = matcher.findNonOverlappingMaxScore(seq);
      return hits.isEmpty() ? null : extractor.apply(hits.get(0));
    }
  }


  /**
   * Rule backed by a {@link java.util.regex.Pattern}.
   * As an {@link ExtractRule} it collects one output per regex match found in
   * the string; as a {@link Function} it produces an output only when the
   * pattern matches the whole string.
   *
   * @param <O> output type
   */
  public static class StringPatternExtractRule<O> implements ExtractRule<String, O>, Function<String, O> {

    private final Pattern pattern;
    private final Function<MatchResult, O> extractor;

    /**
     * @param pattern compiled regular expression
     * @param extractor converts a match into an output
     */
    public StringPatternExtractRule(Pattern pattern, Function<MatchResult, O> extractor) {
      this.pattern = pattern;
      this.extractor = extractor;
    }

    /** Compiles {@code regex} through {@code env}, without adding word boundaries. */
    public StringPatternExtractRule(Env env, String regex, Function<MatchResult, O> extractor) {
      this(env, regex, extractor, false);
    }

    /** Compiles {@code regex} directly, with no environment and without adding word boundaries. */
    public StringPatternExtractRule(String regex, Function<MatchResult, O> extractor) {
      this(null, regex, extractor, false);
    }

    /**
     * @param env environment used to obtain the pattern; when null the regex is compiled directly
     * @param regex regular expression source
     * @param extractor converts a match into an output
     * @param addWordBoundaries whether to surround the regex with {@code \b}
     */
    public StringPatternExtractRule(Env env, String regex, Function<MatchResult, O> extractor,
                                    boolean addWordBoundaries) {
      this.extractor = extractor;
      final String source = addWordBoundaries ? "\\b" + regex + "\\b" : regex;
      this.pattern = (env != null) ? env.getStringPattern(source) : Pattern.compile(source);
    }

    /**
     * Adds one output to {@code out} for every match found in {@code str}.
     *
     * @return true if at least one match was found; false for a null string
     */
    @Override
    public boolean extract(String str, List<O> out) {
      if (str == null) {
        return false;
      }
      Matcher regexMatcher = pattern.matcher(str);
      int found = 0;
      while (regexMatcher.find()) {
        out.add(extractor.apply(regexMatcher));
        found++;
      }
      return found > 0;
    }

    /**
     * @return the extractor's output if the pattern matches the entire string,
     *         otherwise null (also for a null string)
     */
    @Override
    public O apply(String str) {
      if (str == null) {
        return null;
      }
      Matcher regexMatcher = pattern.matcher(str);
      return regexMatcher.matches() ? extractor.apply(regexMatcher) : null;
    }

  } // end static class StringPatternExtractRule

  /**
   * Turns a string regex match into a {@link MatchedExpression} whose first
   * (offset) interval is the span of the configured capture group; the second
   * interval argument is passed as null.
   */
  public static class StringMatchedExpressionExtractor implements Function<MatchResult, MatchedExpression>
  {
    MatchedExpression.SingleAnnotationExtractor extractor;
    int group = 0;

    /**
     * @param extractor factory for the matched expression
     * @param group capture group whose span is used
     */
    public StringMatchedExpressionExtractor(MatchedExpression.SingleAnnotationExtractor extractor, int group) {
      this.extractor = extractor;
      this.group = group;
    }

    @Override
    public MatchedExpression apply(MatchResult matched) {
      int from = matched.start(group);
      int to = matched.end(group);
      return extractor.createMatchedExpression(
          Interval.toInterval(from, to, Interval.INTERVAL_OPEN_END), null);
    }
  }

  /**
   * Turns a sequence match into a {@link MatchedExpression} whose second
   * interval is the span of the configured capture group; the first interval
   * argument is passed as null. Priority and weight are taken from the match
   * when the created expression has none (NaN).
   */
  public static class SequenceMatchedExpressionExtractor implements Function<SequenceMatchResult<CoreMap>, MatchedExpression>
  {
    MatchedExpression.SingleAnnotationExtractor extractor;
    int group = 0;

    /**
     * @param extractor factory for the matched expression
     * @param group capture group whose span is used
     */
    public SequenceMatchedExpressionExtractor(MatchedExpression.SingleAnnotationExtractor extractor, int group) {
      this.extractor = extractor;
      this.group = group;
    }

    @Override
    public MatchedExpression apply(SequenceMatchResult<CoreMap> matched) {
      int from = matched.start(group);
      int to = matched.end(group);
      MatchedExpression expr = extractor.createMatchedExpression(
          null, Interval.toInterval(from, to, Interval.INTERVAL_OPEN_END));

      // Only fill in priority/weight that have not been set on the expression.
      if (Double.isNaN(expr.priority)) {
        expr.priority = matched.priority();
      }
      if (Double.isNaN(expr.weight)) {
        expr.weight = matched.score();
      }

      if (this.group != 0) {
        // Save context so value evaluation can happen
        expr.context = matched.toBasicSequenceMatchResult();
      }
      return expr;
    }
  }

  /**
   * Function on a CoreMap that applies another function to one field of it.
   * <p>
   * While the wrapped function runs, the CoreMap is bound to
   * {@code Expressions.VAR_SELF} in the environment (when an environment is set).
   *
   * @param <T> type of the annotation field
   * @param <O> output type
   */
  public static class CoreMapFunctionApplier<T,O> implements Function<CoreMap, O>
  {
    Env env;
    Class annotationField;
    Function<T,O> func;

    /**
     * @param env environment in which {@code VAR_SELF} is bound during evaluation (may be null)
     * @param annotationField key of the CoreMap field handed to {@code func}
     * @param func function applied to the field value
     * @throws IllegalArgumentException if {@code annotationField} is null
     */
    public CoreMapFunctionApplier(Env env, Class annotationField, Function<T,O> func) {
      if (annotationField == null) {
        throw new IllegalArgumentException("Annotation field cannot be null");
      }
      this.annotationField = annotationField;
      this.func = func;
      this.env = env;
    }

    /**
     * Looks up the annotation field on {@code cm} and applies the wrapped function to it.
     *
     * @param cm CoreMap to read the field from
     * @return result of the wrapped function
     */
    @Override
    public O apply(CoreMap cm) {
      // Use one reference for push and pop so they always hit the same environment.
      final Env scope = env;
      if (scope != null) { scope.push(Expressions.VAR_SELF, cm); }
      try {
        T field = (T) cm.get(annotationField);
        return func.apply(field);
      } finally {
        // Always undo the binding, even when the wrapped function throws;
        // otherwise VAR_SELF would stay pushed for later evaluations.
        if (scope != null) { scope.pop(Expressions.VAR_SELF); }
      }
    }
  }

  /**
   * Function on a CoreMap that applies a list-based function to a one-element
   * list containing that CoreMap.
   * <p>
   * While the wrapped function runs, the CoreMap is bound to
   * {@code Expressions.VAR_SELF} in the environment (when an environment is set).
   *
   * @param <O> output type
   */
  public static class CoreMapToListFunctionApplier<O> implements Function<CoreMap, O>
  {
    Env env;
    Function<List<? extends CoreMap>,O> func;

    /**
     * @param env environment in which {@code VAR_SELF} is bound during evaluation (may be null)
     * @param func list-based function to delegate to
     */
    public CoreMapToListFunctionApplier(Env env, Function<List<? extends CoreMap>,O> func) {
      this.func = func;
      this.env = env;
    }

    /**
     * @param cm the single CoreMap to wrap
     * @return result of the wrapped function on the one-element list
     */
    @Override
    public O apply(CoreMap cm) {
      // Use one reference for push and pop so they always hit the same environment.
      final Env scope = env;
      if (scope != null) { scope.push(Expressions.VAR_SELF, cm); }
      try {
        return func.apply(Collections.singletonList(cm));
      } finally {
        // Always undo the binding, even when the wrapped function throws;
        // otherwise VAR_SELF would stay pushed for later evaluations.
        if (scope != null) { scope.pop(Expressions.VAR_SELF); }
      }
    }
  }
}