PatternFactory.java example

Explorer
CoreNLP-master
package edu.stanford.nlp.patterns;

import edu.stanford.nlp.patterns.dep.DepPatternFactory;
import edu.stanford.nlp.patterns.surface.SurfacePatternFactory;
import edu.stanford.nlp.util.ArgumentParser;

import java.util.*;

/**
 * Shared options and entry points for pattern creation. The option fields are
 * populated from a {@link Properties} object by {@link #setUp}, which then
 * delegates to {@link SurfacePatternFactory} or {@link DepPatternFactory}
 * depending on the requested {@link PatternType}.
 *
 * Created by sonalg on 10/27/14.
 */
public class PatternFactory {
  /**
   * Allow stop words to be matched before a target term. This is to match
   * something like "I am on some X" if the pattern is "I am on X".
   */
  @ArgumentParser.Option(name = "useStopWordsBeforeTerm")
  public static boolean useStopWordsBeforeTerm = false;

  /**
   * Add NER restriction to the target phrase in the patterns.
   */
  @ArgumentParser.Option(name = "useTargetNERRestriction")
  public static boolean useTargetNERRestriction = false;

  /**
   * Option "useNER"; enabled by default.
   * NOTE(review): presumably controls whether NER tags are used when building
   * patterns -- the consumers of this flag are not in this file; confirm there.
   */
  @ArgumentParser.Option(name = "useNER")
  public static boolean useNER = true;

  /**
   * Can just write a number (if same for all labels) or "Label1,2;Label2,3;....".
   * Parsed by {@link #setUp} into {@link #numWordsCompoundMapped} and
   * {@link #numWordsCompoundMax}.
   */
  @ArgumentParser.Option(name = "numWordsCompound")
  public static String numWordsCompound = "2";

  /**
   * Per-label value parsed from {@link #numWordsCompound}; rebuilt on every
   * call to {@link #setUp}.
   */
  public static Map<String, Integer> numWordsCompoundMapped = new HashMap<>();

  /**
   * Largest per-label value found in {@link #numWordsCompound}; recomputed on
   * every call to {@link #setUp}.
   */
  public static int numWordsCompoundMax = 2;

  /**
   * Use lemma instead of words for the context tokens.
   */
  @ArgumentParser.Option(name = "useLemmaContextTokens")
  public static boolean useLemmaContextTokens = true;

  /**
   * Articles and quote tokens. Not read in this class.
   * NOTE(review): presumably skipped by the concrete factories when building
   * patterns -- confirm against their usage.
   */
  public static List<String> fillerWords = Arrays.asList("a", "an", "the", "`", "``",
    "'", "''");

  /**
   * What phrases to ignore. By default doesn't ignore anything: the regex
   * "a^" requires start-of-input after an 'a', so it can never match.
   */
  public static java.util.regex.Pattern ignoreWordRegex = java.util.regex.Pattern.compile("a^");

  /**
   * Fills the option fields of this class from {@code props}, parses
   * {@link #numWordsCompound} into {@link #numWordsCompoundMapped} and
   * {@link #numWordsCompoundMax}, and then sets up the factory that matches
   * {@code patternType}.
   *
   * @param props properties used to fill the options; also passed on to the
   *              concrete factory
   * @param patternType which kind of pattern factory to set up
   * @param labels labels that receive the common value when
   *               {@link #numWordsCompound} is a single number
   * @throws NumberFormatException if a count in {@link #numWordsCompound} is
   *         not an integer
   * @throws IllegalArgumentException if a "Label,number" entry has no number
   * @throws UnsupportedOperationException if {@code patternType} is not handled
   */
  public static void setUp(Properties props, PatternType patternType, Set<String> labels) {
    ArgumentParser.fillOptions(PatternFactory.class, props);
    parseNumWordsCompound(labels);
    switch (patternType) {
      case SURFACE:
        SurfacePatternFactory.setUp(props);
        break;
      case DEP:
        DepPatternFactory.setUp(props);
        break;
      default:
        throw new UnsupportedOperationException("Unsupported pattern type: " + patternType);
    }
  }

  /**
   * Rebuilds {@link #numWordsCompoundMapped} and {@link #numWordsCompoundMax}
   * from the current value of {@link #numWordsCompound}.
   */
  private static void parseNumWordsCompound(Set<String> labels) {
    // Start from a clean slate so that a repeated setUp does not keep entries
    // of an earlier configuration (the max is recomputed for the same reason).
    numWordsCompoundMapped.clear();
    numWordsCompoundMax = 0;

    String spec = numWordsCompound.trim();
    if (spec.contains(",") || spec.contains(";")) {
      // Per-label form: "Label1,2;Label2,3;..."
      for (String rawEntry : spec.split(";")) {
        String entry = rawEntry.trim();
        if (entry.isEmpty()) {
          continue; // tolerate stray separators such as "A,2;;B,3"
        }
        String[] parts = entry.split(",");
        if (parts.length < 2) {
          throw new IllegalArgumentException("Malformed numWordsCompound entry '" + entry
              + "' in '" + numWordsCompound + "'; expected \"Label,number\"");
        }
        int numWords = Integer.parseInt(parts[1].trim());
        numWordsCompoundMapped.put(parts[0].trim(), numWords);
        numWordsCompoundMax = Math.max(numWordsCompoundMax, numWords);
      }
    } else {
      // Single-number form: the same value applies to every label.
      int numWords = Integer.parseInt(spec);
      numWordsCompoundMax = numWords;
      for (String label : labels) {
        numWordsCompoundMapped.put(label, numWords);
      }
    }
  }

  /** The kinds of patterns this factory can dispatch to. */
  public enum PatternType { SURFACE, DEP }

  /**
   * Tells whether {@code word} should be excluded.
   *
   * @param word the word to test
   * @param stopWords stop-word phrases; looked up with the lower-cased word
   * @return {@code true} if the lower-cased word is in {@code stopWords} or the
   *         word (as given) fully matches {@link #ignoreWordRegex}
   */
  public static boolean doNotUse(String word, Set<CandidatePhrase> stopWords) {
    return stopWords.contains(CandidatePhrase.createOrGet(word.toLowerCase()))
        || ignoreWordRegex.matcher(word).matches();
  }

  /**
   * Delegates to the factory matching {@code patternType} to obtain the
   * patterns around the tokens of {@code sent}.
   *
   * @param patternType which factory to use
   * @param sent the data instance to extract patterns from
   * @param stopWords stop-word phrases, passed on to the concrete factory
   * @return the map produced by the concrete factory; the raw {@code Set}
   *         value type is kept for compatibility with existing callers
   * @throws UnsupportedOperationException if {@code patternType} is not handled
   */
  @SuppressWarnings({"rawtypes", "unchecked"}) // raw Set is part of the published signature
  public static Map<Integer, Set> getPatternsAroundTokens(PatternType patternType, DataInstance sent, Set<CandidatePhrase> stopWords) {
    switch (patternType) {
      case SURFACE:
        return SurfacePatternFactory.getPatternsAroundTokens(sent, stopWords);
      case DEP:
        return (Map) DepPatternFactory.getPatternsAroundTokens(sent, stopWords);
      default:
        throw new UnsupportedOperationException("Unsupported pattern type: " + patternType);
    }
  }

}