// FeatureFactory.java example
//
// Explorer
// CoreNLP-master
package edu.stanford.nlp.sequences;

import java.util.*;
import java.io.Serializable;

import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.util.PaddedList;


/**
 * This is the abstract class that all feature factories must
 * subclass.  It also defines most of the basic {@link Clique}s
 * that you would want to make features over.  It contains a
 * convenient method, {@link #getCliques(int, int)}, which will give
 * you all the known cliques within the specified limits.
 * <p>
 * Within this class the {@link #flags} field is only assigned by
 * {@link #init(SeqClassifierFlags)}.  {@link #getCliques()} and
 * {@link #getWord(CoreLabel)} dereference it without a null check, so
 * the flags must have been set before either of them is called.
 *
 * @param <IN> The type of the items in the PaddedList from which features
 *     are extracted
 *
 * @author Jenny Finkel
 */
public abstract class FeatureFactory<IN> implements Serializable {

  private static final long serialVersionUID = 7249250071983091694L;

  /** The flags configuring this factory; {@code null} until {@link #init(SeqClassifierFlags)} is called. */
  protected SeqClassifierFlags flags;

  public FeatureFactory() {}

  /**
   * Stores the flags that configure this feature factory.
   *
   * @param flags The flags later read by {@link #getCliques()} and
   *     {@link #getWord(CoreLabel)}
   */
  public void init (SeqClassifierFlags flags) {
    this.flags = flags;
  }

  // Each clique below is built from the array of relative position indices
  // handed to Clique.valueOf (0 is presumably the current position and
  // negative values positions to its left -- confirm against Clique).
  public static final Clique cliqueC = Clique.valueOf(new int[] {0});
  public static final Clique cliqueCpC = Clique.valueOf(new int[] {-1, 0});
  public static final Clique cliqueCp2C = Clique.valueOf(new int[] {-2, 0});
  public static final Clique cliqueCp3C = Clique.valueOf(new int[] {-3, 0});
  public static final Clique cliqueCp4C = Clique.valueOf(new int[] {-4, 0});
  public static final Clique cliqueCp5C = Clique.valueOf(new int[] {-5, 0});
  public static final Clique cliqueCpCp2C = Clique.valueOf(new int[] {-2, -1, 0});
  public static final Clique cliqueCpCp2Cp3C = Clique.valueOf(new int[] {-3, -2, -1, 0});
  public static final Clique cliqueCpCp2Cp3Cp4C = Clique.valueOf(new int[] {-4, -3, -2, -1, 0});
  public static final Clique cliqueCpCp2Cp3Cp4Cp5C = Clique.valueOf(new int[] {-5, -4, -3, -2, -1, 0});
  public static final Clique cliqueCnC = Clique.valueOf(new int[] {0, 1});
  public static final Clique cliqueCpCnC = Clique.valueOf(new int[] {-1, 0, 1});

  /**
   * All of the cliques defined by this class, in declaration order.
   * The list is unmodifiable: a bare {@code Arrays.asList} result would
   * still permit {@code set()}/{@code sort()}, letting any caller alter
   * the shared, JVM-wide clique inventory that
   * {@link #getCliques(int, int)} filters.
   */
  public static final List<Clique> knownCliques = Collections.unmodifiableList(Arrays.asList(
      cliqueC, cliqueCpC, cliqueCp2C, cliqueCp3C, cliqueCp4C, cliqueCp5C,
      cliqueCpCp2C, cliqueCpCp2Cp3C, cliqueCpCp2Cp3Cp4C, cliqueCpCp2Cp3Cp4Cp5C,
      cliqueCnC, cliqueCpCnC));

  /**
   * Returns the known cliques that fit within {@code flags.maxLeft} and
   * {@code flags.maxRight}.
   *
   * @return A new list of the matching cliques
   * @throws NullPointerException if {@link #flags} has not been set
   */
  public List<Clique> getCliques() {
    return getCliques(flags.maxLeft, flags.maxRight);
  }

  /**
   * Returns the members of {@link #knownCliques} that fit within the
   * given limits.  A clique {@code c} is kept when
   * {@code -c.maxLeft() <= maxLeft} and {@code c.maxRight() <= maxRight}.
   *
   * @param maxLeft The largest allowed value of {@code -c.maxLeft()}
   * @param maxRight The largest allowed value of {@code c.maxRight()}
   * @return A new, mutable list of the matching cliques, in the order
   *     they appear in {@link #knownCliques}
   */
  public static List<Clique> getCliques(int maxLeft, int maxRight) {
    List<Clique> cliques = new ArrayList<>();
    for (Clique c : knownCliques) {
      // maxLeft() is negated before comparison, so it is presumably the
      // (non-positive) leftmost relative index of the clique.
      if (-c.maxLeft() <= maxLeft && c.maxRight() <= maxRight) {
        cliques.add(c);
      }
    }
    return cliques;
  }

  /**
   * This method returns a {@link Collection} of the features
   * calculated for the word at the specified position in info (the list of
   * words) for the specified {@link Clique}.
   * It should return the actual String features, <b>NOT</b> wrapped in any
   * other object, as the wrapping
   * will be done automatically.
   * Because it takes a {@link PaddedList} you don't
   * need to worry about indices which are outside of the list.
   *
   * @param info A PaddedList of the feature-value pairs
   * @param position The current position to extract features at
   * @param clique The particular clique for which to extract features. It
   *     should be a member of the knownCliques list.
   * @return A {@link Collection} of the features
   *     calculated for the word at the specified position in info.
   */
  public abstract Collection<String> getCliqueFeatures(PaddedList<IN> info, int position, Clique clique);


  /** Makes more complete feature names out of partial feature names, by
   *  adding a suffix to the String feature name, adding results to an
   *  accumulator.  When the suffix is non-empty, each feature is extended
   *  with {@code '|'} followed by the suffix; when it is null or empty the
   *  features are added unchanged.  Despite the method name, no interning
   *  is currently performed.
   *
   * @param accumulator The output features are added here
   * @param addend The base set of features
   * @param suffix The suffix added to each feature in the addend set;
   *     may be null or empty, in which case nothing is appended
   */
  @SuppressWarnings({"MethodMayBeStatic"})
  protected void addAllInterningAndSuffixing(Collection<String> accumulator, Collection<String> addend, String suffix) {
    boolean hasSuffix = suffix != null && ! suffix.isEmpty();
    // Build the "|suffix" tail once, outside the loop, without
    // reassigning the parameter.
    final String tail = hasSuffix ? '|' + suffix : null;
    for (String feat : addend) {
      accumulator.add(hasSuffix ? feat.concat(tail) : feat);
    }
  }

  /**
   * Convenience methods for subclasses which use CoreLabel.  Gets the
   * word after applying any wordFunction present in the
   * SeqClassifierFlags.
   *
   * @param label A CoreLabel
   * @return The TextAnnotation of the label, perhaps after passing it through
   *     a function (flags.wordFunction)
   * @throws NullPointerException if {@link #flags} has not been set
   */
  protected String getWord(CoreLabel label) {
    String word = label.getString(CoreAnnotations.TextAnnotation.class);
    if (flags.wordFunction != null) {
      word = flags.wordFunction.apply(word);
    }
    return word;
  }

}