// MachineReadingProperties.java example
//
// Explorer
// CoreNLP-master
package edu.stanford.nlp.ie.machinereading;

import java.util.logging.Logger;

import edu.stanford.nlp.ie.machinereading.structure.RelationMentionFactory;
import edu.stanford.nlp.util.ArgumentParser.Option;

/**
 * Holder for the static option variables of the machine reading package.
 *
 * <p>Every field below is tagged with an {@code @Option} annotation that gives the
 * option's external name and, for most of them, a short gloss describing it. The
 * value written in the initializer (if any) is the option's default; fields without
 * an initializer start out as {@code null}.
 *
 * <p>NOTE(review): the fields are presumably populated reflectively from the
 * {@code @Option} annotations by {@code ArgumentParser} -- confirm against the callers.
 *
 * <p>The class cannot be instantiated; it only carries static state.
 */
public class MachineReadingProperties {

  /** Not instantiable: this is a class of static option variables. */
  private MachineReadingProperties() {}

  @Option(
      name = "logger",
      gloss = "Static logger for this entire class")
  public static Logger logger = Logger.getLogger(MachineReading.class.getName());

  // ---------------------------------------------------------------------------
  // general options
  // ---------------------------------------------------------------------------

  @Option(
      name = "datasetReaderClass",
      gloss = "which GenericDataSetReader to use (needs to match the corpus in question)",
      required = true)
  public static Class<GenericDataSetReader> datasetReaderClass;

  @Option(
      name = "datasetAuxReaderClass",
      gloss = "which GenericDataSetReader to use for aux data set (needs to match the corpus in question)")
  public static Class<GenericDataSetReader> datasetAuxReaderClass;

  @Option(
      name = "useNewHeadFinder",
      gloss = "If false, use the original head (and worse) finding mechanism in GenericDataSetReader.  This option is primarily around for legacy purposes.")
  public static boolean useNewHeadFinder = true;

  @Option(
      name = "readerLogLevel",
      gloss = "verbosity of the corpus reader")
  public static String readerLogLevel = "SEVERE";

  @Option(
      name = "serializeCorpora",
      gloss = "if false, we do not attempt to serialize the train/test corpora after reading")
  public static boolean serializeCorpora = true;

  @Option(
      name = "forceGenerationOfIndexSpans",
      gloss = "if true (default), regenerate span annotations for trees")
  public static boolean forceGenerationOfIndexSpans = true;

  // ---------------------------------------------------------------------------
  // entity extraction options
  // ---------------------------------------------------------------------------

  @Option(
      name = "serializedEntityExtractorPath",
      gloss = "where to store/load the serialized entity extraction model")
  protected static String serializedEntityExtractorPath = "";

  @Option(
      name = "serializedEntityExtractionResults",
      gloss = "where to store the serialized sentences containing the results of entity extraction")
  protected static String serializedEntityExtractionResults;

  // TODO: this option is temporary and should be removed when (if?) gazetteers
  // get folded into feature factories.
  @Option(
      name = "entityGazetteerPath",
      gloss = "location of entity gazetteer file (if you're using one) -- this is a temporary option")
  public static String entityGazetteerPath;

  @Option(
      name = "entityClassifier",
      gloss = "entity extractor class to use")
  public static Class<BasicEntityExtractor> entityClassifier =
      edu.stanford.nlp.ie.machinereading.BasicEntityExtractor.class;

  @Option(
      name = "entityResultsPrinters",
      gloss = "comma-separated list of ResultsPrinter subclasses to use for printing the results of entity extraction")
  public static String entityResultsPrinters = "";

  // ---------------------------------------------------------------------------
  // relation extraction options
  // ---------------------------------------------------------------------------

  // The two serialized-relation fields below have no initializer and therefore
  // start out as null.
  @Option(
      name = "serializedRelationExtractorPath",
      gloss = "where to store/load the serialized relation extraction model")
  protected static String serializedRelationExtractorPath;

  @Option(
      name = "serializedRelationExtractionResults",
      gloss = "where to store the serialized sentences containing the results of relation extraction")
  protected static String serializedRelationExtractionResults;

  @Option(
      name = "relationFeatureFactoryClass",
      gloss = "FeatureFactory class to use for generating features from relations for relation extraction")
  public static Class<? extends RelationFeatureFactory> relationFeatureFactoryClass =
      edu.stanford.nlp.ie.machinereading.BasicRelationFeatureFactory.class;

  @Option(
      name = "relationMentionFactoryClass",
      gloss = "relation mention factory class to use.")
  public static Class<RelationMentionFactory> relationMentionFactoryClass =
      edu.stanford.nlp.ie.machinereading.structure.RelationMentionFactory.class;

  @Option(
      name = "relationFeatures",
      gloss = "comma-separated list of feature types to generate for relation extraction.")
  public static String relationFeatures = "all";

  @Option(
      name = "relationResultsPrinters",
      gloss = "comma-separated list of ResultsPrinter subclasses to use for printing the results of relation extraction")
  public static String relationResultsPrinters =
      "edu.stanford.nlp.ie.machinereading.RelationExtractorResultsPrinter";

  @Option(
      name = "trainRelationsUsingPredictedEntities",
      gloss = "if true, the relation extraction model trains using predicted rather than gold entity mentions")
  public static boolean trainRelationsUsingPredictedEntities = false;

  @Option(
      name = "testRelationsUsingPredictedEntities",
      gloss = "if true, the relation extraction model is evaluated using predicted rather than gold entity mentions.")
  public static boolean testRelationsUsingPredictedEntities = false;

  @Option(
      name = "createUnrelatedRelations",
      gloss = "If true, it creates automatically negative examples by generating all combinations between EntityMentions in a sentence")
  public static boolean createUnrelatedRelations = true;

  @Option(
      name = "doNotLexicalizeFirstArg",
      gloss = "If true, it does not create any lexicalized features from the first argument (needed for KBP)")
  public static boolean doNotLexicalizeFirstArg = false;

  // TODO: temporary NFL deadline based hack. remove it.
  @Option(
      name = "useRelationExtractionModelMerging",
      gloss = "If true, the relation extractor will use ExtractorMerger for annotation (not training)")
  public static boolean useRelationExtractionModelMerging = false;

  @Option(
      name = "relationsToSkipDuringTraining",
      gloss = "comma-separated list relation types to skip during training")
  public static String relationsToSkipDuringTraining = "";

  @Option(
      name = "relationExtractionPostProcessorClass",
      gloss = "additional (probably domain-dependent) annotator to postprocess relations")
  public static Class<Extractor> relationExtractionPostProcessorClass;

  @Option(
      name = "relationClassifier",
      gloss = "relation extractor class to use")
  public static Class<? extends BasicRelationExtractor> relationClassifier =
      edu.stanford.nlp.ie.machinereading.BasicRelationExtractor.class;

  // ---------------------------------------------------------------------------
  // event extraction options
  // ---------------------------------------------------------------------------

  @Option(
      name = "serializedEventExtractorPath",
      gloss = "where to store/load the serialized event extraction model")
  protected static String serializedEventExtractorPath = "";

  @Option(
      name = "serializedEventExtractionResults",
      gloss = "where to store the serialized sentences containing the results of event extraction")
  protected static String serializedEventExtractionResults;

  @Option(
      name = "eventResultsPrinters",
      gloss = "comma-separated list of ResultsPrinter subclasses to use for printing the results of event extraction")
  public static String eventResultsPrinters = "";

  @Option(
      name = "trainEventsUsingPredictedEntities",
      gloss = "if true, the event extraction model trains using predicted rather than gold entity mentions")
  public static boolean trainEventsUsingPredictedEntities = false;

  @Option(
      name = "testEventsUsingPredictedEntities",
      gloss = "if true, the event extraction model is evaluated using predicted rather than gold entity mentions")
  public static boolean testEventsUsingPredictedEntities = false;

  // ---------------------------------------------------------------------------
  // global, domain-dependent options
  // ---------------------------------------------------------------------------

  @Option(
      name = "consistencyCheck",
      gloss = "consistency checker class to use")
  public static Class<Extractor> consistencyCheck;

  // ---------------------------------------------------------------------------
  // training options
  // ---------------------------------------------------------------------------

  @Option(
      name = "trainPath",
      gloss = " path to the training file/directory")
  protected static String trainPath;

  @Option(
      name = "auxDataPath",
      gloss = "path to the aux training file/directory")
  protected static String auxDataPath;

  @Option(
      name = "serializedTrainingSentencesPath",
      gloss = " where to store the serialized training sentences objects",
      required = true)
  protected static String serializedTrainingSentencesPath;

  @Option(
      name = "serializedAuxTrainingSentencesPath",
      gloss = "where to store the serialized aux training sentences objects")
  protected static String serializedAuxTrainingSentencesPath;

  @Option(
      name = "loadModel",
      gloss = "if true, load a serialized model rather than training a new one")
  protected static boolean loadModel = false;

  @Option(
      name = "trainUsePipelineNER",
      gloss = "during training, use NER generated by the CoreNLP pipeline")
  public static boolean trainUsePipelineNER = false;

  // ---------------------------------------------------------------------------
  // evaluation options (ignored if trainOnly is true)
  // ---------------------------------------------------------------------------

  @Option(
      name = "trainOnly",
      gloss = "if true, don't run evaluation (implies forceRetraining)")
  protected static boolean trainOnly = false;

  @Option(
      name = "testPath",
      gloss = "path to the testing file/directory")
  protected static String testPath;

  @Option(
      name = "serializedTestSentencesPath",
      gloss = "where to store the serialized test sentence objects")
  protected static String serializedTestSentencesPath;

  @Option(
      name = "extractEntities",
      gloss = "whether to extract entities, or use gold-standard entities for relation/event extraction")
  protected static boolean extractEntities = true;

  @Option(
      name = "extractRelations",
      gloss = "whether we should extract relations")
  protected static boolean extractRelations = true;

  @Option(
      name = "extractEvents",
      gloss = "whether we should extract events")
  protected static boolean extractEvents = true;

  // ---------------------------------------------------------------------------
  // cross-validation options
  // ---------------------------------------------------------------------------

  @Option(
      name = "crossValidate",
      gloss = "if true, run cross-validation")
  protected static boolean crossValidate = false;

  @Option(
      name = "kfold",
      gloss = "number of partitions in training data for cross validation")
  public static int kfold = 5;

  @Option(
      name = "percentageOfTrain",
      gloss = "Pct of train partition to use for training (e.g., for RELMS experiment); default all = 1.0")
  public static double percentageOfTrain = 1.0;

  // ---------------------------------------------------------------------------
  // Additional features, may not necessarily be used in the public release.
  // (None of these options carries a gloss.)
  // ---------------------------------------------------------------------------

  @Option(name = "featureSimilarityThreshold")
  public static double featureSimilarityThreshold = 0.2;

  @Option(name = "computeFeatSimilarity")
  public static boolean computeFeatSimilarity = true;

  @Option(name = "featureSelectionNumFeaturesRatio")
  public static double featureSelectionNumFeaturesRatio = 0.7;

  @Option(name = "L1Reg")
  public static boolean L1Reg = false;

  @Option(name = "L2Reg")
  public static boolean L2Reg = true;

  @Option(name = "L1RegLambda")
  public static double L1RegLambda = 1.0;

}