package edu.stanford.nlp.ie.machinereading;
import java.util.logging.Logger;
import edu.stanford.nlp.ie.machinereading.structure.RelationMentionFactory;
import edu.stanford.nlp.util.ArgumentParser.Option;
/**
 * Static configuration holder for the machine reading package.
 *
 * <p>Every field is tagged with an {@code @Option} annotation carrying the
 * option name and, for most fields, a human-readable gloss; two options are
 * additionally marked {@code required}. The initializer on a field, when
 * present, is the value it has until something assigns it. The class has a
 * private constructor and is therefore never instantiated.
 *
 * <p>NOTE(review): the annotation is imported from {@code ArgumentParser}, so
 * these fields are presumably filled in reflectively from command-line or
 * properties input -- confirm against the caller.
 */
public class MachineReadingProperties {

  // Named after MachineReading (not this class), so both share one logger name.
  @Option(
      name = "logger",
      gloss = "Static logger for this entire class")
  public static Logger logger = Logger.getLogger(MachineReading.class.getName());

  // ---------------------------------------------------------------------
  // general options
  // ---------------------------------------------------------------------

  @Option(
      name = "datasetReaderClass",
      gloss = "which GenericDataSetReader to use (needs to match the corpus in question)",
      required = true)
  public static Class<GenericDataSetReader> datasetReaderClass;

  @Option(
      name = "datasetAuxReaderClass",
      gloss = "which GenericDataSetReader to use for aux data set (needs to match the corpus in question)")
  public static Class<GenericDataSetReader> datasetAuxReaderClass;

  @Option(
      name = "useNewHeadFinder",
      gloss = "If false, use the original head (and worse) finding mechanism in GenericDataSetReader. This option is primarily around for legacy purposes.")
  public static boolean useNewHeadFinder = true;

  @Option(
      name = "readerLogLevel",
      gloss = "verbosity of the corpus reader")
  public static String readerLogLevel = "SEVERE";

  @Option(
      name = "serializeCorpora",
      gloss = "if false, we do not attempt to serialize the train/test corpora after reading")
  public static boolean serializeCorpora = true;

  @Option(
      name = "forceGenerationOfIndexSpans",
      gloss = "if true (default), regenerate span annotations for trees")
  public static boolean forceGenerationOfIndexSpans = true;

  // ---------------------------------------------------------------------
  // entity extraction options
  // ---------------------------------------------------------------------

  @Option(
      name = "serializedEntityExtractorPath",
      gloss = "where to store/load the serialized entity extraction model")
  protected static String serializedEntityExtractorPath = "";

  @Option(
      name = "serializedEntityExtractionResults",
      gloss = "where to store the serialized sentences containing the results of entity extraction")
  protected static String serializedEntityExtractionResults;

  // TODO: temporary option; remove it when (if?) gazetteers get folded into
  // feature factories.
  @Option(
      name = "entityGazetteerPath",
      gloss = "location of entity gazetteer file (if you're using one) -- this is a temporary option")
  public static String entityGazetteerPath;

  @Option(
      name = "entityClassifier",
      gloss = "entity extractor class to use")
  public static Class<BasicEntityExtractor> entityClassifier =
      edu.stanford.nlp.ie.machinereading.BasicEntityExtractor.class;

  @Option(
      name = "entityResultsPrinters",
      gloss = "comma-separated list of ResultsPrinter subclasses to use for printing the results of entity extraction")
  public static String entityResultsPrinters = "";

  // ---------------------------------------------------------------------
  // relation extraction options
  // ---------------------------------------------------------------------

  // Left without an initializer: a static reference field starts out null.
  @Option(
      name = "serializedRelationExtractorPath",
      gloss = "where to store/load the serialized relation extraction model")
  protected static String serializedRelationExtractorPath;

  // Left without an initializer: a static reference field starts out null.
  @Option(
      name = "serializedRelationExtractionResults",
      gloss = "where to store the serialized sentences containing the results of relation extraction")
  protected static String serializedRelationExtractionResults;

  @Option(
      name = "relationFeatureFactoryClass",
      gloss = "FeatureFactory class to use for generating features from relations for relation extraction")
  public static Class<? extends RelationFeatureFactory> relationFeatureFactoryClass =
      edu.stanford.nlp.ie.machinereading.BasicRelationFeatureFactory.class;

  @Option(
      name = "relationMentionFactoryClass",
      gloss = "relation mention factory class to use.")
  public static Class<RelationMentionFactory> relationMentionFactoryClass =
      edu.stanford.nlp.ie.machinereading.structure.RelationMentionFactory.class;

  @Option(
      name = "relationFeatures",
      gloss = "comma-separated list of feature types to generate for relation extraction.")
  public static String relationFeatures = "all";

  @Option(
      name = "relationResultsPrinters",
      gloss = "comma-separated list of ResultsPrinter subclasses to use for printing the results of relation extraction")
  public static String relationResultsPrinters =
      "edu.stanford.nlp.ie.machinereading.RelationExtractorResultsPrinter";

  @Option(
      name = "trainRelationsUsingPredictedEntities",
      gloss = "if true, the relation extraction model trains using predicted rather than gold entity mentions")
  public static boolean trainRelationsUsingPredictedEntities = false;

  @Option(
      name = "testRelationsUsingPredictedEntities",
      gloss = "if true, the relation extraction model is evaluated using predicted rather than gold entity mentions.")
  public static boolean testRelationsUsingPredictedEntities = false;

  @Option(
      name = "createUnrelatedRelations",
      gloss = "If true, it creates automatically negative examples by generating all combinations between EntityMentions in a sentence")
  public static boolean createUnrelatedRelations = true;

  @Option(
      name = "doNotLexicalizeFirstArg",
      gloss = "If true, it does not create any lexicalized features from the first argument (needed for KBP)")
  public static boolean doNotLexicalizeFirstArg = false;

  // TODO: hack added under the NFL deadline; meant to be removed.
  @Option(
      name = "useRelationExtractionModelMerging",
      gloss = "If true, the relation extractor will use ExtractorMerger for annotation (not training)")
  public static boolean useRelationExtractionModelMerging = false;

  @Option(
      name = "relationsToSkipDuringTraining",
      gloss = "comma-separated list relation types to skip during training")
  public static String relationsToSkipDuringTraining = "";

  @Option(
      name = "relationExtractionPostProcessorClass",
      gloss = "additional (probably domain-dependent) annotator to postprocess relations")
  public static Class<Extractor> relationExtractionPostProcessorClass;

  @Option(
      name = "relationClassifier",
      gloss = "relation extractor class to use")
  public static Class<? extends BasicRelationExtractor> relationClassifier =
      edu.stanford.nlp.ie.machinereading.BasicRelationExtractor.class;

  // ---------------------------------------------------------------------
  // event extraction options
  // ---------------------------------------------------------------------

  @Option(
      name = "serializedEventExtractorPath",
      gloss = "where to store/load the serialized event extraction model")
  protected static String serializedEventExtractorPath = "";

  @Option(
      name = "serializedEventExtractionResults",
      gloss = "where to store the serialized sentences containing the results of event extraction")
  protected static String serializedEventExtractionResults;

  @Option(
      name = "eventResultsPrinters",
      gloss = "comma-separated list of ResultsPrinter subclasses to use for printing the results of event extraction")
  public static String eventResultsPrinters = "";

  @Option(
      name = "trainEventsUsingPredictedEntities",
      gloss = "if true, the event extraction model trains using predicted rather than gold entity mentions")
  public static boolean trainEventsUsingPredictedEntities = false;

  @Option(
      name = "testEventsUsingPredictedEntities",
      gloss = "if true, the event extraction model is evaluated using predicted rather than gold entity mentions")
  public static boolean testEventsUsingPredictedEntities = false;

  // ---------------------------------------------------------------------
  // global, domain-dependent options
  // ---------------------------------------------------------------------

  @Option(
      name = "consistencyCheck",
      gloss = "consistency checker class to use")
  public static Class<Extractor> consistencyCheck;

  // ---------------------------------------------------------------------
  // training options
  // ---------------------------------------------------------------------

  @Option(
      name = "trainPath",
      gloss = " path to the training file/directory")
  protected static String trainPath;

  @Option(
      name = "auxDataPath",
      gloss = "path to the aux training file/directory")
  protected static String auxDataPath;

  @Option(
      name = "serializedTrainingSentencesPath",
      gloss = " where to store the serialized training sentences objects",
      required = true)
  protected static String serializedTrainingSentencesPath;

  @Option(
      name = "serializedAuxTrainingSentencesPath",
      gloss = "where to store the serialized aux training sentences objects")
  protected static String serializedAuxTrainingSentencesPath;

  @Option(
      name = "loadModel",
      gloss = "if true, load a serialized model rather than training a new one")
  protected static boolean loadModel = false;

  @Option(
      name = "trainUsePipelineNER",
      gloss = "during training, use NER generated by the CoreNLP pipeline")
  public static boolean trainUsePipelineNER = false;

  // ---------------------------------------------------------------------
  // evaluation options (ignored if trainOnly is true)
  // ---------------------------------------------------------------------

  @Option(
      name = "trainOnly",
      gloss = "if true, don't run evaluation (implies forceRetraining)")
  protected static boolean trainOnly = false;

  @Option(
      name = "testPath",
      gloss = "path to the testing file/directory")
  protected static String testPath;

  @Option(
      name = "serializedTestSentencesPath",
      gloss = "where to store the serialized test sentence objects")
  protected static String serializedTestSentencesPath;

  @Option(
      name = "extractEntities",
      gloss = "whether to extract entities, or use gold-standard entities for relation/event extraction")
  protected static boolean extractEntities = true;

  @Option(
      name = "extractRelations",
      gloss = "whether we should extract relations")
  protected static boolean extractRelations = true;

  @Option(
      name = "extractEvents",
      gloss = "whether we should extract events")
  protected static boolean extractEvents = true;

  // ---------------------------------------------------------------------
  // cross-validation options
  // ---------------------------------------------------------------------

  @Option(
      name = "crossValidate",
      gloss = "if true, run cross-validation")
  protected static boolean crossValidate = false;

  @Option(
      name = "kfold",
      gloss = "number of partitions in training data for cross validation")
  public static int kfold = 5;

  @Option(
      name = "percentageOfTrain",
      gloss = "Pct of train partition to use for training (e.g., for RELMS experiment); default all = 1.0")
  public static double percentageOfTrain = 1.0;

  // ---------------------------------------------------------------------
  // Additional features, may not necessarily be used in the public release
  // (these options carry no gloss)
  // ---------------------------------------------------------------------

  @Option(name = "featureSimilarityThreshold")
  public static double featureSimilarityThreshold = 0.2;

  @Option(name = "computeFeatSimilarity")
  public static boolean computeFeatSimilarity = true;

  @Option(name = "featureSelectionNumFeaturesRatio")
  public static double featureSelectionNumFeaturesRatio = 0.7;

  @Option(name = "L1Reg")
  public static boolean L1Reg = false;

  @Option(name = "L2Reg")
  public static boolean L2Reg = true;

  @Option(name = "L1RegLambda")
  public static double L1RegLambda = 1.0;

  /** Not instantiable: this class only groups static option variables. */
  private MachineReadingProperties() {}
}