package edu.stanford.nlp.coref;

import java.util.Locale;
import java.util.Properties;

import edu.stanford.nlp.util.PropertiesUtils;

/**
 * Manages the properties for running coref.
 *
 * <p>Every accessor is a static method that reads one key from the supplied
 * {@link Properties}. The "default" mentioned in the method comments is the
 * fallback argument handed to {@code PropertiesUtils} or
 * {@link Properties#getProperty(String, String)}; {@code PropertiesUtils} is
 * assumed to return that fallback when the key is absent.
 *
 * <p>All case conversions use {@link Locale#ROOT} so that parsing of property
 * values does not depend on the JVM's default locale.
 *
 * @author Kevin Clark
 */
public class CorefProperties {

  private CorefProperties() {} // static methods

  //---------- Coreference Algorithms ----------

  public enum CorefAlgorithmType {CLUSTERING, STATISTICAL, NEURAL, HYBRID}

  /**
   * Reads {@code coref.algorithm}. The default is "statistical" when
   * {@link #getLanguage(Properties)} is English and "neural" otherwise.
   *
   * @param props the coref configuration
   * @return the configured algorithm (matched case-insensitively)
   * @throws IllegalArgumentException if the value names no {@link CorefAlgorithmType},
   *     or if the configured language is unsupported
   */
  public static CorefAlgorithmType algorithm(Properties props) {
    String fallback = getLanguage(props) == Locale.ENGLISH ? "statistical" : "neural";
    String type = PropertiesUtils.getString(props, "coref.algorithm", fallback);
    // Locale.ROOT: the default-locale overload maps 'i' to a dotted capital in
    // Turkish/Azeri locales, which would make valueOf fail for e.g. "statistical".
    return CorefAlgorithmType.valueOf(type.toUpperCase(Locale.ROOT));
  }

  //---------- General Coreference Options ----------

  /**
   * When conll() is true, coref models:
   * <ul>
   * <li>Use provided POS, NER, Parsing, etc. (instead of using CoreNLP annotators)</li>
   * <li>Use provided speaker annotations</li>
   * <li>Use provided document type and genre information</li>
   * </ul>
   *
   * @param props the coref configuration
   * @return the value of {@code coref.conll}; default {@code false}
   */
  public static boolean conll(Properties props) {
    return PropertiesUtils.getBool(props, "coref.conll", false);
  }

  /**
   * Reads {@code coref.useConstituencyParse}. The default is {@code true} unless
   * the algorithm is {@link CorefAlgorithmType#STATISTICAL} and
   * {@link #conll(Properties)} is false.
   *
   * @param props the coref configuration
   * @return whether a constituency parse should be used
   */
  public static boolean useConstituencyParse(Properties props) {
    boolean fallback = algorithm(props) != CorefAlgorithmType.STATISTICAL || conll(props);
    return PropertiesUtils.getBool(props, "coref.useConstituencyParse", fallback);
  }

  /**
   * @param props the coref configuration
   * @return the value of {@code coref.verbose}; default {@code false}
   */
  public static boolean verbose(Properties props) {
    return PropertiesUtils.getBool(props, "coref.verbose", false);
  }

  /**
   * @param props the coref configuration
   * @return the value of {@code coref.removeSingletonClusters}; default {@code true}
   */
  public static boolean removeSingletonClusters(Properties props) {
    return PropertiesUtils.getBool(props, "coref.removeSingletonClusters", true);
  }

  // ---------- Heuristic Mention Filtering ----------

  /**
   * Reads {@code coref.maxMentionDistance}. The default is
   * {@link Integer#MAX_VALUE} when {@link #conll(Properties)} is true and 50 otherwise.
   *
   * @param props the coref configuration
   * @return the configured maximum mention distance
   */
  public static int maxMentionDistance(Properties props) {
    int fallback = conll(props) ? Integer.MAX_VALUE : 50;
    return PropertiesUtils.getInt(props, "coref.maxMentionDistance", fallback);
  }

  /**
   * @param props the coref configuration
   * @return the value of {@code coref.maxMentionDistanceWithStringMatch}; default 500
   */
  public static int maxMentionDistanceWithStringMatch(Properties props) {
    return PropertiesUtils.getInt(props, "coref.maxMentionDistanceWithStringMatch", 500);
  }

  // ---------- Mention Detection ----------

  public enum MentionDetectionType { RULE, HYBRID, DEPENDENCY }

  /**
   * Reads {@code coref.md.type}. The default is "RULE" when
   * {@link #useConstituencyParse(Properties)} is true and "dep" otherwise;
   * "dep" (any case) is accepted as shorthand for {@code DEPENDENCY}.
   *
   * @param props the coref configuration
   * @return the configured mention detection type (matched case-insensitively)
   * @throws IllegalArgumentException if the value names no {@link MentionDetectionType}
   */
  public static MentionDetectionType mdType(Properties props) {
    String fallback = useConstituencyParse(props) ? "RULE" : "dep";
    String type = PropertiesUtils.getString(props, "coref.md.type", fallback);
    if (type.equalsIgnoreCase("dep")) {
      return MentionDetectionType.DEPENDENCY;
    }
    // Locale.ROOT keeps "hybrid" -> "HYBRID" regardless of the JVM default locale.
    return MentionDetectionType.valueOf(type.toUpperCase(Locale.ROOT));
  }

  /**
   * Reads {@code coref.md.model}. The default depends on
   * {@link #useConstituencyParse(Properties)}.
   *
   * @param props the coref configuration
   * @return the configured mention detection model location
   */
  public static String getMentionDetectionModel(Properties props) {
    String fallback = useConstituencyParse(props)
        ? "edu/stanford/nlp/models/coref/md-model.ser"
        : "edu/stanford/nlp/models/coref/md-model-dep.ser.gz";
    return PropertiesUtils.getString(props, "coref.md.model", fallback);
  }

  /**
   * @param props the coref configuration
   * @return the value of {@code coref.md.isTraining}; default {@code false}
   */
  public static boolean isMentionDetectionTraining(Properties props) {
    return PropertiesUtils.getBool(props, "coref.md.isTraining", false);
  }

  /**
   * Stores {@code val} under {@code coref.md.isTraining}.
   *
   * @param props the coref configuration to modify
   * @param val the new value
   */
  public static void setMentionDetectionTraining(Properties props, boolean val) {
    props.setProperty("coref.md.isTraining", String.valueOf(val));
  }

  /**
   * NOTE(review): unlike the other keys this one carries no {@code coref.}
   * prefix; it is left unchanged so existing configurations keep working.
   *
   * @param props the coref configuration
   * @return the value of {@code removeNestedMentions}; default {@code true}
   */
  public static boolean removeNestedMentions(Properties props) {
    return PropertiesUtils.getBool(props, "removeNestedMentions", true);
  }

  /**
   * Stores {@code val} under {@code removeNestedMentions}.
   *
   * @param props the coref configuration to modify
   * @param val the new value
   */
  public static void setRemoveNestedMentions(Properties props, boolean val) {
    props.setProperty("removeNestedMentions", String.valueOf(val));
  }

  /**
   * @param props the coref configuration
   * @return the value of {@code coref.md.liberalMD}; default {@code false}
   */
  public static boolean liberalMD(Properties props) {
    return PropertiesUtils.getBool(props, "coref.md.liberalMD", false);
  }

  /**
   * @param props the coref configuration
   * @return the value of {@code coref.md.useGoldMentions}; default {@code false}
   */
  public static boolean useGoldMentions(Properties props) {
    return PropertiesUtils.getBool(props, "coref.md.useGoldMentions", false);
  }

  // ---------- Input and Output Data ----------

  public static final String OUTPUT_PATH_PROP = "coref.conllOutputPath";

  /**
   * Reads {@link #OUTPUT_PATH_PROP}; default {@code /scr/nlp/coref/logs/}.
   *
   * @param props the coref configuration
   * @return the output path with a trailing "/" appended if it was missing;
   *     an empty value is returned unchanged
   */
  public static String conllOutputPath(Properties props) {
    return withTrailingSlash(props.getProperty(OUTPUT_PATH_PROP, "/scr/nlp/coref/logs/"));
  }

  public enum Dataset {TRAIN, DEV, TEST}

  /**
   * Sets {@code coref.inputPath} to the train, dev or test data path.
   * Any value other than {@code TRAIN} or {@code DEV} selects the test path.
   *
   * @param props the coref configuration to modify
   * @param d the dataset whose path becomes the input path
   */
  public static void setInput(Properties props, Dataset d) {
    String path;
    if (d == Dataset.TRAIN) {
      path = getTrainDataPath(props);
    } else if (d == Dataset.DEV) {
      path = getDevDataPath(props);
    } else {
      path = getTestDataPath(props);
    }
    props.setProperty("coref.inputPath", path);
  }

  /**
   * Reads {@code coref.data}; default {@code /scr/nlp/data/conll-2012/}.
   *
   * @param props the coref configuration
   * @return the data root with a trailing "/" appended if it was missing;
   *     an empty value is returned unchanged
   */
  public static String getDataPath(Properties props) {
    return withTrailingSlash(props.getProperty("coref.data", "/scr/nlp/data/conll-2012/"));
  }

  /**
   * @param props the coref configuration
   * @return the value of {@code coref.trainData}; the default is built from
   *     {@link #getDataPath(Properties)} and the lower-cased English language name
   */
  public static String getTrainDataPath(Properties props) {
    return props.getProperty("coref.trainData",
        getDataPath(props) + "v4/data/train/data/" + getLanguageStr(props) + "/annotations/");
  }

  /**
   * @param props the coref configuration
   * @return the value of {@code coref.devData}; the default is built from
   *     {@link #getDataPath(Properties)} and the lower-cased English language name
   */
  public static String getDevDataPath(Properties props) {
    return props.getProperty("coref.devData",
        getDataPath(props) + "v4/data/development/data/" + getLanguageStr(props) + "/annotations/");
  }

  /**
   * NOTE(review): the default here uses {@code v9} and has no trailing "/",
   * unlike the train and dev defaults; kept as-is to avoid changing paths
   * callers may rely on.
   *
   * @param props the coref configuration
   * @return the value of {@code coref.testData}; the default is built from
   *     {@link #getDataPath(Properties)} and the lower-cased English language name
   */
  public static String getTestDataPath(Properties props) {
    return props.getProperty("coref.testData",
        getDataPath(props) + "v9/data/test/data/" + getLanguageStr(props) + "/annotations");
  }

  /**
   * @param props the coref configuration
   * @return the value of {@code coref.inputPath}; default {@link #getTestDataPath(Properties)}
   */
  public static String getInputPath(Properties props) {
    return props.getProperty("coref.inputPath", getTestDataPath(props));
  }

  /**
   * @param props the coref configuration
   * @return the value of {@code coref.scorer}; default
   *     {@code /scr/nlp/data/conll-2012/scorer/v8.01/scorer.pl}
   */
  public static String getScorerPath(Properties props) {
    return props.getProperty("coref.scorer", "/scr/nlp/data/conll-2012/scorer/v8.01/scorer.pl");
  }

  /**
   * Reads {@code coref.language}; default "en". Accepts "en"/"english" and
   * "zh"/"chinese", case-insensitively.
   *
   * @param props the coref configuration
   * @return {@link Locale#ENGLISH} or {@link Locale#CHINESE}
   * @throws IllegalArgumentException for any other value
   */
  public static Locale getLanguage(Properties props) {
    String lang = PropertiesUtils.getString(props, "coref.language", "en");
    if (lang.equalsIgnoreCase("en") || lang.equalsIgnoreCase("english")) {
      return Locale.ENGLISH;
    }
    if (lang.equalsIgnoreCase("zh") || lang.equalsIgnoreCase("chinese")) {
      return Locale.CHINESE;
    }
    throw new IllegalArgumentException("unsupported language: " + lang);
  }

  /**
   * Returns the lower-cased English name of the configured language
   * ("english" or "chinese"), used as a directory name in the data paths.
   */
  private static String getLanguageStr(Properties props) {
    // Ask for the English display name explicitly: the no-arg overload is
    // localized to the JVM default locale and would yield e.g. "anglais".
    return getLanguage(props).getDisplayName(Locale.ENGLISH).toLowerCase(Locale.ROOT);
  }

  /**
   * Appends "/" to {@code path} unless it already ends with one. An empty path
   * is returned unchanged so it stays relative instead of turning into the
   * filesystem root.
   */
  private static String withTrailingSlash(String path) {
    if (path.isEmpty() || path.endsWith("/")) {
      return path;
    }
    return path + "/";
  }

}