package edu.stanford.nlp.sequences; import edu.stanford.nlp.ling.CoreLabel; import edu.stanford.nlp.optimization.StochasticCalculateMethods; import edu.stanford.nlp.process.WordShapeClassifier; import java.io.Serializable; import java.util.*; /** Flags for sequence classifiers. Documentation for general flags and * flags for NER can be found in the Javadoc of * {@link edu.stanford.nlp.ie.NERFeatureFactory}. * Documentation for the flags for Chinese word segmentation can be * found in the Javadoc of * {@link edu.stanford.nlp.wordseg.ChineseSegmenterFeatureFactory}. * <p/> * <i>Programming note:</i> Try <b>very hard</b> to only add new variables * at the end of the list of variables (and not to change existing variables). * Otherwise you usually break all currently serialized classifiers! * Search for "ADD VARIABLES ABOVE HERE" below. * * @author Jenny Finkel */ public class SeqClassifierFlags implements Serializable { private static final long serialVersionUID = -7076671761070232567L; public static final String DEFAULT_BACKGROUND_SYMBOL = "O"; private String stringRep = ""; public boolean useNGrams = false; public boolean conjoinShapeNGrams = false; public boolean lowercaseNGrams = false; public boolean dehyphenateNGrams = false; public boolean usePrev = false; public boolean useNext = false; public boolean useTags = false; public boolean useWordPairs = false; public boolean useGazettes = false; public boolean useSequences = true; public boolean usePrevSequences = false; public boolean useNextSequences = false; public boolean useLongSequences = false; public boolean useBoundarySequences = false; public boolean useTaggySequences = false; public boolean useExtraTaggySequences = false; public boolean dontExtendTaggy = false; public boolean useTaggySequencesShapeInteraction = false; public boolean strictlyZeroethOrder = false; public boolean strictlyFirstOrder = false; public boolean strictlySecondOrder = false; public boolean strictlyThirdOrder = false; public String 
entitySubclassification = "IO"; public boolean retainEntitySubclassification = false; public boolean useGazettePhrases = false; public boolean makeConsistent = false; public boolean useWordLabelCounts = false; //boolean usePrevInstanceLabel = false; //boolean useNextInstanceLabel = false; public boolean useViterbi = true; public int[] binnedLengths = null; public boolean useSum = false; public double tolerance = 1e-4; // TODO: [cdm aug06]: this should really be an int now (number of datums // for which to write features), but I kept it a String so as not to break // serialization. Change when breaking serialization anyway. public String printFeatures = null; public boolean useSymTags = false; /** * useSymWordPairs Has a small negative effect. */ public boolean useSymWordPairs = false; public String printClassifier = "WeightHistogram"; public int printClassifierParam = 100; public boolean intern = false; public boolean intern2 = false; public boolean selfTest = false; public boolean sloppyGazette = false; public boolean cleanGazette = false; public boolean noMidNGrams = false; public int maxNGramLeng = -1; public boolean useReverse = false; public boolean greekifyNGrams = false; public boolean useParenMatching = false; public boolean useLemmas = false; public boolean usePrevNextLemmas = false; public boolean normalizeTerms = false; public boolean normalizeTimex = false; public boolean useNB = false; public boolean useQN = true; public boolean useFloat = false; public int QNsize = 25; public int QNsize2 = 25; public int maxIterations = -1; public int wordShape = WordShapeClassifier.NOWORDSHAPE; public boolean useShapeStrings = false; public boolean useTypeSeqs = false; public boolean useTypeSeqs2 = false; public boolean useTypeSeqs3 = false; public boolean useDisjunctive = false; public int disjunctionWidth = 4; public boolean useDisjunctiveShapeInteraction = false; public boolean useDisjShape = false; public boolean useWord = true; // ON by default public boolean 
useClassFeature = false; public boolean useShapeConjunctions = false; public boolean useWordTag = false; public boolean useNPHead = false; public boolean useNPGovernor = false; public boolean useHeadGov = false; public boolean useLastRealWord = false; public boolean useNextRealWord = false; public boolean useOccurrencePatterns = false; public boolean useTypeySequences = false; public boolean justify = false; public boolean normalize = false; public String priorType = "QUADRATIC"; public double sigma = 1.0; public double epsilon = 0.01; public int beamSize = 30; public int maxLeft = 2; public int maxRight = 0; public boolean usePosition = false; public boolean useBeginSent = false; public boolean useGazFeatures = false; public boolean useMoreGazFeatures = false; public boolean useAbbr = false; public boolean useMinimalAbbr = false; public boolean useAbbr1 = false; public boolean useMinimalAbbr1 = false; public boolean useMoreAbbr = false; public boolean deleteBlankLines = false; public boolean useGENIA = false; public boolean useTOK = false; public boolean useABSTR = false; public boolean useABSTRFreqDict = false; public boolean useABSTRFreq = false; public boolean useFREQ = false; public boolean useABGENE = false; public boolean useWEB = false; public boolean useWEBFreqDict = false; public boolean useIsURL = false; public boolean useURLSequences = false; public boolean useIsDateRange = false; public boolean useEntityTypes = false; public boolean useEntityTypeSequences = false; public boolean useEntityRule =false; public boolean useOrdinal=false; public boolean useACR = false; public boolean useANTE = false; public boolean useMoreTags = false; public boolean useChunks = false; public boolean useChunkySequences = false; public boolean usePrevVB = false; public boolean useNextVB = false; public boolean useVB = false; public boolean subCWGaz = false; public String documentReader = "ColumnDocumentReader"; // OBSOLETE: delete when breaking serialization sometime. 
// public String trainMap = "word=0,tag=1,answer=2"; // public String testMap = "word=0,tag=1,answer=2"; public String map = "word=0,tag=1,answer=2"; public boolean useWideDisjunctive = false; public int wideDisjunctionWidth = 10; // chinese word-segmenter features public boolean useRadical = false; public boolean useBigramInTwoClique = false; public String morphFeatureFile = null; public boolean useReverseAffix = false; public int charHalfWindow = 3; public boolean useWord1 = false; public boolean useWord2 = false; public boolean useWord3 = false; public boolean useWord4 = false; public boolean useRad1 = false; public boolean useRad2 = false; public boolean useWordn = false; public boolean useCTBPre1 = false; public boolean useCTBSuf1 = false; public boolean useASBCPre1 = false; public boolean useASBCSuf1 = false; public boolean usePKPre1 = false; public boolean usePKSuf1 = false; public boolean useHKPre1 = false; public boolean useHKSuf1 = false; public boolean useCTBChar2= false; public boolean useASBCChar2= false; public boolean useHKChar2= false; public boolean usePKChar2= false; public boolean useRule2=false; public boolean useDict2=false; public boolean useOutDict2=false; public String outDict2="/u/htseng/scr/chunking/segmentation/out.lexicon"; public boolean useDictleng=false; public boolean useDictCTB2=false; public boolean useDictASBC2=false; public boolean useDictPK2=false; public boolean useDictHK2=false; public boolean useBig5=false; public boolean useNegDict2=false; public boolean useNegDict3=false; public boolean useNegDict4=false; public boolean useNegCTBDict2=false; public boolean useNegCTBDict3=false; public boolean useNegCTBDict4=false; public boolean useNegASBCDict2=false; public boolean useNegASBCDict3=false; public boolean useNegASBCDict4=false; public boolean useNegHKDict2=false; public boolean useNegHKDict3=false; public boolean useNegHKDict4=false; public boolean useNegPKDict2=false; public boolean useNegPKDict3=false; public boolean 
useNegPKDict4=false; public boolean usePre=false; public boolean useSuf=false; public boolean useRule=false; public boolean useHk=false; public boolean useMsr=false; public boolean useMSRChar2=false; public boolean usePk=false; public boolean useAs=false; public boolean useFilter=false; // this flag is used for nothing; delete when breaking serialization public boolean largeChSegFile =false; // this flag is used for nothing; delete when breaking serialization public boolean useRad2b = false; /** * Keep the whitespaces between English words in testFile when printing out answers. * Doesn't really change the content of the CoreLabels. (For Chinese segmentation.) */ public boolean keepEnglishWhitespaces = false; /** * Keep all the whitespaces words in testFile when printing out answers. * Doesn't really change the content of the CoreLabels. (For Chinese segmentation.) */ public boolean keepAllWhitespaces = false; public boolean sighanPostProcessing = false; /** * use POS information (an "open" feature for Chinese segmentation) */ public boolean useChPos = false; // CTBSegDocumentReader normalization table // A value of null means that a default algorithmic normalization // is done in which ASCII characters get mapped to their fullwidth // equivalents in the Unihan range public String normalizationTable; // = null; public String dictionary; // = null; public String serializedDictionary; // = null; public String dictionary2; // = null; public String normTableEncoding = "GB18030"; /** for Sighan bakeoff 2005, the path to the dictionary of bigrams appeared in corpus */ public String sighanCorporaDict = "/u/nlp/data/chinese-segmenter/"; // end Sighan 20005 chinese word-segmenter features/properties public boolean useWordShapeGaz = false; public String wordShapeGaz = null; // TODO: This should maybe be removed in favor of suppressing splitting when maxDocLengh <= 0, when // next breaking serialization public boolean splitDocuments = true; public boolean printXML = false; 
public boolean useSeenFeaturesOnly = false; public String lastNameList = "/u/nlp/data/dist.all.last"; public String maleNameList = "/u/nlp/data/dist.male.first"; public String femaleNameList = "/u/nlp/data/dist.female.first"; // don't want these serialized public transient String trainFile = null; /** NER adapation (Gaussian prior) parameters. */ public transient String adaptFile = null; public transient String devFile = null; public transient String testFile = null; public transient String textFile = null; public transient String loadClassifier = null; public transient String loadTextClassifier = null; public transient String loadJarClassifier = null; public transient String loadAuxClassifier = null; public transient String serializeTo = null; public transient String serializeToText = null; public transient int interimOutputFreq = 0; public transient String initialWeights = null; public transient List<String> gazettes = new ArrayList<String>(); public transient String selfTrainFile = null; public String inputEncoding = null; // used for CTBSegDocumentReader as well public boolean bioSubmitOutput = false; public int numRuns = 1; public String answerFile = null; public String altAnswerFile = null; public String dropGaz; public String printGazFeatures = null; public int numStartLayers = 1; public boolean dump = false; public boolean mergeTags; // whether to merge B- and I- tags public boolean splitOnHead; // threshold public int featureCountThreshold = 0; public double featureWeightThreshold = 0.0; // feature factory public String featureFactory = "edu.stanford.nlp.ie.NERFeatureFactory"; public String backgroundSymbol = DEFAULT_BACKGROUND_SYMBOL; //use public boolean useObservedSequencesOnly = false; public int maxDocSize = 10000; public boolean printProbs = false; public boolean printFirstOrderProbs = false; public boolean saveFeatureIndexToDisk = false; public boolean removeBackgroundSingletonFeatures = false; public boolean doGibbs = false; public int numSamples = 
100; public boolean useNERPrior = false; public boolean useAcqPrior = false; public boolean useMUCFeatures = false; public double annealingRate = 0.0; public String annealingType = null; public String loadProcessedData = null; public boolean initViterbi = true; public boolean useUnknown = false; public boolean checkNameList = false; public boolean useSemPrior = false; public boolean useFirstWord = false; public boolean useNumberFeature = false; public int ocrFold = 0; public transient boolean ocrTrain = false; public String classifierType = "MaxEnt"; public String svmModelFile = null; public String inferenceType = "Viterbi"; public boolean useLemmaAsWord = false; public String type = "cmm"; public String readerAndWriter = "edu.stanford.nlp.sequences.ColumnDocumentReaderAndWriter"; public List<String> comboProps = new ArrayList<String>(); public boolean usePrediction = false; public boolean useAltGazFeatures = false; public String gazFilesFile = null; public boolean usePrediction2 = false; public String baseTrainDir = "."; public String baseTestDir = "."; public String trainFiles = null; public String trainFileList = null; public String testFiles = null; public String trainDirs = null; public String testDirs = null; public boolean useOnlySeenWeights = false; public String predProp = null; public CoreLabel pad = new CoreLabel(); public boolean useObservedFeaturesOnly = false; public String distSimLexicon = null; public boolean useDistSim = false; public int removeTopN = 0; public int numTimesRemoveTopN = 1; public double randomizedRatio = 1.0; public double removeTopNPercent = 0.0; public int purgeFeatures = -1; public boolean booleanFeatures = false; public boolean iobWrapper = false; public boolean iobTags = false; public boolean useSegmentation = false; /* binary segmentation feature for character-based Chinese NER */ public boolean memoryThrift = false; public boolean timitDatum = false; public String serializeDatasetsDir = null; public String loadDatasetsDir = 
null; public String pushDir = null; public boolean purgeDatasets = false; public boolean keepOBInMemory = true; public boolean fakeDataset = false; public boolean restrictTransitionsTimit = false; public int numDatasetsPerFile = 1; public boolean useTitle = false; // these are for the old stuff public boolean lowerNewgeneThreshold = false; public boolean useEitherSideWord = false; public boolean useEitherSideDisjunctive = false; public boolean twoStage = false; public String crfType = "MaxEnt"; public int featureThreshold = 1; public String featThreshFile = null; public double featureDiffThresh = 0.0; public int numTimesPruneFeatures = 0; public double newgeneThreshold = 0.0; public boolean doAdaptation = false; public boolean useInternal = true; public boolean useExternal = true; public double selfTrainConfidenceThreshold = 0.9; public int selfTrainIterations = 1; public int selfTrainWindowSize = 1; //Unigram public boolean useHuber = false; public boolean useQuartic = false; public double adaptSigma = 1.0; public int numFolds = 1; public int startFold = 1; public int endFold = 1; public boolean cacheNGrams = false; public String outputFormat; public boolean useSMD = false; public boolean useSGDtoQN = false; public boolean useStochasticQN = false; public boolean useScaledSGD = false; public int scaledSGDMethod = 0; public int SGDPasses = -1; public int QNPasses = -1; public boolean tuneSGD = false; public StochasticCalculateMethods stochasticMethod = StochasticCalculateMethods.NoneSpecified ; public double initialGain = 0.1; public int stochasticBatchSize = 15; public boolean useSGD = false; public double gainSGD = 0.1; public boolean useHybrid = false; public int hybridCutoffIteration = 0; public boolean outputIterationsToFile = false; public boolean testObjFunction = false; public boolean testVariance = false; public int SGD2QNhessSamples = 50; public boolean testHessSamples = false; public int CRForder = 1; public int CRFwindow = 2; public boolean 
estimateInitial = false; public transient String biasedTrainFile = null; public transient String confusionMatrix = null; public String outputEncoding = null; public boolean useKBest = false; public String searchGraphPrefix = null; public double searchGraphPrune = Double.POSITIVE_INFINITY; public int kBest = 1; // more chinese segmenter features for GALE 2007 public boolean useFeaturesC4gram; public boolean useFeaturesC5gram; public boolean useFeaturesC6gram; public boolean useFeaturesCpC4gram; public boolean useFeaturesCpC5gram; public boolean useFeaturesCpC6gram; public boolean useUnicodeType; public boolean useUnicodeType4gram; public boolean useUnicodeType5gram; public boolean use4Clique; public boolean useUnicodeBlock; public boolean useShapeStrings1; public boolean useShapeStrings3; public boolean useShapeStrings4; public boolean useShapeStrings5; public boolean useGoodForNamesCpC; public boolean useDictionaryConjunctions; public boolean expandMidDot; public int printFeaturesUpto; public boolean useDictionaryConjunctions3; public boolean useWordUTypeConjunctions2; public boolean useWordUTypeConjunctions3; public boolean useWordShapeConjunctions2; public boolean useWordShapeConjunctions3; public boolean useMidDotShape; public boolean augmentedDateChars; public boolean suppressMidDotPostprocessing; public boolean printNR; // a flag for WordAndTagDocumentReaderAndWriter public String classBias = null; public boolean printLabelValue; // Old printErrorStuff public boolean useRobustQN = false; public boolean combo = false; public boolean useGenericFeatures = false; public boolean verboseForTrueCasing = false; public String trainHierarchical = null; public String domain = null; public boolean baseline = false; public String transferSigmas = null; public boolean doFE = false; public boolean restrictLabels = true; public boolean announceObjectBankEntries = false; // whether to print a line giving each ObjectBank entry (usually a filename) // "ADD VARIABLES ABOVE HERE" 
/**
 * Create a new SeqClassifierFlags object in which every flag keeps the
 * default value given by its field initializer. No properties are read
 * and nothing is printed.
 */
public SeqClassifierFlags() { }

/** Create a new SeqClassifierFlags object and initialize it
 *  using values in the Properties object.
 *  This simply delegates to {@link #setProperties(Properties, boolean)}
 *  with printing switched on, so the properties are printed to stderr
 *  as it works.
 *  <p/>
 *  NOTE(review): the two-argument setProperties is public and non-final,
 *  so a subclass override of it would be invoked from this constructor,
 *  before the subclass's own field initializers have run.
 *
 *  @param props The properties object used for initialization
 */
public SeqClassifierFlags(Properties props) {
  setProperties(props, true);
}

/**
 * Initialize this object using values in Properties object.
 * Convenience overload: exactly equivalent to calling
 * {@link #setProperties(Properties, boolean)} with {@code printProps}
 * set to {@code true}, so the properties are printed to stderr as it works.
 * This overload is final; the two-argument method it calls is not.
 *
 * @param props The properties object used for initialization
 */
public final void setProperties(Properties props) {
  setProperties(props, true);
}

/**
 * Initialize using values in Properties file.
 *
 * @param props The properties object used for initialization
 * @param printProps Whether to print the properties to stderr as it works.
 */
public void setProperties(Properties props, boolean printProps) {
  StringBuilder sb = new StringBuilder(stringRep);
  for (Enumeration e = props.propertyNames(); e.hasMoreElements(); ) {
    String key = (String) e.nextElement();
    String val = props.getProperty(key);
    if (! 
(key.length() == 0 && val.length() == 0)) { if (printProps) { System.err.println(key+ '=' +val); } sb.append(key).append('=').append(val).append('\n'); } if (key.equalsIgnoreCase("macro")) { if (Boolean.parseBoolean(val)) { useObservedSequencesOnly = true; readerAndWriter = "edu.stanford.nlp.sequences.CoNLLDocumentReaderAndWriter"; // useClassFeature = true; // submit useLongSequences = true; useTaggySequences = true; useNGrams = true; usePrev = true; useNext = true; useTags = true; useWordPairs = true; useSequences = true; usePrevSequences = true; // noMidNGrams noMidNGrams = true; // reverse useReverse = true; // typeseqs3 useTypeSeqs = true; useTypeSeqs2 = true; useTypeySequences = true; // wordtypes2 && known wordShape = WordShapeClassifier.WORDSHAPEDAN2USELC; // occurrence useOccurrencePatterns = true; // realword useLastRealWord = true; useNextRealWord = true; // smooth sigma = 3.0; // normalize normalize = true; normalizeTimex = true; } } else if (key.equalsIgnoreCase("goodCoNLL")) { if (Boolean.parseBoolean(val)) { // featureFactory = "edu.stanford.nlp.ie.NERFeatureFactory"; readerAndWriter = "edu.stanford.nlp.sequences.CoNLLDocumentReaderAndWriter"; useObservedSequencesOnly = true; // useClassFeature = true; useLongSequences = true; useTaggySequences = true; useNGrams = true; usePrev = true; useNext = true; useTags = true; useWordPairs = true; useSequences = true; usePrevSequences = true; // noMidNGrams noMidNGrams = true; // should this be set?? maxNGramLeng = 6; No (to get best score). 
// reverse useReverse = false; // typeseqs3 useTypeSeqs = true; useTypeSeqs2 = true; useTypeySequences = true; // wordtypes2 && known wordShape = WordShapeClassifier.WORDSHAPEDAN2USELC; // occurrence useOccurrencePatterns = true; // realword useLastRealWord = true; useNextRealWord = true; // smooth sigma = 50.0; // increased Aug 2006 from 20; helpful with less feats // normalize normalize = true; normalizeTimex = true; maxLeft = 2; useDisjunctive = true; disjunctionWidth = 4; // clearly optimal for CoNLL useBoundarySequences = true; useLemmas = true; // no-op except for German usePrevNextLemmas = true; // no-op except for German inputEncoding="iso-8859-1"; // opt useQN = true; QNsize = 15; } } else if (key.equalsIgnoreCase("conllNoTags")) { if (Boolean.parseBoolean(val)) { readerAndWriter = "edu.stanford.nlp.sequences.ColumnDocumentReaderAndWriter"; // trainMap=testMap="word=0,answer=1"; map="word=0,answer=1"; useObservedSequencesOnly = true; // useClassFeature = true; useLongSequences = true; //useTaggySequences = true; useNGrams = true; usePrev = true; useNext = true; //useTags = true; useWordPairs = true; useSequences = true; usePrevSequences = true; // noMidNGrams noMidNGrams = true; // reverse useReverse = false; // typeseqs3 useTypeSeqs = true; useTypeSeqs2 = true; useTypeySequences = true; // wordtypes2 && known wordShape = WordShapeClassifier.WORDSHAPEDAN2USELC; // occurrence //useOccurrencePatterns = true; // realword useLastRealWord = true; useNextRealWord = true; // smooth sigma = 20.0; adaptSigma = 20.0; // normalize normalize = true; normalizeTimex = true; maxLeft = 2; useDisjunctive = true; disjunctionWidth = 4; useBoundarySequences = true; //useLemmas = true; // no-op except for German //usePrevNextLemmas = true; // no-op except for German inputEncoding="iso-8859-1"; // opt useQN = true; QNsize = 15; } } else if (key.equalsIgnoreCase("notags")) { if (Boolean.parseBoolean(val)) { // turn off all features that use POS tags // this is slightly crude: it 
also turns off a few things that // don't use tags in e.g., useTaggySequences useTags = false; useSymTags = false; useTaggySequences = false; useOccurrencePatterns = false; } } else if (key.equalsIgnoreCase("submit")) { if (Boolean.parseBoolean(val)) { useLongSequences = true; useTaggySequences = true; useNGrams = true; usePrev = true; useNext = true; useTags = true; useWordPairs = true; wordShape = WordShapeClassifier.WORDSHAPEDAN1; useSequences = true; usePrevSequences = true; } } else if (key.equalsIgnoreCase("binnedLengths")) { if (val != null) { String[] binnedLengthStrs = val.split("[, ]+"); binnedLengths = new int[binnedLengthStrs.length]; for (int i = 0; i < binnedLengths.length; i++) { binnedLengths[i] = Integer.parseInt(binnedLengthStrs[i]); } } } else if (key.equalsIgnoreCase("makeConsistent")) { makeConsistent = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("dump")) { dump = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useNGrams")) { useNGrams = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("conjoinShapeNGrams")) { conjoinShapeNGrams = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("lowercaseNGrams")) { lowercaseNGrams = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useIsURL")) { useIsURL = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useURLSequences")) { useURLSequences = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useEntityTypes")) { useEntityTypes = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useEntityRule")){ useEntityRule = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useOrdinal")){ useOrdinal = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useEntityTypeSequences")) { useEntityTypeSequences = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useIsDateRange")) { useIsDateRange = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("dehyphenateNGrams")) { dehyphenateNGrams = 
Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("lowerNewgeneThreshold")) { lowerNewgeneThreshold = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("usePrev")) { usePrev = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useNext")) { useNext = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useTags")) { useTags = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useWordPairs")) { useWordPairs = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useGazettes")) { useGazettes = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("wordShape")) { wordShape = WordShapeClassifier.lookupShaper(val); } else if (key.equalsIgnoreCase("useShapeStrings")) { useShapeStrings = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useGoodForNamesCpC")) { useGoodForNamesCpC = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useDictionaryConjunctions")) { useDictionaryConjunctions = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useDictionaryConjunctions3")) { useDictionaryConjunctions3 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("expandMidDot")) { expandMidDot = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useSequences")) { useSequences = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("usePrevSequences")) { usePrevSequences = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useNextSequences")) { useNextSequences = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useLongSequences")) { useLongSequences = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useBoundarySequences")) { useBoundarySequences = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useTaggySequences")) { useTaggySequences = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useExtraTaggySequences")) { useExtraTaggySequences = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useTaggySequencesShapeInteraction")) { 
useTaggySequencesShapeInteraction = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("strictlyZeroethOrder")) { strictlyZeroethOrder = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("strictlyFirstOrder")) { strictlyFirstOrder = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("strictlySecondOrder")) { strictlySecondOrder = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("strictlyThirdOrder")) { strictlyThirdOrder = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("dontExtendTaggy")) { dontExtendTaggy = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("entitySubclassification")) { entitySubclassification = val; } else if (key.equalsIgnoreCase("useGazettePhrases")) { useGazettePhrases = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useSum")) { useSum = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("tolerance")) { tolerance = Double.parseDouble(val); } else if (key.equalsIgnoreCase("maxIterations")) { maxIterations = Integer.parseInt(val); } else if (key.equalsIgnoreCase("printFeatures")) { printFeatures = val; } else if (key.equalsIgnoreCase("printFeaturesUpto")) { printFeaturesUpto = Integer.parseInt(val); } else if (key.equalsIgnoreCase("lastNameList")) { lastNameList = val; } else if (key.equalsIgnoreCase("maleNameList")) { maleNameList = val; } else if (key.equalsIgnoreCase("femaleNameList")) { femaleNameList = val; } else if (key.equalsIgnoreCase("useSymTags")) { useSymTags = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useSymWordPairs")) { useSymWordPairs = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("printClassifier")) { printClassifier = val; } else if (key.equalsIgnoreCase("printClassifierParam")) { printClassifierParam = Integer.parseInt(val); } else if (key.equalsIgnoreCase("intern")) { intern = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("mergetags")) { mergeTags = Boolean.parseBoolean(val); } else if 
(key.equalsIgnoreCase("iobtags")) { iobTags = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useViterbi")) { useViterbi = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("intern2")) { intern2 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("selfTest")) { selfTest = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("sloppyGazette")) { sloppyGazette = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("cleanGazette")) { cleanGazette = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("noMidNGrams")) { noMidNGrams = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useReverse")) { useReverse = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("retainEntitySubclassification")) { retainEntitySubclassification = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useLemmas")) { useLemmas = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("usePrevNextLemmas")) { usePrevNextLemmas = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("normalizeTerms")) { normalizeTerms = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("normalizeTimex")) { normalizeTimex = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useNB")) { useNB = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useParenMatching")) { useParenMatching = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useTypeSeqs")) { useTypeSeqs = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useTypeSeqs2")) { useTypeSeqs2 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useTypeSeqs3")) { useTypeSeqs3 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useDisjunctive")) { useDisjunctive = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("disjunctionWidth")) { disjunctionWidth = Integer.parseInt(val); } else if (key.equalsIgnoreCase("useDisjunctiveShapeInteraction")) { useDisjunctiveShapeInteraction = Boolean.parseBoolean(val); } else if 
(key.equalsIgnoreCase("useWideDisjunctive")) { useWideDisjunctive = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("wideDisjunctionWidth")) { wideDisjunctionWidth = Integer.parseInt(val); } else if (key.equalsIgnoreCase("useDisjShape")) { useDisjShape = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useTitle")) { useTitle = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("booleanFeatures")) { booleanFeatures = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useClassFeature")) { useClassFeature = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useShapeConjunctions")) { useShapeConjunctions = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useWordTag")) { useWordTag = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useNPHead")) { useNPHead = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useNPGovernor")) { useNPGovernor = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useHeadGov")) { useHeadGov = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useLastRealWord")) { useLastRealWord = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useNextRealWord")) { useNextRealWord = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useOccurrencePatterns")) { useOccurrencePatterns = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useTypeySequences")) { useTypeySequences = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("justify")) { justify = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("normalize")) { normalize = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("priorType")) { priorType = val; } else if (key.equalsIgnoreCase("sigma")) { sigma = Double.parseDouble(val); } else if (key.equalsIgnoreCase("epsilon")) { epsilon = Double.parseDouble(val); } else if (key.equalsIgnoreCase("beamSize")) { beamSize = Integer.parseInt(val); } else if (key.equalsIgnoreCase("removeTopN")) { removeTopN = 
Integer.parseInt(val); } else if (key.equalsIgnoreCase("removeTopNPercent")) { removeTopNPercent = Double.parseDouble(val); } else if (key.equalsIgnoreCase("randomizedRatio")) { randomizedRatio = Double.parseDouble(val); } else if (key.equalsIgnoreCase("numTimesRemoveTopN")) { numTimesRemoveTopN = Integer.parseInt(val); } else if (key.equalsIgnoreCase("maxLeft")) { maxLeft = Integer.parseInt(val); } else if (key.equalsIgnoreCase("maxRight")) { maxRight = Integer.parseInt(val); } else if (key.equalsIgnoreCase("maxNGramLeng")) { maxNGramLeng = Integer.parseInt(val); } else if (key.equalsIgnoreCase("useGazFeatures")) { useGazFeatures = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useAltGazFeatures")) { useAltGazFeatures = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useMoreGazFeatures")) { useMoreGazFeatures = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useAbbr")) { useAbbr = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useMinimalAbbr")) { useMinimalAbbr = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useAbbr1")) { useAbbr1 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useMinimalAbbr1")) { useMinimalAbbr1 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("documentReader")) { System.err.println("You are using an outdated flag: -documentReader "+val); System.err.println("Please use -readerAndWriter instead."); } else if (key.equalsIgnoreCase("deleteBlankLines")) { deleteBlankLines = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("answerFile")) { answerFile = val; } else if (key.equalsIgnoreCase("altAnswerFile")) { altAnswerFile = val; } else if (key.equalsIgnoreCase("loadClassifier")) { loadClassifier = val; } else if (key.equalsIgnoreCase("loadTextClassifier")) { loadTextClassifier = val; } else if (key.equalsIgnoreCase("loadJarClassifier")) { loadJarClassifier = val; } else if (key.equalsIgnoreCase("loadAuxClassifier")) { loadAuxClassifier=val; } else if 
(key.equalsIgnoreCase("serializeTo")) { serializeTo = val; } else if (key.equalsIgnoreCase("serializeToText")) { serializeToText = val; } else if (key.equalsIgnoreCase("serializeDatasetsDir")) { serializeDatasetsDir = val; } else if (key.equalsIgnoreCase("loadDatasetsDir")) { loadDatasetsDir = val; } else if (key.equalsIgnoreCase("pushDir")) { pushDir = val; } else if (key.equalsIgnoreCase("purgeDatasets")) { purgeDatasets = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("keepOBInMemory")) { keepOBInMemory = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("fakeDataset")) { fakeDataset = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("numDatasetsPerFile")) { numDatasetsPerFile = Integer.parseInt(val); } else if (key.equalsIgnoreCase("trainFile")) { trainFile = val; } else if (key.equalsIgnoreCase("biasedTrainFile")) { biasedTrainFile = val; } else if (key.equalsIgnoreCase("classBias")) { classBias = val; } else if (key.equalsIgnoreCase("confusionMatrix")) { confusionMatrix = val; } else if (key.equalsIgnoreCase("adaptFile")) { adaptFile = val; } else if (key.equalsIgnoreCase("devFile")) { devFile = val; } else if (key.equalsIgnoreCase("testFile")) { testFile = val; } else if (key.equalsIgnoreCase("textFile")) { textFile = val; } else if (key.equalsIgnoreCase("initialWeights")) { initialWeights = val; } else if (key.equalsIgnoreCase("interimOutputFreq")) { interimOutputFreq = Integer.parseInt(val); } else if (key.equalsIgnoreCase("inputEncoding")) { inputEncoding = val; } else if (key.equalsIgnoreCase("outputEncoding")) { outputEncoding = val; } else if (key.equalsIgnoreCase("gazette")) { useGazettes = true; StringTokenizer st = new StringTokenizer(val, " ,;\t"); if (gazettes == null) { gazettes = new ArrayList<String>(); } // for after deserialization, as gazettes is transient while (st.hasMoreTokens()) { gazettes.add(st.nextToken()); } } else if (key.equalsIgnoreCase("useQN")) { useQN = Boolean.parseBoolean(val); } else if 
(key.equalsIgnoreCase("QNsize")) { QNsize = Integer.parseInt(val); } else if (key.equalsIgnoreCase("QNsize2")) { QNsize2 = Integer.parseInt(val); } else if (key.equalsIgnoreCase("useFloat")) { useFloat = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("trainMap")) { System.err.println("trainMap and testMap are no longer valid options - please use map instead."); throw new RuntimeException(); } else if (key.equalsIgnoreCase("testMap")) { System.err.println("trainMap and testMap are no longer valid options - please use map instead."); throw new RuntimeException(); } else if (key.equalsIgnoreCase("map")) { map = val; } else if (key.equalsIgnoreCase("useMoreAbbr")) { useMoreAbbr = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("usePrevVB")) { usePrevVB = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useNextVB")) { useNextVB = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useVB")) { if (Boolean.parseBoolean(val)) { useVB = true; usePrevVB = true; useNextVB = true; } } else if (key.equalsIgnoreCase("useChunks")) { useChunks = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useChunkySequences")) { useChunkySequences = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("greekifyNGrams")) { greekifyNGrams = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("restrictTransitionsTimit")) { restrictTransitionsTimit = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useMoreTags")) { useMoreTags = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useBeginSent")) { useBeginSent = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("usePosition")) { usePosition = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useGenia")) { useGENIA = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useAbstr")) { useABSTR = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useWeb")) { useWEB = Boolean.parseBoolean(val); } else if 
(key.equalsIgnoreCase("useAnte")) { useANTE = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useAcr")) { useACR = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useTok")) { useTOK = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useAbgene")) { useABGENE = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useAbstrFreqDict")) { useABSTRFreqDict = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useAbstrFreq")) { useABSTRFreq = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useFreq")) { useFREQ = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("usewebfreqdict")) { useWEBFreqDict = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("bioSubmitOutput")) { bioSubmitOutput = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("subCWGaz")) { subCWGaz = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("splitOnHead")) { splitOnHead = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("featureCountThreshold")) { featureCountThreshold = Integer.parseInt(val); } else if (key.equalsIgnoreCase("useWord")) { useWord = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("memoryThrift")) { memoryThrift = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("timitDatum")) { timitDatum = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("splitDocuments")) { splitDocuments = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("featureWeightThreshold")) { featureWeightThreshold = Double.parseDouble(val); } else if (key.equalsIgnoreCase("backgroundSymbol")) { backgroundSymbol = val; } else if (key.equalsIgnoreCase("featureFactory")) { featureFactory = val; if (featureFactory.equalsIgnoreCase("SuperSimpleFeatureFactory")) { featureFactory = "edu.stanford.nlp.sequences.SuperSimpleFeatureFactory"; } else if (featureFactory.equalsIgnoreCase("NERFeatureFactory")) { featureFactory = "edu.stanford.nlp.ie.NERFeatureFactory"; } else if 
(featureFactory.equalsIgnoreCase("GazNERFeatureFactory")) { featureFactory = "edu.stanford.nlp.sequences.GazNERFeatureFactory"; } else if (featureFactory.equalsIgnoreCase("IncludeAllFeatureFactory")) { featureFactory = "edu.stanford.nlp.sequences.IncludeAllFeatureFactory"; } } else if (key.equalsIgnoreCase("printXML")) { printXML = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useSeenFeaturesOnly")) { useSeenFeaturesOnly = Boolean.parseBoolean(val); // chinese word-segmenter features } else if (key.equalsIgnoreCase("useRadical")) { useRadical = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useBigramInTwoClique")) { useBigramInTwoClique = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useReverseAffix")) { useReverseAffix = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("charHalfWindow")) { charHalfWindow = Integer.parseInt(val); } else if (key.equalsIgnoreCase("purgeFeatures")) { purgeFeatures = Integer.parseInt(val); } else if (key.equalsIgnoreCase("ocrFold")) { ocrFold = Integer.parseInt(val); } else if (key.equalsIgnoreCase("morphFeatureFile")) { morphFeatureFile = val; } else if (key.equalsIgnoreCase("svmModelFile")) { svmModelFile = val; /*Dictionary*/ } else if (key.equalsIgnoreCase("useDictleng")) { useDictleng = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useDict2")) { useDict2 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useOutDict2")) { useOutDict2 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("outDict2")) { outDict2 = val; } else if (key.equalsIgnoreCase("useDictCTB2")) { useDictCTB2 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useDictASBC2")) { useDictASBC2 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useDictPK2")) { useDictPK2 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useDictHK2")) { useDictHK2 = Boolean.parseBoolean(val); /*N-gram flags*/ } else if (key.equalsIgnoreCase("useWord1")) { useWord1 = 
Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useWord2")) { useWord2 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useWord3")) { useWord3 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useWord4")) { useWord4 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useRad1")) { useRad1 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useRad2")) { useRad2 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useRad2b")) { useRad2b = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useWordn")) { useWordn = Boolean.parseBoolean(val); /*affix flags*/ } else if (key.equalsIgnoreCase("useCTBPre1")) { useCTBPre1 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useCTBSuf1")) { useCTBSuf1 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useASBCPre1")) { useASBCPre1 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useASBCSuf1")) { useASBCSuf1 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useHKPre1")) { useHKPre1 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useHKSuf1")) { useHKSuf1 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("usePKPre1")) { usePKPre1 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("usePKSuf1")) { usePKSuf1 = Boolean.parseBoolean(val); /*POS flags*/ } else if (key.equalsIgnoreCase("useCTBChar2")) { useCTBChar2 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("usePrediction")) { usePrediction = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useASBCChar2")) { useASBCChar2 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useHKChar2")) { useHKChar2 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("usePKChar2")) { usePKChar2 = Boolean.parseBoolean(val); /* Rule flag */ } else if (key.equalsIgnoreCase("useRule2")) { useRule2 = Boolean.parseBoolean(val); /*ASBC and HK */ } else if (key.equalsIgnoreCase("useBig5")) { useBig5 = 
Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useNegDict2")) { useNegDict2 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useNegDict3")) { useNegDict3 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useNegDict4")) { useNegDict4 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useNegCTBDict2")) { useNegCTBDict2 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useNegCTBDict3")) { useNegCTBDict3 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useNegCTBDict4")) { useNegCTBDict4 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useNegASBCDict2")) { useNegASBCDict2 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useNegASBCDict3")) { useNegASBCDict3 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useNegASBCDict4")) { useNegASBCDict4 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useNegPKDict2")) { useNegPKDict2 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useNegPKDict3")) { useNegPKDict3 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useNegPKDict4")) { useNegPKDict4 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useNegHKDict2")) { useNegHKDict2 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useNegHKDict3")) { useNegHKDict3 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useNegHKDict4")) { useNegHKDict4 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("usePre")) { usePre = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useSuf")) { useSuf = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useRule")) { useRule = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useAs")) { useAs = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("usePk")) { usePk = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useHk")) { useHk = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useMsr")) { useMsr = 
Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useMSRChar2")) { useMSRChar2 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useFeaturesC4gram")) { useFeaturesC4gram = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useFeaturesC5gram")) { useFeaturesC5gram = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useFeaturesC6gram")) { useFeaturesC6gram = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useFeaturesCpC4gram")) { useFeaturesCpC4gram = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useFeaturesCpC5gram")) { useFeaturesCpC5gram = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useFeaturesCpC6gram")) { useFeaturesCpC6gram = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useUnicodeType")) { useUnicodeType = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useUnicodeBlock")) { useUnicodeBlock = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useUnicodeType4gram")) { useUnicodeType4gram = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useUnicodeType5gram")) { useUnicodeType5gram = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useShapeStrings1")) { useShapeStrings1 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useShapeStrings3")) { useShapeStrings3 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useShapeStrings4")) { useShapeStrings4 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useShapeStrings5")) { useShapeStrings5 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useWordUTypeConjunctions2")) { useWordUTypeConjunctions2 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useWordUTypeConjunctions3")) { useWordUTypeConjunctions3 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useWordShapeConjunctions2")) { useWordShapeConjunctions2 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useWordShapeConjunctions3")) { 
useWordShapeConjunctions3 = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useMidDotShape")) { useMidDotShape = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("augmentedDateChars")) { augmentedDateChars = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("suppressMidDotPostprocessing")) { suppressMidDotPostprocessing = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("printNR")) { printNR = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("use4Clique")) { use4Clique = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useFilter")) { useFilter = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("largeChSegFile")) { largeChSegFile = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("keepEnglishWhitespaces")) { keepEnglishWhitespaces = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("keepAllWhitespaces")) { keepAllWhitespaces = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("sighanPostProcessing")) { sighanPostProcessing = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useChPos")) { useChPos = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("sighanCorporaDict")) { sighanCorporaDict = val; // end chinese word-segmenter features } else if (key.equalsIgnoreCase("useObservedSequencesOnly")) { useObservedSequencesOnly = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("maxDocSize")) { maxDocSize = Integer.parseInt(val); splitDocuments = true; } else if (key.equalsIgnoreCase("printProbs")) { printProbs = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("printFirstOrderProbs")) { printFirstOrderProbs = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("saveFeatureIndexToDisk")) { saveFeatureIndexToDisk = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("removeBackgroundSingletonFeatures")) { removeBackgroundSingletonFeatures = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("doGibbs")) { doGibbs = 
Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useNERPrior")) { useNERPrior = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useAcqPrior")) { useAcqPrior = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useSemPrior")) { useSemPrior = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useMUCFeatures")) { useMUCFeatures = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("initViterbi")) { initViterbi = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("checkNameList")) { checkNameList = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useFirstWord")) { useFirstWord = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useUnknown")) { useUnknown = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("cacheNGrams")) { cacheNGrams = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useNumberFeature")) { useNumberFeature = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("annealingRate")) { annealingRate = Double.parseDouble(val); } else if (key.equalsIgnoreCase("annealingType")) { if (val.equalsIgnoreCase("linear") || val.equalsIgnoreCase("exp") || val.equalsIgnoreCase("exponential")) { annealingType = val; } else { System.err.println("unknown annealingType: "+val+". Please use linear|exp|exponential"); } } else if (key.equalsIgnoreCase("numSamples")) { numSamples = Integer.parseInt(val); } else if (key.equalsIgnoreCase("inferenceType")) { inferenceType = val; } else if (key.equalsIgnoreCase("loadProcessedData")) { loadProcessedData = val; } else if (key.equalsIgnoreCase("normalizationTable")) { normalizationTable = val; } else if (key.equalsIgnoreCase("dictionary")) { // don't set if empty string or spaces or true: revert it to null // special case so can empty out dictionary list on command line! val = val.trim(); if (val.length() > 0 && ! "true".equals(val) && ! "null".equals(val) && ! 
"false".equals(val)) {
          // Fixed: this guard used to read "false".equals("val"), which compares two
          // string literals and is always false, so a value of "false" was stored as
          // the dictionary instead of clearing it.
          dictionary = val;
        } else {
          dictionary = null;
        }
      } else if (key.equalsIgnoreCase("serDictionary")) {
        // A blank value, or one of the literals true / null / false, clears the
        // setting (reverts it to null). This special case lets the dictionary
        // list be emptied from the command line.
        val = val.trim();
        if (val.length() > 0 && !"true".equals(val) && !"null".equals(val) && !"false".equals(val)) {
          serializedDictionary = val;
        } else {
          serializedDictionary = null;
        }
      } else if (key.equalsIgnoreCase("dictionary2")) {
        // Same clearing rule as above: blank, true, null or false reverts to null.
        val = val.trim();
        if (val.length() > 0 && !"true".equals(val) && !"null".equals(val) && !"false".equals(val)) {
          dictionary2 = val;
        } else {
          dictionary2 = null;
        }
      } else if (key.equalsIgnoreCase("normTableEncoding")) {
        normTableEncoding = val;
      } else if (key.equalsIgnoreCase("useLemmaAsWord")) {
        useLemmaAsWord = Boolean.parseBoolean(val);
      } else if (key.equalsIgnoreCase("type")) {
        type = val;
      } else if (key.equalsIgnoreCase("readerAndWriter")) {
        readerAndWriter = val;
      } else if (key.equalsIgnoreCase("gazFilesFile")) {
        gazFilesFile = val;
      } else if (key.equalsIgnoreCase("baseTrainDir")) {
        baseTrainDir = val;
      } else if (key.equalsIgnoreCase("baseTestDir")) {
        baseTestDir = val;
      } else if (key.equalsIgnoreCase("trainFiles")) {
        trainFiles = val;
      } else if (key.equalsIgnoreCase("trainFileList")) {
        trainFileList = val;
      } else if (key.equalsIgnoreCase("trainDirs")) {
        trainDirs = val;
      } else if (key.equalsIgnoreCase("testDirs")) {
        testDirs = val;
      } else if (key.equalsIgnoreCase("testFiles")) {
        testFiles = val;
      } else if (key.equalsIgnoreCase("usePrediction2")) {
        usePrediction2 = Boolean.parseBoolean(val);
      } else if (key.equalsIgnoreCase("useObservedFeaturesOnly")) {
        useObservedFeaturesOnly = Boolean.parseBoolean(val);
      } else if (key.equalsIgnoreCase("iobWrapper")) {
        iobWrapper = Boolean.parseBoolean(val);
      } else if (key.equalsIgnoreCase("useDistSim")) {
useDistSim = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useOnlySeenWeights")) { useOnlySeenWeights = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("predProp")) { predProp = val; } else if (key.equalsIgnoreCase("distSimLexicon")) { distSimLexicon = val; } else if (key.equalsIgnoreCase("useSegmentation")) { useSegmentation = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useInternal")) { useInternal = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useExternal")) { useExternal = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useEitherSideWord")) { useEitherSideWord = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useEitherSideDisjunctive")) { useEitherSideDisjunctive = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("featureDiffThresh")) { featureDiffThresh = Double.parseDouble(val); if (props.getProperty("numTimesPruneFeatures") == null) { numTimesPruneFeatures = 1; } } else if (key.equalsIgnoreCase("numTimesPruneFeatures")) { numTimesPruneFeatures = Integer.parseInt(val); } else if (key.equalsIgnoreCase("newgeneThreshold")) { newgeneThreshold = Double.parseDouble(val); } else if (key.equalsIgnoreCase("adaptFile")) { adaptFile = val; } else if (key.equalsIgnoreCase("doAdaptation")) { doAdaptation = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("selfTrainFile")) { selfTrainFile = val; } else if (key.equalsIgnoreCase("selfTrainIterations")) { selfTrainIterations = Integer.parseInt(val); } else if (key.equalsIgnoreCase("selfTrainWindowSize")) { selfTrainWindowSize = Integer.parseInt(val); } else if (key.equalsIgnoreCase("selfTrainConfidenceThreshold")) { selfTrainConfidenceThreshold = Double.parseDouble(val); } else if (key.equalsIgnoreCase("numFolds")) { numFolds = Integer.parseInt(val); } else if (key.equalsIgnoreCase("startFold")) { startFold = Integer.parseInt(val); } else if (key.equalsIgnoreCase("endFold")) { endFold = Integer.parseInt(val); } else if 
(key.equalsIgnoreCase("adaptSigma")) { adaptSigma = Double.parseDouble(val); } else if (key.startsWith("prop") && !key.equals("prop")) { comboProps.add(val); } else if (key.equalsIgnoreCase("outputFormat")) { outputFormat = val; } else if (key.equalsIgnoreCase("useSMD")) { useSMD = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useScaledSGD")){ useScaledSGD = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("scaledSGDMethod")){ scaledSGDMethod = Integer.parseInt(val); } else if (key.equalsIgnoreCase("tuneSGD")){ tuneSGD = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("StochasticCalculateMethod")) { if (val.equalsIgnoreCase("AlgorithmicDifferentiation")){ stochasticMethod = StochasticCalculateMethods.AlgorithmicDifferentiation; } else if(val.equalsIgnoreCase("IncorporatedFiniteDifference")){ stochasticMethod = StochasticCalculateMethods.IncorporatedFiniteDifference ; } else if(val.equalsIgnoreCase("ExternalFinitedifference")){ stochasticMethod = StochasticCalculateMethods.ExternalFiniteDifference ; } } else if (key.equalsIgnoreCase("initialGain")) { initialGain = Double.parseDouble(val); } else if (key.equalsIgnoreCase("stochasticBatchSize")){ stochasticBatchSize = Integer.parseInt(val); } else if (key.equalsIgnoreCase("SGD2QNhessSamples")){ SGD2QNhessSamples = Integer.parseInt(val); } else if (key.equalsIgnoreCase("useSGD")) { useSGD = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("useSGDtoQN")){ useSGDtoQN = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("SGDPasses")){ SGDPasses = Integer.parseInt(val); } else if (key.equalsIgnoreCase("QNPasses")){ QNPasses = Integer.parseInt(val); } else if (key.equalsIgnoreCase("gainSGD")) { gainSGD = Double.parseDouble(val); } else if (key.equalsIgnoreCase("useHybrid")){ useHybrid = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("hybridCutoffIteration")){ hybridCutoffIteration = Integer.parseInt(val); } else if (key.equalsIgnoreCase("useStochasticQN")){ 
useStochasticQN = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("outputIterationsToFile")){ outputIterationsToFile = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("testObjFunction")){ testObjFunction = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("testVariance")){ testVariance = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("CRForder")){ CRForder = Integer.parseInt(val); } else if (key.equalsIgnoreCase("CRFwindow")){ CRFwindow = Integer.parseInt(val); } else if (key.equalsIgnoreCase("testHessSamples")){ testHessSamples = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("estimateInitial")){ estimateInitial = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("printLabelValue")){ printLabelValue = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("searchGraphPrefix")){ searchGraphPrefix = val; } else if (key.equalsIgnoreCase("searchGraphPrune")){ searchGraphPrune = Double.parseDouble(val); } else if (key.equalsIgnoreCase("kBest")){ useKBest = true; kBest = Integer.parseInt(val); } else if (key.equalsIgnoreCase("useRobustQN")){ useRobustQN = true; } else if (key.equalsIgnoreCase("combo")){ combo = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("verboseForTrueCasing")) { verboseForTrueCasing = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("trainHierarchical")){ trainHierarchical = val; } else if (key.equalsIgnoreCase("domain")){ domain = val; } else if(key.equalsIgnoreCase("baseline")) { baseline = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("doFE")) { doFE = Boolean.parseBoolean(val); } else if (key.equalsIgnoreCase("restrictLabels")) { restrictLabels = Boolean.parseBoolean(val); } else if(key.equalsIgnoreCase("transferSigmas")){ transferSigmas = val; } else if (key.equalsIgnoreCase("announceObjectBankEntries")) { announceObjectBankEntries = true; } else if (key.length() > 0 && !key.equals("prop")) { System.err.println("Unknown property: |" + key + 
'|'); } } if (startFold > numFolds) { System.err.println("startFold > numFolds -> setting startFold to 1"); startFold = 1; } if (endFold > numFolds) { System.err.println("endFold > numFolds -> setting to numFolds"); endFold = numFolds; } if (combo) { splitDocuments = false; } stringRep = sb.toString(); } // end setProperties() /** Print the properties specified by this object. * @return A String describing the properties specified by this object. */ @Override public String toString() { return stringRep; } } // end class SeqClassifierFlags