package optimizer; /** * * @author Miguel Ballesteros * * This class generates the options file */ public class OptionsGenerator { String libValue; String modelName;//nivreeager String modelNameStackLazy; String algorithm; String trainingCorpus; String libraryOptions; boolean danglingPunctuation; boolean rootGRL; String realRoot; public OptionsGenerator(String language, boolean projective, String trainingCorpus, boolean danglingPunct, boolean rootGRL, String realRoot) { this.trainingCorpus = trainingCorpus; this.danglingPunctuation = danglingPunct; this.rootGRL = rootGRL; this.realRoot = realRoot; //libraryOptions="-s_0_-t_1_-d_2_-g_0.2_-c_1.0_-r_0.4_-e_0.1"; /* * modelName=language+"Model"; if (language.equals("en")){ * libValue="-s_0_-t_1_-d_2_-g_0.18_-c_0.4_-r_0.4_-e_1.0"; } if * (language.equals("es")){ * libValue="-s_0_-t_1_-d_2_-g_0.2_-c_0.5_-r_0_-e_0.01"; } if * (language.equals("sw")){ * * } */ //etc.... if (projective) { algorithm = "nivreeager"; } else { algorithm = "covnonproj"; } } public String generateIncOptionsPrevioGRL(String lang, String algorithm, String training80, String rootHandling, String libOptions) { LibraryOptionsSetter lo = LibraryOptionsSetter.getSingleton(); libraryOptions = lo.getLibraryOptions(); String options = ""; algorithm = "nivreeager"; options += "<?xml version='1.0' encoding='UTF-8'?>"; options += "\n<experiment>"; options += "\n\t<optioncontainer>"; options += "\n\t\t" + "<optiongroup groupname='config'>"; options += "\n\t\t\t" + "<option name='name' value='" + lang + "Model'/>"; options += "\n\t\t\t" + "<option name='flowchart' value='learn'/>"; options += "\n\t\t" + "</optiongroup>"; options += "\n\t\t" + "<optiongroup groupname='input'>"; options += "\n\t\t\t" + "<option name='infile' value='" + training80 + "'/>"; options += "\n\t\t" + "</optiongroup>"; options += "\n\t\t" + "<optiongroup groupname='lib'>"; options += "\n\t\t\t" + "<option name='options' value='" + libOptions + "'/>"; options += "\n\t\t" + "</optiongroup>"; options += "\n\t\t" + "<optiongroup groupname='guide'>"; options += "\n\t\t\t" + "<option name='learner' value='liblinear'/>"; options += "\n\t\t" + "</optiongroup>"; options += "\n\t</optioncontainer>"; options += "\n</experiment>"; options = options.replaceAll("'", "\""); return options; } public String generateIncOptionsPrevioPCR(String lang, String algorithm, String training80, String rootHandling, String libOptions, String rootLabel) { LibraryOptionsSetter lo = LibraryOptionsSetter.getSingleton(); libraryOptions = lo.getLibraryOptions(); String options = ""; algorithm = "nivreeager"; options += "<?xml version='1.0' encoding='UTF-8'?>"; options += "\n<experiment>"; options += "\n\t<optioncontainer>"; options += "\n\t\t" + "<optiongroup groupname='config'>"; options += "\n\t\t\t" + "<option name='name' value='" + lang + "Model'/>"; options += "\n\t\t\t" + "<option name='flowchart' value='learn'/>"; options += "\n\t\t" + "</optiongroup>"; options += "\n\t\t" + "<optiongroup groupname='input'>"; options += "\n\t\t\t" + "<option name='infile' value='" + training80 + "'/>"; options += "\n\t\t" + "</optiongroup>"; options += "\n\t\t" + "<optiongroup groupname='graph'>"; options += "\n\t\t\t" + "<option name='root_label' value='" + rootLabel + "'/>"; options += "\n\t\t" + "</optiongroup>"; options += "\n\t\t" + "<optiongroup groupname='lib'>"; options += "\n\t\t\t" + "<option name='options' value='" + libOptions + "'/>"; options += "\n\t\t" + "</optiongroup>"; options += "\n\t\t" + "<optiongroup groupname='guide'>"; options += "\n\t\t\t" + "<option name='learner' value='liblinear'/>"; options += "\n\t\t" + "</optiongroup>"; options += "\n\t</optioncontainer>"; options += "\n</experiment>"; options = options.replaceAll("'", "\""); return options; } public String generateIncOptionsPhase1(String lang, String algorithm, String training80, String rootHandling, String libOptions, String rootLabel, String pcr) { LibraryOptionsSetter lo = LibraryOptionsSetter.getSingleton(); libraryOptions = lo.getLibraryOptions(); String options = ""; algorithm = "nivreeager"; options += "<?xml version='1.0' encoding='UTF-8'?>"; options += "\n<experiment>"; options += "\n\t<optioncontainer>"; options += "\n\t\t" + "<optiongroup groupname='config'>"; options += "\n\t\t\t" + "<option name='name' value='" + lang + "Model'/>"; options += "\n\t\t\t" + "<option name='flowchart' value='learn'/>"; options += "\n\t\t" + "</optiongroup>"; options += "\n\t\t" + "<optiongroup groupname='singlemalt'>"; options += "\n\t\t\t" + "<option name='parsing_algorithm' value='" + algorithm + "'/>"; options += "\n\t\t" + "</optiongroup>"; options += "\n\t\t" + "<optiongroup groupname='input'>"; options += "\n\t\t\t" + "<option name='infile' value='" + training80 + "'/>"; options += "\n\t\t" + "</optiongroup>"; if (algorithm.contains("nivre")) { options += "\n\t\t" + "<optiongroup groupname='nivre'>"; options += "\n\t\t\t" + "<option name='allow_root' value='" + Optimizer.allow_rootNiv + "'/>"; options += "\n\t\t\t" + "<option name='allow_reduce' value='" + Optimizer.allow_reduceNiv + "'/>"; options += "\n\t\t" + "</optiongroup>"; } options += "\n\t\t" + "<optiongroup groupname='graph'>"; options += "\n\t\t\t" + "<option name='root_label' value='" + rootLabel + "'/>"; options += "\n\t\t" + "</optiongroup>"; options += "\n\t\t" + "<optiongroup groupname='pproj'>"; options += "\n\t\t\t" + "<option name='covered_root' value='" + pcr + "'/>"; options += "\n\t\t" + "</optiongroup>"; options += "\n\t\t" + "<optiongroup groupname='lib'>"; options += "\n\t\t\t" + "<option name='options' value='" + libOptions + "'/>"; options += "\n\t\t" + "</optiongroup>"; options += "\n\t\t" + "<optiongroup groupname='guide'>"; options += "\n\t\t\t" + "<option name='learner' value='liblinear'/>"; options += "\n\t\t" + "</optiongroup>"; options += "\n\t</optioncontainer>"; options += "\n</experiment>"; options = options.replaceAll("'", "\""); return options; } public String generateIncOptionsTestingsPhases(String lang, String algorithm, String training80, String rootHandling, String libOptions, String rootLabel, String pcr) { LibraryOptionsSetter lo = LibraryOptionsSetter.getSingleton(); libraryOptions = lo.getLibraryOptions(); String options = ""; options += "<?xml version='1.0' encoding='UTF-8'?>"; options += "\n<experiment>"; options += "\n\t<optioncontainer>"; options += "\n\t\t" + "<optiongroup groupname='config'>"; options += "\n\t\t\t" + "<option name='name' value='" + lang + "Model'/>"; options += "\n\t\t\t" + "<option name='flowchart' value='learn'/>"; options += "\n\t\t" + "</optiongroup>"; options += "\n\t\t" + "<optiongroup groupname='singlemalt'>"; options += "\n\t\t\t" + "<option name='parsing_algorithm' value='" + algorithm + "'/>"; options += "\n\t\t" + "</optiongroup>"; options += "\n\t\t" + "<optiongroup groupname='input'>"; options += "\n\t\t\t" + "<option name='infile' value='" + training80 + "'/>"; options += "\n\t\t" + "</optiongroup>"; if (algorithm.contains("nivre")) { options += "\n\t\t" + "<optiongroup groupname='nivre'>"; options += "\n\t\t\t" + "<option name='allow_root' value='" + Optimizer.allow_rootNiv + "'/>"; options += "\n\t\t\t" + "<option name='allow_reduce' value='" + Optimizer.allow_reduceNiv + "'/>"; options += "\n\t\t" + "</optiongroup>"; } options += "\n\t\t" + "<optiongroup groupname='graph'>"; options += "\n\t\t\t" + "<option name='root_label' value='" + rootLabel + "'/>"; options += "\n\t\t" + "</optiongroup>"; options += "\n\t\t" + "<optiongroup groupname='pproj'>"; if (Optimizer.usePPOption) { options += "\n\t\t\t" + "<option name='marking_strategy' value='" + Optimizer.ppOption + "'/>"; } options += "\n\t\t\t" + "<option name='covered_root' value='" + pcr + "'/>"; options += "\n\t\t" + "</optiongroup>"; options += "\n\t\t" + "<optiongroup groupname='lib'>"; options += "\n\t\t\t" + "<option name='options' value='" + libOptions + "'/>"; options += "\n\t\t" + "</optiongroup>"; options += "\n\t\t" + "<optiongroup groupname='guide'>"; options += "\n\t\t\t" + "<option name='learner' value='liblinear'/>"; options += "\n\t\t" + "</optiongroup>"; options += "\n\t</optioncontainer>"; options += "\n</experiment>"; options = options.replaceAll("'", "\""); return options; } public String generateIncOptionsTestingsPhasesb(String lang, String algorithm, String training80, String rootHandling, String libOptions, String rootLabel, String pcr, String pp) { LibraryOptionsSetter lo = LibraryOptionsSetter.getSingleton(); libraryOptions = lo.getLibraryOptions(); String options = ""; options += "<?xml version='1.0' encoding='UTF-8'?>"; options += "\n<experiment>"; options += "\n\t<optioncontainer>"; options += "\n\t\t" + "<optiongroup groupname='config'>"; options += "\n\t\t\t" + "<option name='name' value='" + lang + "Model'/>"; options += "\n\t\t\t" + "<option name='flowchart' value='learn'/>"; options += "\n\t\t" + "</optiongroup>"; options += "\n\t\t" + "<optiongroup groupname='singlemalt'>"; options += "\n\t\t\t" + "<option name='parsing_algorithm' value='" + algorithm + "'/>"; options += "\n\t\t" + "</optiongroup>"; options += "\n\t\t" + "<optiongroup groupname='input'>"; options += "\n\t\t\t" + "<option name='infile' value='" + training80 + "'/>"; options += "\n\t\t" + "</optiongroup>"; if (algorithm.contains("nivre")) { options += "\n\t\t" + "<optiongroup groupname='nivre'>"; options += "\n\t\t\t" + "<option name='allow_root' value='" + Optimizer.allow_rootNiv + "'/>"; options += "\n\t\t\t" + "<option name='allow_reduce' value='" + Optimizer.allow_reduceNiv + "'/>"; options += "\n\t\t" + "</optiongroup>"; } options += "\n\t\t" + "<optiongroup groupname='graph'>"; options += "\n\t\t\t" + "<option name='root_label' value='" + rootLabel + "'/>"; options += "\n\t\t" + "</optiongroup>"; options += "\n\t\t" + "<optiongroup groupname='pproj'>"; options += "\n\t\t\t" + "<option name='marking_strategy' value='" + pp + "'/>"; options += "\n\t\t\t" + "<option name='covered_root' value='" + pcr + "'/>"; options += "\n\t\t" + "</optiongroup>"; options += "\n\t\t" + "<optiongroup groupname='lib'>"; options += "\n\t\t\t" + "<option name='options' value='" + libOptions + "'/>"; options += "\n\t\t" + "</optiongroup>"; options += "\n\t\t" + "<optiongroup groupname='guide'>"; options += "\n\t\t\t" + "<option name='learner' value='liblinear'/>"; options += "\n\t\t" + "</optiongroup>"; options += "\n\t</optioncontainer>"; options += "\n</experiment>"; options = options.replaceAll("'", "\""); return options; } public String generateIncOptionsTestingsEndPhase2(String lang, String algorithm, String training80, String rootHandling, String libOptions, String rootLabel, String pcr, String pp, boolean allowShift, boolean allowROOT) { LibraryOptionsSetter lo = LibraryOptionsSetter.getSingleton(); libraryOptions = lo.getLibraryOptions(); String options = ""; options += "<?xml version='1.0' encoding='UTF-8'?>"; options += "\n<experiment>"; options += "\n\t<optioncontainer>"; options += "\n\t\t" + "<optiongroup groupname='config'>"; options += "\n\t\t\t" + "<option name='name' value='" + lang + "Model'/>"; options += "\n\t\t\t" + "<option name='flowchart' value='learn'/>"; options += "\n\t\t" + "</optiongroup>"; options += "\n\t\t" + "<optiongroup groupname='singlemalt'>"; options += "\n\t\t\t" + "<option name='parsing_algorithm' value='" + algorithm + "'/>"; options += "\n\t\t" + "</optiongroup>"; options += "\n\t\t" + "<optiongroup groupname='input'>"; options += "\n\t\t\t" + "<option name='infile' value='" + training80 + "'/>"; options += "\n\t\t" + "</optiongroup>"; if (algorithm.contains("nivre")) { options += "\n\t\t" + "<optiongroup groupname='nivre'>"; options += "\n\t\t\t" + "<option name='allow_root' value='" + Optimizer.allow_rootNiv + "'/>"; options += "\n\t\t\t" + "<option name='allow_reduce' value='" + Optimizer.allow_reduceNiv + "'/>"; options += "\n\t\t" + "</optiongroup>"; } if (algorithm.contains("cov")) { options += "\n\t\t" + "<optiongroup groupname='covington'>"; options += "\n\t\t\t" + "<option name='allow_root' value='" + allowROOT + "'/>"; options += "\n\t\t\t" + "<option name='allow_shift' value='" + allowShift + "'/>"; options += "\n\t\t" + "</optiongroup>"; } options += "\n\t\t" + "<optiongroup groupname='graph'>"; options += "\n\t\t\t" + "<option name='root_label' value='" + rootLabel + "'/>"; options += "\n\t\t" + "</optiongroup>"; options += "\n\t\t" + "<optiongroup groupname='pproj'>"; options += "\n\t\t\t" + "<option name='marking_strategy' value='" + pp + "'/>"; options += "\n\t\t\t" + "<option name='covered_root' value='" + pcr + "'/>"; options += "\n\t\t" + "</optiongroup>"; options += "\n\t\t" + "<optiongroup groupname='lib'>"; options += "\n\t\t\t" + "<option name='options' value='" + libOptions + "'/>"; options += "\n\t\t" + "</optiongroup>"; options += "\n\t\t" + "<optiongroup groupname='guide'>"; options += "\n\t\t\t" + "<option name='learner' value='liblinear'/>"; options += "\n\t\t" + "</optiongroup>"; options += "\n\t</optioncontainer>"; options += "\n</experiment>"; options = options.replaceAll("'", "\""); return options; } public String getLibraryOptions() { return libraryOptions; } public void setLibraryOptions(String libraryOptions) { this.libraryOptions = libraryOptions; } public OptionsGenerator(String language, String trainingCorpus) { modelName = language + "Model"; modelNameStackLazy = language + "ModelStack"; this.trainingCorpus = trainingCorpus; //System.out.println(trainingCorpus); } public OptionsGenerator(String bestAlgorithm, String language, String trainingCorpus, String liboptions) { this.algorithm = bestAlgorithm; this.libValue = liboptions; this.trainingCorpus = trainingCorpus; modelName = language + "Model"; } public OptionsGenerator() { } public String generateOptionsNivreEager() { LibraryOptionsSetter lo = LibraryOptionsSetter.getSingleton(); libraryOptions = lo.getLibraryOptions(); String options = ""; algorithm = "nivreeager"; options += "<?xml version='1.0' encoding='UTF-8'?>"; options += "\n<experiment>"; options += "\n\t<optioncontainer>"; options += "\n\t\t" + "<optiongroup groupname='config'>"; options += "\n\t\t\t" + "<option name='name' value='" + modelName + "'/>"; options += "\n\t\t\t" + "<option name='flowchart' value='learn'/>"; options += "\n\t\t" + "</optiongroup>"; options += "\n\t\t" + "<optiongroup groupname='singlemalt'>"; options += "\n\t\t\t" + "<option name='parsing_algorithm' value='" + algorithm + "'/>"; options += "\n\t\t" + "</optiongroup>"; options += "\n\t\t" + "<optiongroup groupname='input'>"; options += "\n\t\t\t" + "<option name='infile' value='" + trainingCorpus + "'/>"; options += "\n\t\t" + "</optiongroup>"; options += "\n\t\t" + "<optiongroup groupname='lib'>"; options += "\n\t\t\t" + "<option name='options' value='" + libraryOptions + "'/>"; options += "\n\t\t" + "</optiongroup>"; options += "\n\t\t" + "<optiongroup groupname='guide'>"; //<option name="learner" value="liblinear"/> options += "\n\t\t\t" + "<option name='learner' value='liblinear'/>"; options += "\n\t\t" + "</optiongroup>"; options += "\n\t</optioncontainer>"; options += "\n</experiment>"; options = options.replaceAll("'", "\""); return options; } public String generateOptionsStackLazy() { LibraryOptionsSetter lo = LibraryOptionsSetter.getSingleton(); libraryOptions = lo.getLibraryOptions(); String options = ""; algorithm = "stacklazy"; options += "<?xml version='1.0' encoding='UTF-8'?>"; options += "\n<experiment>"; options += "\n\t<optioncontainer>"; options += "\n\t\t" + "<optiongroup groupname='config'>"; options += "\n\t\t\t" + "<option name='name' value='" + modelNameStackLazy + "'/>"; options += "\n\t\t\t" + "<option name='flowchart' value='learn'/>"; options += "\n\t\t" + "</optiongroup>"; options += "\n\t\t" + "<optiongroup groupname='singlemalt'>"; options += "\n\t\t\t" + "<option name='parsing_algorithm' value='" + algorithm + "'/>"; options += "\n\t\t" + "</optiongroup>"; options += "\n\t\t" + "<optiongroup groupname='input'>"; options += "\n\t\t\t" + "<option name='infile' value='" + trainingCorpus + "'/>"; options += "\n\t\t" + "</optiongroup>"; if (algorithm.contains("nivre")) { options += "\n\t\t" + "<optiongroup groupname='nivre'>"; options += "\n\t\t\t" + "<option name='allow_root' value='" + Optimizer.allow_rootNiv + "'/>"; options += "\n\t\t\t" + "<option name='allow_reduce' value='" + Optimizer.allow_reduceNiv + "'/>"; options += "\n\t\t" + "</optiongroup>"; } options += "\n\t\t" + "<optiongroup groupname='lib'>"; options += "\n\t\t\t" + "<option name='options' value='" + libraryOptions + "'/>"; options += "\n\t\t" + "</optiongroup>"; options += "\n\t\t" + "<optiongroup groupname='guide'>"; options += "\n\t\t\t" + "<option name='data_split_column' value='POSTAG'/>"; options += "\n\t\t\t" + "<option name='data_split_structure' value='Input[0]'/>"; options += "\n\t\t\t" + "<option name='data_split_threshold' value='1000'/>"; options += "\n\t\t" + "</optiongroup>"; options += "\n\t</optioncontainer>"; options += "\n</experiment>"; options = options.replaceAll("'", "\""); return options; } public String generateOptionsFile() { String options = ""; options += "<?xml version='1.0' encoding='UTF-8'?>"; options += "\n\t<experiment>"; options += "\n\t\t<optioncontainer>"; options += "\n\t\t\t" + "<optiongroup groupname='config'>"; options += "\n\t\t\t\t" + "<option name='name' value='" + modelName + "'/>"; options += "\n\t\t\t\t" + "<option name='flowchart' value='learn'/>"; options += "\n\t\t\t" + "</optiongroup>"; options += "\n\t\t\t" + "<optiongroup groupname='singlemalt'>"; options += "\n\t\t\t\t" + "<option name='parsing_algorithm' value='" + algorithm + "'/>"; options += "\n\t\t\t" + "</optiongroup>"; options += "\n\t\t\t" + "<optiongroup groupname='input'>"; options += "\n\t\t\t\t" + "<option name='name' value='" + trainingCorpus + "'/>"; options += "\n\t\t\t" + "</optiongroup>"; options += "\n\t\t\t" + "<optiongroup groupname='input'>"; options += "\n\t\t\t\t" + "<option name='root_handling' value='strict'/>"; options += "\n\t\t\t" + "</optiongroup>"; options += "\n\t\t\t" + "<optiongroup groupname='lib'>"; options += "\n\t\t\t\t" + "<option name='name' value='" + libValue + "'/>"; options += "\n\t\t\t" + "</optiongroup>"; options += "\n\t\t\t" + "<optiongroup groupname='guide'>"; options += "\n\t\t\t\t" + "<option name='data_split_column' value='POSTAG'/>"; options += "\n\t\t\t\t" + "<option name='data_split_structure' value='Input[0]'/>"; options += "\n\t\t\t\t" + "<option name='data_split_threshold' value='1000'/>"; options += "\n\t\t\t" + "</optiongroup>"; options += "\n\t\t</optioncontainer>"; options += "\n\t</experiment>"; options = options.replaceAll("'", "\""); return options; } public String generateTrainingCommand() { return "java -Dfile.encoding=UTF8 -jar malt.jar -f <options_file>.xml -F <feature_model>.xml"; } public String generateTestingCommand() { if (rootGRL && !danglingPunctuation) { return "java -Dfile.encoding=UTF8 -jar malt.jar -c " + modelName + " -i <test_corpus>.conll -o output.conll -m parse"; } if (!rootGRL && !danglingPunctuation) { return "java -Dfile.encoding=UTF8 -jar malt.jar -c " + modelName + " -i <test_corpus>.conll -o output.conll -m parse -grl " + this.realRoot; } if (rootGRL && danglingPunctuation) { return "java -Dfile.encoding=UTF8 -jar malt.jar -c " + modelName + " -i <test_corpus>.conll -o output.conll -m parse -pcr head"; } //if (rootGRL && danglingPunctuation) return "java -Dfile.encoding=UTF8 -jar malt.jar -c " + modelName + " -i <test_corpus>.conll -o output.conll -m parse -grl " + this.realRoot + " -pcr head"; } }