package ir.ac.iust.nlp.dependencyparser;
/*
* Copyright (C) 2013 Iran University of Science and Technology
*
* This file is part of "Dependency Parsing Toolbox" Project, as available
* from http://nlp.iust.ac.ir This file is free software;
* you can redistribute it and/or modify it under the terms of the GNU General
* Public License (GPL) as published by the Free Software Foundation, in
* version 2 as it comes in the "COPYING" file of the VirtualBox OSE
* distribution. VirtualBox OSE is distributed in the hope that it will be
* useful, but WITHOUT ANY WARRANTY of any kind.
*
* You may elect to license modified versions of this file under the terms
* and conditions of either the GPL.
*/
import ir.ac.iust.nlp.dependencyparser.evaluation.EvalSettings;
import ir.ac.iust.nlp.dependencyparser.hybrid.RunnableHybrid;
import ir.ac.iust.nlp.dependencyparser.inputoutput.ReadCorpus;
import ir.ac.iust.nlp.dependencyparser.optomization.RunnableOptimizer;
import ir.ac.iust.nlp.dependencyparser.parsing.RunnableParse;
import ir.ac.iust.nlp.dependencyparser.projection.RunnableProjectivize;
import ir.ac.iust.nlp.dependencyparser.training.RunnableTrain;
import ir.ac.iust.nlp.dependencyparser.utility.enumeration.Flowchart;
import ir.ac.iust.nlp.dependencyparser.utility.enumeration.ParserType;
import ir.ac.iust.nlp.dependencyparser.utility.enumeration.ReparseType;
import ir.ac.iust.nlp.dependencyparser.utility.enumeration.TransformType;
import ir.ac.iust.nlp.dependencyparser.utility.parsing.*;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.util.Collections;
import java.util.LinkedList;
import java.util.List;
import javax.swing.SwingUtilities;
import javax.swing.UIManager;
import javax.swing.UnsupportedLookAndFeelException;
import se.vxu.msi.malteval.MaltEvalConsole;
/**
*
* @author Mojtaba Khallash
*/
public class DependencyParser {
public static String maxRam = "";
public static String minRam = "";
/**
 * Entry point of the Dependency Parsing Toolbox.
 * <p>
 * The command line is parsed first. With {@code -v 0} the operation selected
 * by {@code -mode} is executed on the console and the JVM exits; otherwise
 * (the default) the help text is printed and the graphical front-end is
 * opened. Any invalid or incomplete argument prints the help text and exits
 * with status 1.
 *
 * @param args the command line arguments
 */
public static void main(String[] args) {
    boolean visible = true;
    boolean exception = false;
    Flowchart mode = Flowchart.None;
    Flowchart helpChart = Flowchart.None;

    String workingDir = System.getProperty("user.dir");
    File tmp = new File(workingDir + File.separator + "tmp");
    if (!tmp.exists()) {
        tmp.mkdirs();
    }

    String input = "";
    String output = "";
    String gold = "";
    String model = "";
    boolean crossVal = false;
    // majority|attardi|eisner|chu_liu_edmond
    String method = "majority";
    // none|baseline|head|path|head+path
    String markingStrategy = "head";
    // none|ignore|left|right|head
    String coveredRoot = "head";
    // shortest|deepest
    String liftingOrder = "shortest";
    // 1|2|3|all
    String phase = "all";
    // settings
    // - eval.metric
    //   LAS|LA|UAS|AnyRight|AnyWrong|BothRight|BothWrong|HeadRight|HeadWrong
    //   LabelRight|LabelWrong|DirectionRight|GroupedHeadToChildDistanceRight
    //   HeadToChildDistanceRight
    //   => NOTE: for selecting multiple metrics, separate them by comma.
    // - eval.groupByVal
    //   Token|Wordform|Lemma|Cpostag|Postag|Feats|Deprel|Sentence|RelationLength
    //   GroupedRelationLength|SentenceLength|StartWordPosition|EndWordPosition
    //   ArcDirection|ArcDepth|BranchingFactor|ArcProjectivity|Frame
    EvalSettings eval = new EvalSettings();
    MaltSettings malt = new MaltSettings();
    ClearSettings clear = new ClearSettings();
    MSTSettings mst = new MSTSettings();
    MateSettings mate = new MateSettings();
    ParserType parser = ParserType.MaltParser;

    // 0|1|all -- the help text documents "all" as the default, so use it
    // (the previous "*" matched no case and made stack mode a silent no-op).
    String level = "all";
    ParserType parserL0 = ParserType.MSTParser;
    ParserType parserL1 = ParserType.MSTParser;
    MaltStackSettings maltL0 = new MaltStackSettings();
    maltL0.Level = 0;
    MSTStackSettings mstL0 = new MSTStackSettings();
    MSTStackSettings mstL1 = new MSTStackSettings();
    mstL0.Level = 0;
    mstL1.Level = 1;

    showIntroduction();

    try {
        for (int i = 0; i < args.length; i++) {
            switch (args[i]) {
                case "-Xmx":
                    maxRam = args[++i];
                    break;
                case "-Xms":
                    minRam = args[++i];
                    break;
                case "-v": {
                    String val = args[++i];
                    if (!(val.equals("0") || val.equals("1"))) {
                        throw new IllegalArgumentException("Only 0 or 1 is valid for -v parameter.");
                    }
                    visible = val.equals("1");
                    break;
                }
                case "-mode":
                    mode = getChart(args[++i].toLowerCase());
                    break;
                case "-help":
                    // The mode name after -help is optional: only consume the
                    // next argument when it is not itself another flag.
                    if (i + 1 < args.length && !args[i + 1].startsWith("-")) {
                        helpChart = getChart(args[++i].toLowerCase());
                    }
                    break;
                // -- proj, deproj, optimizer, train, parse, eval, ensemble, stack param --//
                case "-i":
                    input = args[++i];
                    break;
                // -- proj, deproj, parse, eval, ensemble param --//
                case "-o":
                    output = args[++i];
                    break;
                // -- eval, ensemble param --//
                case "-g":
                    gold = args[++i];
                    break;
                // -- eval param --//
                case "-metric":
                    eval.metrics = args[++i];
                    break;
                case "-group":
                    eval.groupByVal = args[++i];
                    break;
                // -- ensemble param --//
                case "-method":
                    method = args[++i].toLowerCase();
                    break;
                // -- optimizer param --//
                case "-cross_val":
                    crossVal = args[++i].equals("1");
                    break;
                // -- proj, deproj, train, parse param --//
                case "-m":
                    model = args[++i];
                    break;
                // -- proj params -- //
                case "-mark":
                    markingStrategy = args[++i].toLowerCase();
                    break;
                case "-covered":
                    coveredRoot = args[++i].toLowerCase();
                    break;
                case "-lift_order":
                    liftingOrder = args[++i].toLowerCase();
                    break;
                // -- optimizer and train and parse param --//
                case "-parser":
                    parser = getParserType(args[++i]);
                    break;
                case "-phase":
                    phase = args[++i];
                    break;
                // -- malt, clear parameters --//
                case "-option":
                    clear.OptionsFile = malt.OptionsFile = args[++i];
                    break;
                case "-guide":
                    clear.GuidesFile = malt.GuidesFile = args[++i];
                    break;
                // -- clear parameters --//
                case "-bootstrap":
                    clear.BootstrappingLevel = Integer.parseInt(args[++i]);
                    break;
                // -- mst, mate parameters --//
                case "-iter":
                    mate.Iteration = mst.Iteration = Integer.parseInt(args[++i]);
                    break;
                case "-decode":
                    mate.DecodeType = mst.DecodeType = args[++i].toLowerCase();
                    break;
                // -- mst parameters --//
                case "-k":
                    mst.TrainingK = Integer.parseInt(args[++i]);
                    break;
                case "-order":
                    mst.Order = Integer.parseInt(args[++i]);
                    break;
                case "-loss":
                    mst.LossType = args[++i];
                    break;
                case "-format":
                    mst.FileType = args[++i].toUpperCase();
                    break;
                // -- mate parameters --//
                case "-threshold":
                    mate.NonProjectivityThreshold = Double.parseDouble(args[++i]);
                    break;
                case "-creation":
                    mate.FeatureCreation = args[++i];
                    break;
                case "-core":
                    mate.Cores = Integer.parseInt(args[++i]);
                    break;
                // -- stack param --//
                case "-t":
                    maltL0.Gold = mstL0.Gold = args[++i];
                    break;
                case "-l":
                    level = args[++i];
                    break;
                case "-l0_part":
                    maltL0.AugmentNParts = mstL0.AugmentNParts = Integer.parseInt(args[++i]);
                    break;
                case "-l0_out_train":
                    mstL1.Input = maltL0.AugmentedTrainFile = mstL0.AugmentedTrainFile = args[++i];
                    break;
                case "-l0_parser": {
                    // Only malt and mst have level-0 stack settings; any other
                    // parser used to leave the settings null and crash later.
                    ParserType requested = getParserType(args[++i]);
                    if (requested != ParserType.MaltParser && requested != ParserType.MSTParser) {
                        throw new IllegalArgumentException("Only malt or mst is valid for -l0_parser parameter.");
                    }
                    parserL0 = requested;
                    break;
                }
                // -- malt stack parameters --//
                case "-l0_option":
                    maltL0.OptionsFile = args[++i];
                    break;
                case "-l0_guide":
                    maltL0.GuidesFile = args[++i];
                    break;
                // -- mst stack parameters --//
                case "-l0_iter":
                    mstL0.Iteration = Integer.parseInt(args[++i]);
                    break;
                case "-l0_decode":
                    mstL0.DecodeType = args[++i].toLowerCase();
                    break;
                case "-l0_k":
                    mstL0.TrainingK = Integer.parseInt(args[++i]);
                    break;
                case "-l0_order":
                    mstL0.Order = Integer.parseInt(args[++i]);
                    break;
                case "-l0_loss":
                    mstL0.LossType = args[++i];
                    break;
                case "-l0_out_parse":
                    mstL1.Gold = maltL0.Output = mstL0.Output = args[++i];
                    break;
                case "-l1_parser":
                    parserL1 = getParserType(args[++i]);
                    break;
                case "-l1_pe":
                    mstL1.UsePredEdge = args[++i].equals("1");
                    break;
                case "-l1_ps":
                    mstL1.UsePrevSibling = args[++i].equals("1");
                    break;
                case "-l1_ns":
                    mstL1.UseNextSibling = args[++i].equals("1");
                    break;
                case "-l1_gp":
                    mstL1.UseGrandParents = args[++i].equals("1");
                    break;
                case "-l1_ac":
                    mstL1.UseAllchildren = args[++i].equals("1");
                    break;
                case "-l1_ph":
                    mstL1.UsePredHead = args[++i].equals("1");
                    break;
                case "-l1_v":
                    mstL1.UseValency = args[++i].equals("1");
                    break;
                // -- mst stack parameters --//
                case "-l1_iter":
                    mstL1.Iteration = Integer.parseInt(args[++i]);
                    break;
                case "-l1_decode":
                    mstL1.DecodeType = args[++i].toLowerCase();
                    break;
                case "-l1_k":
                    mstL1.TrainingK = Integer.parseInt(args[++i]);
                    break;
                case "-l1_order":
                    mstL1.Order = Integer.parseInt(args[++i]);
                    break;
                case "-l1_loss":
                    mstL1.LossType = args[++i];
                    break;
                case "-l1_output":
                    mstL1.Output = args[++i];
                    break;
            }
        }
    } catch (ArrayIndexOutOfBoundsException e) {
        // Only args[++i] can overrun, which means the last argument is a flag
        // that was given without its value.
        System.out.println("Missing value for parameter '" + args[args.length - 1] + "'.");
        exception = true;
        visible = false;
    } catch (RuntimeException e) {
        // Invalid flag value (e.g. a non-numeric iteration count).
        System.out.println(e.getMessage());
        exception = true;
        visible = false;
    }

    Settings settings = null;
    switch (parser) {
        case MaltParser:
            malt.WorkingDirectory = "tmp" + File.separator;
            settings = malt;
            break;
        case ClearParser:
            settings = clear;
            break;
        case MSTParser:
            settings = mst;
            break;
        case MateTools:
            settings = mate;
            break;
    }
    settings.Input = input;
    settings.Model = model;
    settings.Output = output;

    // parserL0 is restricted to malt|mst while parsing the arguments.
    Settings settingsL0;
    if (parserL0 == ParserType.MaltParser) {
        settingsL0 = maltL0;
    } else {
        settingsL0 = mstL0;
    }
    settingsL0.Input = input;

    if (visible) {
        // GUI mode: print the usage text on the console, then open the window.
        showHelp(helpChart);
        DependencyParserApp application = new DependencyParserApp();
        try {
            UIManager.setLookAndFeel(UIManager.getSystemLookAndFeelClassName());
            SwingUtilities.updateComponentTreeUI(application);
            application.pack();
        } catch (ClassNotFoundException | InstantiationException | IllegalAccessException | UnsupportedLookAndFeelException ignored) {
            // Best effort: keep the default look and feel when the system one
            // cannot be installed.
        }
        application.setVisible(visible);
        return;
    }

    if (exception) {
        showHelp(helpChart);
        System.exit(1);
    }

    Runnable run = null;
    switch (mode) {
        case Read:
            if (input.length() == 0) {
                exitWithError("input file not entered.");
            }
            ReadCorpus.getStatistics(input);
            break;
        case Proj:
            requireParameters(input, output, model);
            requireFile(workingDir, input, "cannot find input file.");
            run = new RunnableProjectivize(
                    null,                       // target
                    TransformType.Projectivize, // Transform type
                    workingDir,                 // Working directory
                    input, output, model, markingStrategy,
                    coveredRoot, liftingOrder);
            break;
        case Deproj:
            requireParameters(input, output, model);
            requireFile(workingDir, input, "cannot find input file.");
            requireFile(workingDir, model + ".mco", "cannot find model file.");
            run = new RunnableProjectivize(
                    null,                         // target
                    TransformType.Deprojectivize, // Transform type
                    workingDir,                   // Working directory
                    input, output, model, null, null, null);
            break;
        case Optimizer:
            if (phase.equals("all")) {
                optimizer.Optimizer.ExitInEnd = false;
                new RunnableOptimizer(null, 1, input, crossVal).run();
                new RunnableOptimizer(null, 2, input, crossVal).run();
                run = new RunnableOptimizer(null, 3, input, crossVal);
            } else {
                int p = 0;
                try {
                    p = Integer.parseInt(phase);
                } catch (NumberFormatException e) {
                    exitWithError("Only 1, 2, 3 or all is valid for -phase parameter.");
                }
                run = new RunnableOptimizer(null, p, input, crossVal);
            }
            break;
        case Train:
            run = new RunnableTrain(null, parser, null, settings);
            break;
        case Parse:
            run = new RunnableParse(null, parser, null, settings);
            break;
        case Eval:
            requireParameters(input, output, gold);
            requireFile(workingDir, input, "cannot find input file.");
            requireFile(workingDir, gold, "cannot find gold file.");
            eval.parseFile = input;
            eval.goldFile = gold;
            eval.outputFile = output;
            MaltEvalConsole.main(eval.getParameters());
            // Echo the evaluation log that was written to the output file.
            printFile(output);
            break;
        case Ensemble:
            requireParameters(input, output, gold);
            String[] ins = input.split(",");
            for (String sysFile : ins) {
                requireFile(workingDir, sysFile, "cannot find input file '" + sysFile + "'.");
            }
            requireFile(workingDir, gold, "cannot find gold file.");
            List<String> sysFiles = new LinkedList<>();
            Collections.addAll(sysFiles, ins);
            ReparseType type = null;
            try {
                type = ReparseType.valueOf(method);
            } catch (IllegalArgumentException e) {
                exitWithError("Only majority, attardi, eisner or chu_liu_edmond is valid for -method parameter.");
            }
            run = new RunnableHybrid(null, null,
                    gold, sysFiles, output, type);
            break;
        case Stack:
            maltL0.Model = mstL0.Model = "ModelL0.mco";
            mstL1.Model = "ModelL1.mco";
            switch (level) {
                case "all":
                    // Level 0 must finish before level 1 starts.
                    new RunnableHybrid(null, null, parserL0, settingsL0).run();
                    run = new RunnableHybrid(null, null, parserL1, mstL1);
                    break;
                case "0":
                    run = new RunnableHybrid(null, null, parserL0, settingsL0);
                    break;
                case "1":
                    run = new RunnableHybrid(null, null, parserL1, mstL1);
                    break;
                default:
                    exitWithError("Only 0, 1 or all is valid for -l parameter.");
                    break;
            }
            break;
        case None:
            showHelp(helpChart);
            break;
    }
    if (run != null) {
        run.run();
    }
    System.exit(0);
}

/** Prints {@code message} on standard output and terminates the JVM with status 1. */
private static void exitWithError(String message) {
    System.out.println(message);
    System.exit(1);
}

/** Exits with an error when any of the given mandatory parameter values is empty. */
private static void requireParameters(String... values) {
    for (String value : values) {
        if (value.length() == 0) {
            exitWithError("some parameter not entered.");
        }
    }
}

/** Exits with {@code message} when {@code relativePath} does not exist below {@code workingDir}. */
private static void requireFile(String workingDir, String relativePath, String message) {
    File file = new File(workingDir + File.separator + relativePath);
    if (!file.exists()) {
        exitWithError(message);
    }
}

/** Copies the text file at {@code path} to standard output line by line, closing it afterwards. */
private static void printFile(String path) {
    try (BufferedReader reader = new BufferedReader(new FileReader(path))) {
        String line;
        while ((line = reader.readLine()) != null) {
            System.out.println(line);
        }
    } catch (java.io.IOException e) {
        // Echoing the log is best effort, but tell the user why it is missing.
        System.out.println("cannot read file '" + path + "': " + e.getMessage());
    }
}
/**
 * Translates a parser name given on the command line into a {@link ParserType}.
 * The comparison ignores case; "malt" and every unrecognized name map to
 * {@code ParserType.MaltParser}.
 *
 * @param parser parser name (malt|clear|mst|mate)
 * @return the matching parser type, MaltParser when the name is unknown
 */
private static ParserType getParserType(String parser) {
    final String name = parser.toLowerCase();
    if (name.equals("clear")) {
        return ParserType.ClearParser;
    }
    if (name.equals("mst")) {
        return ParserType.MSTParser;
    }
    if (name.equals("mate")) {
        return ParserType.MateTools;
    }
    // "malt" and anything else
    return ParserType.MaltParser;
}
/**
 * Prints the toolbox banner (name, version, author and affiliation) on
 * standard output, followed by an empty line.
 */
private static void showIntroduction() {
    final String rule = "-----------------------------------------------------------------------------";
    final String[] banner = {
        rule,
        " Dependency Parser Toolbox 1.0",
        rule,
        " Mojtaba Khallash",
        "",
        " Iran University of Science and Technology (IUST)",
        " Iran",
        rule,
        ""
    };
    for (String line : banner) {
        System.out.println(line);
    }
}
/**
 * Prints the usage text on standard output: first the arguments common to
 * every invocation, then the parameters of the requested operation mode.
 *
 * @param chart the mode whose parameters are listed; {@code Flowchart.None}
 *              prints only a hint on how to ask for mode-specific help
 */
private static void showHelp(Flowchart chart) {
    System.out.println("Required Arguments:");
    System.out.println(" -v <visibility (0|1)>");
    System.out.println(" -mode <operation-mode(read|proj|deproj|optimizer|");
    System.out.println(" train|parse|eval|ensemble|stack)>\n");

    // Mode-specific lines are collected first and printed in one pass below.
    String[] details = {};
    switch (chart) {
        case None:
            details = new String[] {
                " Use (-help <mode-value>) for more parameters."
            };
            break;
        case Read:
            details = new String[] {
                " >> read: reading corpus and get statistical info",
                " -i <input conll file>"
            };
            break;
        case Proj:
            details = new String[] {
                " >> proj: Projectivizing treebank",
                " -i <input conll file>",
                " -o <projectivized output>",
                " -m <projectivizing model name>",
                " -mark <marking-strategy (None|Baseline|Head|Path|Head+Path)",
                " [default: Head]>",
                " -covered <covered-root (None|Ignore|Left|Right|Head) [default: Head]>",
                " -lift_order <lifting-order (Shortest|Deepest) [default: Shortest]>"
            };
            break;
        case Deproj:
            details = new String[] {
                " >> deproj: Deprojectivize treebank",
                " -i <input conll file>",
                " -m <existing projectivizing model name>",
                " -o <deprojectivizing output>"
            };
            break;
        case Optimizer:
            details = new String[] {
                " >> optimizer: Optimizing algorithm parameters and feature model",
                " -i <training-corput>",
                " -parser <parser-type (malt)>",
                " -phase <optimizing phase (1|2|3|all) [default: all for running all phases]>",
                " -cross_val <using 5-fold cross-validation (0|1) [default: 0]>"
            };
            break;
        case Train:
            details = new String[] {
                " >> train: Training from annotated data",
                " -i <input training corpus>",
                " -m <name of training model>",
                " -parser <parser-type (malt|clear|mst|mate) [default: malt]>",
                " >> malt parameters:",
                " -option <option-file>",
                " -guide <guide-file>",
                " >> clear parameters:",
                " -option <option-file>",
                " -guide <guide-file>",
                " -bootstrap <bootstrapping-level [default: 2]>",
                " >> mst parameters:",
                " -decode <decode-type (proj|non-proj) [default: non-proj]>",
                " -loss <loss-type (punc|nopunc) [default: punc]>",
                " -order <order (1|2) [default: 2]>",
                " -k <training k-best [default: 1]>",
                " -iter <training iterations [default: 10]>",
                " -format <file-type (conll|mst) [default: conll]>",
                " >> mate parameters:",
                " -decode <decode-type (proj|non-proj) [default: non-proj]>",
                " -threshold <nonprojective-threshold (0-1) [default: 0.3]>",
                " -creation <feature-creation (multiplicative|shift) [default: multiplicative]>",
                " -core <number-of-core [default: max-exiting-cores]>",
                " -iter <training iterations [default: 10]>"
            };
            break;
        case Parse:
            details = new String[] {
                " >> parse: Parsing with trained model",
                " -i <input parsing file>",
                " -m <name of trined model>",
                " -o <output parsed name>",
                " -parser <parser-type (malt|clear|mst|mate) [default: malt]>",
                " >> malt parameters: [None]",
                " >> clear parameters:",
                " -option <option-file>",
                " >> mst parameters:",
                " -decode <decode-type (proj|non-proj) [default: non-proj]>",
                " -order <order (1|2) [default: 2]>",
                " -format <file-type (conll|mst) [default: conll]>",
                " >> mate parameters:",
                " -decode <decode-type (proj|non-proj) [default: non-proj]>",
                " -threshold <nonprojective-threshold (0-1) [default: 0.3]>",
                " -creation <feature-creation (multiplicative|shift) [default: multiplicative]>",
                " -core <number-of-core [default: max-exiting-cores]>"
            };
            break;
        case Eval:
            details = new String[] {
                " >> eval: Evaluating parsed file with gold data",
                " -i <input parsed file>",
                " -g <gold file>",
                " -o <output eval log>",
                " -metric <metric (LAS|LA|UAS|AnyRight|AnyWrong|BothRight|BothWrong|HeadRight|HeadWrong|",
                " LabelRight|LabelWrong|DirectionRight|GroupedHeadToChildDistanceRight|",
                " HeadToChildDistanceRight) [default: LAS]",
                " NOTE: for selecting multiple metrics, separate them by comma.>",
                " -group <group-by (Token|Wordform|Lemma|Cpostag|Postag|Feats|Deprel|Sentence|RelationLength|",
                " GroupedRelationLength|SentenceLength|StartWordPosition|EndWordPosition|",
                " ArcDirection|ArcDepth|BranchingFactor|ArcProjectivity|Frame) [default: Token]>"
                // and ...
            };
            break;
        case Ensemble:
            details = new String[] {
                " >> ensemble: Ensemble for combining base parsers in parse time",
                " -i <input baseline parsers file (separate by comma)>",
                " -g <gold file>",
                " -o <output file>",
                " -method <method (majority|attardi|eisner|chu_liu_edmond) [default: majority]>"
            };
            break;
        case Stack:
            details = new String[] {
                " >> stack: Stacking for combining base parsers in train time",
                " -i <input train file>",
                " -t <input test file>",
                " -l <level (0|1|all) [default: all for running both level]>",
                " -l0_part <level0 augmented parts [default: 5]>",
                " -l0_out_train <level0 output augmented train>",
                " -l0_out_parse <level0 output ougmented parse>",
                " -l0_parser <level0 parser-type (malt|mst) [default: mst]>",
                " >> malt parameters:",
                " -l0_option <level0 option-file>",
                " -l0_guide <level0 guide-file>",
                " >> mst parameters:",
                " -l0_decode <level0 decode-type (proj|non-proj) [default: non-proj]>",
                " -l0_loss <level0 loss-type (punc|nopunc) [default: punc]>",
                " -l0_order <level0 order (1|2) [default: 2]>",
                " -l0_k <level0 training k-best [default: 1]>",
                " -l0_iter <level0 training iterations [default: 10]>",
                " -l1_pe <level1 use predicted edge (0|1) [default: 1]>",
                " -l1_ps <level1 use previous sibling (0|1) [default: 1]>",
                " -l1_ns <level1 use next sibling (0|1) [default: 1]>",
                " -l1_gp <level1 use grandparent (0|1) [default: 1]>",
                " -l1_ac <level1 use all childs (0|1) [default: 1]>",
                " -l1_ph <level1 use predicted head (0|1) [default: 1]>",
                " -l1_v <level1 use valency (0|1) [default: 1]>",
                " -l1_parser <level1 parser-type (mst) [default: mst]>",
                " >> mst parameters:",
                " -l1_decode <level1 decode-type (proj|non-proj) [default: non-proj]>",
                " -l1_loss <level1 loss-type (punc|nopunc) [default: punc]>",
                " -l1_order <level1 order (1|2) [default: 2]>",
                " -l1_k <level1 training k-best [default: 1]>",
                " -l1_iter <level1 training iterations [default: 10]>",
                " -l1_output <level1 parsed output>"
            };
            break;
    }

    for (String line : details) {
        System.out.println(line);
    }
}
/**
 * Resolves an operation-mode name to its {@link Flowchart} constant.
 * The match is exact, so callers pass the name already lower-cased.
 *
 * @param mode mode name (read|proj|deproj|optimizer|train|parse|eval|ensemble|stack)
 * @return the matching constant, or {@code Flowchart.None} for any other name
 */
private static Flowchart getChart(String mode) {
    final Flowchart chart;
    switch (mode) {
        case "read":
            chart = Flowchart.Read;
            break;
        case "proj":
            chart = Flowchart.Proj;
            break;
        case "deproj":
            chart = Flowchart.Deproj;
            break;
        case "optimizer":
            chart = Flowchart.Optimizer;
            break;
        case "train":
            chart = Flowchart.Train;
            break;
        case "parse":
            chart = Flowchart.Parse;
            break;
        case "eval":
            chart = Flowchart.Eval;
            break;
        case "ensemble":
            chart = Flowchart.Ensemble;
            break;
        case "stack":
            chart = Flowchart.Stack;
            break;
        default:
            // Unknown names select no mode.
            chart = Flowchart.None;
            break;
    }
    return chart;
}
}