package ir.ac.iust.nlp.dependencyparser.parsing;
import java.io.*;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
import org.maltparser.MaltParserService;
import org.maltparser.core.exception.MaltChainedException;
import org.maltparser.core.symbol.SymbolTable;
import org.maltparser.core.syntaxgraph.DependencyStructure;
import org.maltparser.core.syntaxgraph.edge.Edge;
import org.maltparser.core.syntaxgraph.node.DependencyNode;
public class ParsingUtility {
/*
* To run this example requires that you have ran TrainingExperiment that
* creates model0.mco.
* Sample sentenceTokens
* // Creates an array of tokens, which contains the Swedish sentence
* // 'Grundavdraget upphör alltså vid en taxerad inkomst på 52500 kr.'
* // in the CoNLL data format. String[]
* tokens = new String[11]; tokens[0] = "1\tGrundavdraget\t_\tN\tNN\tDD|SS";
* tokens[1] = "2\tupphör\t_\tV\tVV\tPS|SM";
* tokens[2] = "3\talltså\t_\tAB\tAB\tKS";
* tokens[3] = "4\tvid\t_\tPR\tPR\t_";
* tokens[4] = "5\ten\t_\tN\tEN\t_";
* tokens[5] = "6\ttaxerad\t_\tP\tTP\tPA";
* tokens[6] = "7\tinkomst\t_\tN\tNN\t_";
* tokens[7] = "8\tpå\t_\tPR\tPR\t_";
* tokens[8] = "9\t52500\t_\tR\tRO\t_";
* tokens[9] = "10\tkr\t_\tN\tNN\t_";
* tokens[10] = "11\t.\t_\tP\tIP\t_";
*
* @author Johan Hall
*/
public static DependencyStructure ParseSentence(
String[] sentenceTokens, String workingDir,
String modelPath, String logFile) throws MaltChainedException {
return (DependencyStructure) ParseFromInputToken(1, sentenceTokens, workingDir,
modelPath, logFile);
}
public static String[] ParseSentence1(
String[] sentenceTokens, String workingDir,
String modelPath, String logFile) throws MaltChainedException {
return (String[]) ParseFromInputToken(2, sentenceTokens, workingDir,
modelPath, logFile);
}
private static Object ParseFromInputToken(int version, String[] tokens,
String workingDir, String modelName, String logName)
throws MaltChainedException {
Object result = null;
MaltParserService service = null;
try {
service = new MaltParserService();
// Inititalize:
// 1. the parser model modelPath
// 2. the working directory workingDir
// 3. the logging file logName
service.initializeParserModel(
"-c " + modelName
+ " -m parse"
+ " -w " + workingDir
+ " -lfi " + logName);
switch (version) {
/*
* This example shows how to parse a sentence with MaltParser by
* first initialize a parser model.
*/
case 1:
// Parses the Swedish sentence above
DependencyStructure graph = service.parse(tokens);
// Outputs the dependency graph created by MaltParser.
result = graph;
break;
/*
* This example shows how to parse a sentence with MaltParser by
* first initialize a parser model. This example is the same as
* case 1 except that we use the parseTokens method in
* MaltParserService that returns an array of tokens with
* information about it head index and dependency type.
*/
case 2:
// Parses the Swedish sentence above
result = service.parseTokens(tokens);
break;
}
}
finally {
if (service != null) {
// Terminates the parser model
service.terminateParserModel();
}
}
return result;
}
/*
* This example shows how to parse sentences from file. The only difference
* between example case 1 is that the input is read from file
* 'data/talbanken05_test.conll' and written to 'out.conll' in the CoNLL
* data format.
*
* To run this example requires that you have ran TrainingExperiment that
* creates model0.mco
*
* @author Johan Hall
*/
public static List<DependencyStructure> ParseFromFile(String workingDir,
String modelName, String inputPath, String outputPath,
String logName) throws MaltChainedException {
List<DependencyStructure> dss = new LinkedList<>();
BufferedReader reader = null;
BufferedWriter writer = null;
MaltParserService service = null;
try {
service = new MaltParserService();
// Inititalize:
// 1. the parser model modelName
// 2. the working directory workingDir
// 3. the logging file logName
service.initializeParserModel(
"-c " + modelName
+ " -m parse"
+ " -w " + workingDir
+ " -lfi " + logName);
reader = new BufferedReader(new InputStreamReader(
new FileInputStream(workingDir + File.separator + inputPath), "UTF-8"));
writer = new BufferedWriter(new OutputStreamWriter(
new FileOutputStream(workingDir + File.separator + outputPath), "UTF-8"));
String line;
ArrayList<String> lines = new ArrayList<>();
while ((line = reader.readLine()) != null) {
if (line.trim().length() == 0) {
DependencyStructure graph = service.parse(lines.toArray(new String[lines.size()]));
dss.add(graph);
for (int i = 1; i <= graph.getHighestDependencyNodeIndex(); i++) {
DependencyNode node = graph.getDependencyNode(i);
if (node != null) {
for (SymbolTable table : node.getLabelTypes()) {
writer.write(node.getLabelSymbol(table) + "\t");
}
if (node.hasHead()) {
Edge e = node.getHeadEdge();
writer.write(e.getSource().getIndex() + "\t");
if (e.isLabeled()) {
for (SymbolTable table : e.getLabelTypes()) {
writer.write(e.getLabelSymbol(table) + "\t");
}
} else {
for (SymbolTable table : graph.getDefaultRootEdgeLabels().keySet()) {
writer.write(graph.getDefaultRootEdgeLabelSymbol(table) + "\t");
}
}
}
writer.write('\n');
writer.flush();
}
}
writer.write('\n');
writer.flush();
lines.clear();
} else {
lines.add(line);
}
}
} finally {
if (reader != null) {
try { reader.close(); }
catch(Exception ex){}
}
if (writer != null) {
try {
writer.flush();
writer.close();
} catch(Exception ex){}
}
if (service != null) {
service.terminateParserModel();
}
return dss;
}
}
/**
*
* To run this example requires that you have ran TrainingExperiment that creates model0.mco and model1.mco
*
* @author Johan Hall
*/
public static void ParseFromFile1(String workingDir, String modelName,
String inputPath, String outputPath, String logName)
throws MaltChainedException {
// Inititalize:
// 1. the parser model modelName
// 2. the working directory workingDir
// 3. the logging file logName
new MaltParserService(0).runExperiment(
"-c " + modelName
+ " -m parse"
+ " -i "+ workingDir + File.separator + inputPath
+ " -o " + workingDir + File.separator + outputPath
+ " -w " + workingDir
+ " -lfi " + logName);
}
}