package ir.ac.iust.nlp.dependencyparser.parsing;

import java.io.*;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;

import org.maltparser.MaltParserService;
import org.maltparser.core.exception.MaltChainedException;
import org.maltparser.core.symbol.SymbolTable;
import org.maltparser.core.syntaxgraph.DependencyStructure;
import org.maltparser.core.syntaxgraph.edge.Edge;
import org.maltparser.core.syntaxgraph.node.DependencyNode;

public class ParsingUtility {

    /*
     * Running this example requires that you have run TrainingExperiment,
     * which creates model0.mco.
     *
     * Sample sentenceTokens: an array of tokens containing the Swedish sentence
     * 'Grundavdraget upphör alltså vid en taxerad inkomst på 52500 kr.'
     * in the CoNLL data format:
     *
     *   String[] tokens = new String[11];
     *   tokens[0]  = "1\tGrundavdraget\t_\tN\tNN\tDD|SS";
     *   tokens[1]  = "2\tupphör\t_\tV\tVV\tPS|SM";
     *   tokens[2]  = "3\talltså\t_\tAB\tAB\tKS";
     *   tokens[3]  = "4\tvid\t_\tPR\tPR\t_";
     *   tokens[4]  = "5\ten\t_\tN\tEN\t_";
     *   tokens[5]  = "6\ttaxerad\t_\tP\tTP\tPA";
     *   tokens[6]  = "7\tinkomst\t_\tN\tNN\t_";
     *   tokens[7]  = "8\tpå\t_\tPR\tPR\t_";
     *   tokens[8]  = "9\t52500\t_\tR\tRO\t_";
     *   tokens[9]  = "10\tkr\t_\tN\tNN\t_";
     *   tokens[10] = "11\t.\t_\tP\tIP\t_";
     *
     * @author Johan Hall
     */
    public static DependencyStructure ParseSentence(String[] sentenceTokens,
            String workingDir, String modelPath, String logFile)
            throws MaltChainedException {
        return (DependencyStructure) ParseFromInputToken(1, sentenceTokens,
                workingDir, modelPath, logFile);
    }

    public static String[] ParseSentence1(String[] sentenceTokens,
            String workingDir, String modelPath, String logFile)
            throws MaltChainedException {
        return (String[]) ParseFromInputToken(2, sentenceTokens,
                workingDir, modelPath, logFile);
    }

    private static Object ParseFromInputToken(int version, String[] tokens,
            String workingDir, String modelName, String logName)
            throws MaltChainedException {
        Object result = null;
        MaltParserService service = null;
        try {
            service = new MaltParserService();
            // Initialize:
            // 1. the parser model modelName
            // 2. the working directory workingDir
            // 3. the logging file logName
            service.initializeParserModel(
                    "-c " + modelName + " -m parse"
                    + " -w " + workingDir
                    + " -lfi " + logName);
            switch (version) {
                /*
                 * This case shows how to parse a sentence with MaltParser after
                 * first initializing a parser model.
                 */
                case 1:
                    // Parses the sentence (e.g. the Swedish sentence above)
                    DependencyStructure graph = service.parse(tokens);
                    // Returns the dependency graph created by MaltParser.
                    result = graph;
                    break;
                /*
                 * This case is the same as case 1, except that it uses the
                 * parseTokens method of MaltParserService, which returns an
                 * array of tokens annotated with each token's head index and
                 * dependency type.
                 */
                case 2:
                    // Parses the sentence (e.g. the Swedish sentence above)
                    result = service.parseTokens(tokens);
                    break;
            }
        } finally {
            if (service != null) {
                // Terminates the parser model
                service.terminateParserModel();
            }
        }
        return result;
    }

    /*
     * This example shows how to parse sentences from a file. The only
     * difference from case 1 is that the input is read from a file (e.g.
     * 'data/talbanken05_test.conll') and written to an output file (e.g.
     * 'out.conll') in the CoNLL data format.
     *
     * Running this example requires that you have run TrainingExperiment,
     * which creates model0.mco.
     *
     * @author Johan Hall
     */
    public static List<DependencyStructure> ParseFromFile(String workingDir,
            String modelName, String inputPath, String outputPath, String logName)
            throws MaltChainedException, IOException {
        List<DependencyStructure> dss = new LinkedList<>();
        BufferedReader reader = null;
        BufferedWriter writer = null;
        MaltParserService service = null;
        try {
            service = new MaltParserService();
            // Initialize:
            // 1. the parser model modelName
            // 2. the working directory workingDir
            // 3. the logging file logName
            service.initializeParserModel(
                    "-c " + modelName + " -m parse"
                    + " -w " + workingDir
                    + " -lfi " + logName);
            reader = new BufferedReader(new InputStreamReader(
                    new FileInputStream(workingDir + File.separator + inputPath), "UTF-8"));
            writer = new BufferedWriter(new OutputStreamWriter(
                    new FileOutputStream(workingDir + File.separator + outputPath), "UTF-8"));
            String line;
            ArrayList<String> lines = new ArrayList<>();
            // A sentence is parsed when a blank line is reached, so the input
            // file is expected to separate (and terminate) sentences with blank
            // lines, as in the CoNLL data format.
            while ((line = reader.readLine()) != null) {
                if (line.trim().length() == 0) {
                    DependencyStructure graph =
                            service.parse(lines.toArray(new String[lines.size()]));
                    dss.add(graph);
                    // Write the parsed sentence to the output file in the CoNLL
                    // data format.
                    for (int i = 1; i <= graph.getHighestDependencyNodeIndex(); i++) {
                        DependencyNode node = graph.getDependencyNode(i);
                        if (node != null) {
                            for (SymbolTable table : node.getLabelTypes()) {
                                writer.write(node.getLabelSymbol(table) + "\t");
                            }
                            if (node.hasHead()) {
                                Edge e = node.getHeadEdge();
                                writer.write(e.getSource().getIndex() + "\t");
                                if (e.isLabeled()) {
                                    for (SymbolTable table : e.getLabelTypes()) {
                                        writer.write(e.getLabelSymbol(table) + "\t");
                                    }
                                } else {
                                    for (SymbolTable table : graph.getDefaultRootEdgeLabels().keySet()) {
                                        writer.write(graph.getDefaultRootEdgeLabelSymbol(table) + "\t");
                                    }
                                }
                            }
                            writer.write('\n');
                            writer.flush();
                        }
                    }
                    writer.write('\n');
                    writer.flush();
                    lines.clear();
                } else {
                    lines.add(line);
                }
            }
        } finally {
            if (reader != null) {
                try { reader.close(); } catch (Exception ex) { /* ignored */ }
            }
            if (writer != null) {
                try { writer.flush(); writer.close(); } catch (Exception ex) { /* ignored */ }
            }
            if (service != null) {
                // Terminates the parser model
                service.terminateParserModel();
            }
        }
        return dss;
    }

    /**
     * Running this example requires that you have run TrainingExperiment,
     * which creates model0.mco and model1.mco.
     *
     * @author Johan Hall
     */
    public static void ParseFromFile1(String workingDir, String modelName,
            String inputPath, String outputPath, String logName)
            throws MaltChainedException {
        // Initialize:
        // 1. the parser model modelName
        // 2. the working directory workingDir
        // 3. the logging file logName
        new MaltParserService(0).runExperiment(
                "-c " + modelName + " -m parse"
                + " -i " + workingDir + File.separator + inputPath
                + " -o " + workingDir + File.separator + outputPath
                + " -w " + workingDir
                + " -lfi " + logName);
    }
}
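/*
 * Minimal usage sketch (not part of the original utility class): it shows how the
 * methods above might be called. The working directory ".", the configuration name
 * "model0" (TrainingExperiment is expected to have created model0.mco there), the
 * log file name "parser.log" and the input/output file names are assumptions drawn
 * from the comments above; adjust them to your own setup.
 */
class ParsingUtilityUsageExample {

    public static void main(String[] args) throws MaltChainedException, IOException {
        // The Swedish sample sentence from the comment above, in the CoNLL data format.
        String[] tokens = new String[11];
        tokens[0] = "1\tGrundavdraget\t_\tN\tNN\tDD|SS";
        tokens[1] = "2\tupphör\t_\tV\tVV\tPS|SM";
        tokens[2] = "3\talltså\t_\tAB\tAB\tKS";
        tokens[3] = "4\tvid\t_\tPR\tPR\t_";
        tokens[4] = "5\ten\t_\tN\tEN\t_";
        tokens[5] = "6\ttaxerad\t_\tP\tTP\tPA";
        tokens[6] = "7\tinkomst\t_\tN\tNN\t_";
        tokens[7] = "8\tpå\t_\tPR\tPR\t_";
        tokens[8] = "9\t52500\t_\tR\tRO\t_";
        tokens[9] = "10\tkr\t_\tN\tNN\t_";
        tokens[10] = "11\t.\t_\tP\tIP\t_";

        // Parse a single sentence; each returned token also carries its head index
        // and dependency type.
        String[] parsedTokens = ParsingUtility.ParseSentence1(tokens, ".", "model0", "parser.log");
        for (String parsedToken : parsedTokens) {
            System.out.println(parsedToken);
        }

        // Parse a whole file of blank-line separated CoNLL sentences; the parsed
        // graphs are also written to the output file (file names are assumptions).
        List<DependencyStructure> graphs = ParsingUtility.ParseFromFile(
                ".", "model0", "data/talbanken05_test.conll", "out.conll", "parser.log");
        System.out.println("Parsed " + graphs.size() + " sentences.");
    }
}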