package ir.ac.iust.nlp.dependencyparser.inputoutput;

import java.util.LinkedList;
import java.util.List;
import org.maltparser.core.exception.MaltChainedException;
import org.maltparser.core.io.dataformat.DataFormatInstance;
import org.maltparser.core.io.dataformat.DataFormatSpecification;
import org.maltparser.core.symbol.SymbolTableHandler;
import org.maltparser.core.symbol.trie.TrieSymbolTableHandler;
import org.maltparser.core.syntaxgraph.DependencyGraph;
import org.maltparser.core.syntaxgraph.reader.SyntaxGraphReader;
import org.maltparser.core.syntaxgraph.reader.TabReader;
import org.maltparser.core.syntaxgraph.writer.SyntaxGraphWriter;
import org.maltparser.core.syntaxgraph.writer.TabWriter;

/**
 * Reads dependency graphs formatted according to the CoNLL format and writes
 * graphs to another file.
 *
 * <p>Besides the one-shot {@link #run(String, String, String)} copy, the class
 * offers an incremental API: {@link #initRead}/{@link #readNext}/{@link #readAll}
 * on the input side and {@link #initWrite}/{@link #writeNext}/{@link #writeAll}
 * on the output side.
 *
 * <p>Instances hold mutable reader/writer state and perform no synchronization.
 *
 * @author Johan Hall
 */
public class ReadWriteCoNLL {

    /** Graph that the next sentence is read into; replaced after every sentence in {@link #readAll()}. */
    private DependencyGraph inputGraph;
    private final SyntaxGraphReader tabReader;
    private final SyntaxGraphWriter tabWriter;
    /** Last value returned by the reader; {@code false} until a read has been initialised. */
    private boolean moreInput = false;
    private final SymbolTableHandler symbolTables;

    /**
     * Builds a tabular reader and writer that share one data format instance.
     *
     * @param dataFormatFileName path of the data format XML specification
     *        (for example a {@code conllx.xml} file)
     * @throws MaltChainedException if the specification cannot be parsed or
     *         the data format instance cannot be created
     */
    public ReadWriteCoNLL(String dataFormatFileName) throws MaltChainedException {
        // Symbol table handler shared by the data format and every graph created here.
        symbolTables = new TrieSymbolTableHandler(TrieSymbolTableHandler.ADD_NEW_TO_TRIE);

        // Data format instance built from the given XML specification.
        DataFormatSpecification dataFormat = new DataFormatSpecification();
        dataFormat.parseDataFormatXMLfile(dataFormatFileName);
        // NOTE(review): "none" is passed through unchanged from the original code;
        // confirm its meaning against the MaltParser documentation.
        DataFormatInstance dataFormatInstance =
                dataFormat.createDataFormatInstance(symbolTables, "none");

        inputGraph = new DependencyGraph(symbolTables);

        tabReader = new TabReader();
        tabReader.setDataFormatInstance(dataFormatInstance);

        tabWriter = new TabWriter();
        tabWriter.setDataFormatInstance(dataFormatInstance);
    }

    /**
     * Copies every non-empty sentence from {@code inFile} to {@code outFile}.
     *
     * <p>The reader and the writer are closed when the method returns, whether
     * it completes normally or an exception is thrown.
     *
     * @param inFile  input file name
     * @param outFile output file name
     * @param charSet character encoding used for both files
     * @throws MaltChainedException if opening, reading, writing or closing fails
     */
    public void run(String inFile, String outFile, String charSet)
            throws MaltChainedException {
        tabReader.open(inFile, charSet);
        try {
            tabWriter.open(outFile, charSet);
            try {
                moreInput = true;
                while (moreInput) {
                    // The reader reports whether further sentences remain.
                    moreInput = tabReader.readSentence(inputGraph);
                    if (inputGraph.hasTokens()) {
                        tabWriter.writeSentence(inputGraph);
                    }
                }
            } finally {
                tabWriter.close();
            }
        } finally {
            // Never leave the flag set while the reader is closed.
            moreInput = false;
            tabReader.close();
        }
    }

    /**
     * Opens the input file for incremental reading with {@link #readNext()} or
     * {@link #readAll()}.
     *
     * @param inFile  input file name
     * @param charSet character encoding of the input file
     * @throws MaltChainedException if the file cannot be opened
     */
    public void initRead(String inFile, String charSet) throws MaltChainedException {
        tabReader.open(inFile, charSet);
        moreInput = true;
    }

    /**
     * Reads all remaining sentences and returns one dependency graph per
     * non-empty sentence. Each returned graph is a distinct instance.
     *
     * <p>The reader is closed before the method returns, also when reading
     * fails.
     *
     * @return the graphs read, in input order; empty if nothing remained
     * @throws MaltChainedException if reading or closing fails
     */
    public List<DependencyGraph> readAll() throws MaltChainedException {
        List<DependencyGraph> all = new LinkedList<>();
        try {
            while (moreInput) {
                moreInput = tabReader.readSentence(inputGraph);
                if (inputGraph.hasTokens()) {
                    all.add(inputGraph);
                }
                // Fresh graph for the next sentence so stored graphs are not reused.
                inputGraph = new DependencyGraph(symbolTables);
            }
        } finally {
            // Never leave the flag set while the reader is closed.
            moreInput = false;
            terminateRead();
        }
        return all;
    }

    /**
     * Reads the next sentence and returns its dependency graph.
     *
     * <p>The returned object is this instance's current working graph, and the
     * same object is handed to the reader again on the following call, so
     * presumably its contents are replaced by the next read; callers should
     * finish with one graph before requesting the next.
     *
     * <p>Once the reader has reported that no input remains, the next call
     * closes the reader and returns {@code null}.
     *
     * @return the graph of the sentence just read, or {@code null} if the
     *         sentence had no tokens or the input is exhausted
     * @throws MaltChainedException if reading or closing fails
     */
    public DependencyGraph readNext() throws MaltChainedException {
        if (!moreInput) {
            terminateRead();
            return null;
        }
        moreInput = tabReader.readSentence(inputGraph);
        return inputGraph.hasTokens() ? inputGraph : null;
    }

    /**
     * Closes the reader.
     *
     * @throws MaltChainedException if closing fails
     */
    public void terminateRead() throws MaltChainedException {
        tabReader.close();
    }

    /**
     * Opens the output file for writing with {@link #writeNext(DependencyGraph)}
     * or {@link #writeAll(List)}.
     *
     * @param outFile output file name
     * @param charSet character encoding of the output file
     * @throws MaltChainedException if the file cannot be opened
     */
    public void initWrite(String outFile, String charSet) throws MaltChainedException {
        tabWriter.open(outFile, charSet);
    }

    /**
     * Writes every graph that has tokens to the output file, then closes the
     * writer. The writer is closed even when writing fails.
     *
     * @param dgs graphs to write, in order
     * @throws MaltChainedException if writing or closing fails
     */
    public void writeAll(List<DependencyGraph> dgs) throws MaltChainedException {
        try {
            // Iterate instead of indexing: get(i) is linear on the LinkedList
            // produced by readAll(). A local variable is used so the caller's
            // graphs never become this instance's read target.
            for (DependencyGraph graph : dgs) {
                if (graph.hasTokens()) {
                    tabWriter.writeSentence(graph);
                }
            }
        } finally {
            terminateWrite();
        }
    }

    /**
     * Writes a single graph to the output file if it has tokens. The writer
     * stays open; call {@link #terminateWrite()} when done.
     *
     * @param dg graph to write
     * @throws MaltChainedException if writing fails
     */
    public void writeNext(DependencyGraph dg) throws MaltChainedException {
        if (dg.hasTokens()) {
            tabWriter.writeSentence(dg);
        }
    }

    /**
     * Closes the writer.
     *
     * @throws MaltChainedException if closing fails
     */
    public void terminateWrite() throws MaltChainedException {
        tabWriter.close();
    }
}