package edu.stanford.nlp.parser.charniak;
import edu.stanford.nlp.io.IOUtils;
import edu.stanford.nlp.ling.HasWord;
import edu.stanford.nlp.ling.SentenceUtils;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.util.*;
import java.io.*;
import java.util.ArrayList;
import java.util.List;
import java.util.logging.Logger;
/**
 * Runs the Charniak parser as an external command-line process.
 *
 * <p>Sentences are written to a temporary input file, the configured
 * executable is run from the configured directory with that file as its only
 * argument, and the process output is read back with a
 * {@link CharniakScoredParsesReaderWriter}.
 *
 * @author Angel Chang
 */
public class CharniakParser {

  private static final Logger logger = Logger.getLogger(CharniakParser.class.getName());

  private static final String CHARNIAK_DIR = "/u/nlp/packages/bllip-parser/";
  // note: this is actually the parser+reranker (will use 2 CPUs)
  private static final String CHARNIAK_BIN = "./parse-50best.sh";

  /** Prefix shared by all temporary files created by this class. */
  private static final String TEMP_FILE_PREFIX = "charniak.";

  private final CharniakScoredParsesReaderWriter scoredParsesReaderWriter = new CharniakScoredParsesReaderWriter();

  /** Working directory the parser executable is launched from. */
  private String dir = CHARNIAK_DIR;
  /** Command used to launch the parser. */
  private String parserExecutable = CHARNIAK_BIN;

  /**
   * Sentence length threshold. {@link #printSentences} logs a warning for
   * every sentence longer than this; such sentences are still written to the
   * parser input.
   */
  private int maxSentenceLength = 400;

  /** Beam size setting. Stored and exposed, but not read by any method of this class. */
  private int beamSize = 0;

  /** Creates a parser that uses the default directory and executable. */
  public CharniakParser() {}

  /**
   * Creates a parser that uses the given working directory and executable.
   *
   * @param dir directory the executable is run from
   * @param parserExecutable command used to launch the parser
   */
  public CharniakParser(String dir, String parserExecutable) {
    this.parserExecutable = parserExecutable;
    this.dir = dir;
  }

  public int getBeamSize() {
    return beamSize;
  }

  public void setBeamSize(int beamSize) {
    this.beamSize = beamSize;
  }

  public int getMaxSentenceLength() {
    return maxSentenceLength;
  }

  public void setMaxSentenceLength(int maxSentenceLength) {
    this.maxSentenceLength = maxSentenceLength;
  }

  /**
   * Parses a sentence and returns the first tree reported by the parser.
   *
   * @param sentence the words of the sentence
   * @return the first parse tree, or {@code null} if no parse was produced
   */
  public Tree getBestParse(List<? extends HasWord> sentence) {
    ScoredObject<Tree> scoredParse = getBestScoredParse(sentence);
    return (scoredParse != null) ? scoredParse.object() : null;
  }

  /**
   * Parses a sentence and returns the first scored tree reported by the parser.
   *
   * @param sentence the words of the sentence
   * @return the first scored parse, or {@code null} if no parse was produced
   */
  public ScoredObject<Tree> getBestScoredParse(List<? extends HasWord> sentence) {
    List<ScoredObject<Tree>> kBestParses = getKBestParses(sentence, 1);
    // Guard against an empty list as well as null so get(0) cannot throw.
    if (kBestParses != null && !kBestParses.isEmpty()) {
      return kBestParses.get(0);
    }
    return null;
  }

  /**
   * Parses a single sentence, deleting the temporary files afterwards.
   *
   * @param sentence the words of the sentence
   * @param k requested number of parses (see {@link #runCharniak} for how it is used)
   * @return the scored parses for the sentence, or {@code null} if the parser produced none
   */
  public List<ScoredObject<Tree>> getKBestParses(List<? extends HasWord> sentence, int k) {
    return getKBestParses(sentence, k, true);
  }

  /**
   * Parses a single sentence.
   *
   * @param sentence the words of the sentence
   * @param k requested number of parses (see {@link #runCharniak} for how it is used)
   * @param deleteTempFiles whether the temporary input, output and error files
   *        are removed once the result has been read
   * @return the scored parses for the sentence, or {@code null} if the parser produced none
   * @throws RuntimeException wrapping any {@link IOException} raised while
   *         creating or writing the temporary files
   */
  public List<ScoredObject<Tree>> getKBestParses(List<? extends HasWord> sentence, int k, boolean deleteTempFiles) {
    File inFile = null;
    File outFile = null;
    File errFile = null;
    try {
      inFile = createTempFile(".in", deleteTempFiles);
      outFile = createTempFile(".out", deleteTempFiles);
      errFile = createTempFile(".err", deleteTempFiles);
      printSentence(sentence, inFile.getAbsolutePath());
      runCharniak(k, inFile.getAbsolutePath(), outFile.getAbsolutePath(), errFile.getAbsolutePath());
      Iterable<List<ScoredObject<Tree>>> iter = scoredParsesReaderWriter.readScoredTrees(outFile.getAbsolutePath());
      // Pull the result before the files are removed in the finally block.
      java.util.Iterator<List<ScoredObject<Tree>>> parses = iter.iterator();
      return parses.hasNext() ? parses.next() : null;
    } catch (IOException ex) {
      throw new RuntimeException(ex);
    } finally {
      // Clean up on failure as well as on success.
      if (deleteTempFiles) {
        deleteFiles(inFile, outFile, errFile);
      }
    }
  }

  /**
   * Parses several sentences in one parser run, deleting the temporary files afterwards.
   *
   * @param sentences the sentences to parse
   * @param k requested number of parses (see {@link #runCharniak} for how it is used)
   * @return one list of scored parses per entry read from the parser output
   */
  public Iterable<List<ScoredObject<Tree>>> getKBestParses(Iterable<List<? extends HasWord>> sentences, int k) {
    return getKBestParses(sentences, k, true);
  }

  /**
   * Parses several sentences in one parser run.
   *
   * @param sentences the sentences to parse
   * @param k requested number of parses (see {@link #runCharniak} for how it is used)
   * @param deleteTempFiles whether the temporary input, output and error files
   *        are removed before this method returns
   * @return one list of scored parses per entry read from the parser output
   * @throws RuntimeException wrapping any {@link IOException} raised while
   *         creating or writing the temporary files
   */
  public Iterable<List<ScoredObject<Tree>>> getKBestParses(Iterable<List<? extends HasWord>> sentences, int k, boolean deleteTempFiles) {
    File inFile = null;
    File outFile = null;
    File errFile = null;
    try {
      inFile = createTempFile(".in", deleteTempFiles);
      outFile = createTempFile(".out", deleteTempFiles);
      errFile = createTempFile(".err", deleteTempFiles);
      printSentences(sentences, inFile.getAbsolutePath());
      runCharniak(k, inFile.getAbsolutePath(), outFile.getAbsolutePath(), errFile.getAbsolutePath());
      Iterable<List<ScoredObject<Tree>>> iter = scoredParsesReaderWriter.readScoredTrees(outFile.getAbsolutePath());
      // NOTE(review): the output file is deleted below before the caller has
      // consumed the returned iterable. That presumably relies on the reader
      // having already opened the file -- confirm against
      // CharniakScoredParsesReaderWriter.readScoredTrees.
      return new IterableIterator<>(iter.iterator());
    } catch (IOException ex) {
      throw new RuntimeException(ex);
    } finally {
      // Clean up on failure as well as on success.
      if (deleteTempFiles) {
        deleteFiles(inFile, outFile, errFile);
      }
    }
  }

  /**
   * Writes a single sentence to the given file in the parser's input format.
   *
   * @param sentence the words of the sentence
   * @param filename path of the file to write
   */
  public void printSentence(List<? extends HasWord> sentence, String filename) {
    List<List<? extends HasWord>> sentences = new ArrayList<>();
    sentences.add(sentence);
    printSentences(sentences, filename);
  }

  /**
   * Writes sentences to the given file, one per line, each wrapped as
   * {@code <s> ... </s>}. A warning is logged for every sentence longer than
   * the configured maximum length; the sentence is written regardless.
   *
   * @param sentences the sentences to write
   * @param filename path of the file to write
   * @throws RuntimeException wrapping any {@link IOException} raised while opening the file
   */
  public void printSentences(Iterable<List<? extends HasWord>> sentences, String filename) {
    // try-with-resources closes the writer even if formatting a sentence throws.
    try (PrintWriter pw = IOUtils.getPrintWriter(filename)) {
      for (List<? extends HasWord> sentence : sentences) {
        pw.print("<s> ");   // Note: Use <s sentence-id > to identify sentences
        String sentString = SentenceUtils.listToString(sentence);
        if (sentence.size() > maxSentenceLength) {
          logger.warning("Sentence length=" + sentence.size() +
                  " is longer than maximum set length " + maxSentenceLength);
          logger.warning("Long Sentence: " + sentString);
        }
        pw.print(sentString);
        pw.println(" </s>");
      }
    } catch (IOException ex) {
      throw new RuntimeException(ex);
    }
  }

  /**
   * Runs the parser executable on an input file, sending the process output
   * to {@code outfile} and its error output to {@code errfile}.
   *
   * @param n requested number of parses. It is not forwarded to the
   *        executable: the command line consists only of the executable and
   *        the input file. (Earlier code bumped {@code n} from 1 to 2 with the
   *        remark "Charniak does not output score if n = 1?", but never used
   *        the value afterwards.)
   * @param infile path of the sentence file to parse
   * @param outfile path the process output is written to
   * @param errfile path the process error output is written to
   * @throws RuntimeException wrapping any {@link IOException} raised while opening the files
   */
  public void runCharniak(int n, String infile, String outfile, String errfile) {
    List<String> args = new ArrayList<>();
    args.add(parserExecutable);
    args.add(infile);
    ProcessBuilder process = new ProcessBuilder(args);
    process.directory(new File(this.dir));
    // try-with-resources closes both writers even if running the process throws.
    try (PrintWriter out = IOUtils.getPrintWriter(outfile);
         PrintWriter err = IOUtils.getPrintWriter(errfile)) {
      SystemUtils.run(process, out, err);
    } catch (IOException ex) {
      throw new RuntimeException(ex);
    }
  }

  /** Creates a temporary file with the shared prefix, optionally registering it for deletion at JVM exit. */
  private static File createTempFile(String suffix, boolean deleteOnExit) throws IOException {
    File file = File.createTempFile(TEMP_FILE_PREFIX, suffix);
    if (deleteOnExit) {
      file.deleteOnExit();
    }
    return file;
  }

  /** Best-effort deletion of the given files; entries that were never created are skipped. */
  private static void deleteFiles(File... files) {
    for (File file : files) {
      if (file != null) {
        file.delete();
      }
    }
  }

}