package rainbownlp.parser;
import java.io.IOException;
import java.io.PrintWriter;
import java.io.Reader;
import java.io.StringReader;
import java.io.StringWriter;
import java.io.Writer;
import java.util.List;
import rainbownlp.util.FileUtil;
import edu.stanford.nlp.ling.HasWord;
import edu.stanford.nlp.parser.lexparser.LexicalizedParser;
import edu.stanford.nlp.process.Tokenizer;
import edu.stanford.nlp.trees.LabeledScoredTreeFactory;
import edu.stanford.nlp.trees.NPTmpRetainingTreeNormalizer;
import edu.stanford.nlp.trees.PennTreeReader;
import edu.stanford.nlp.trees.PennTreebankLanguagePack;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.TreePrint;
import edu.stanford.nlp.trees.TreebankLanguagePack;
/**
 * Convenience wrapper around the Stanford {@link LexicalizedParser}.
 *
 * <p>An instance keeps the most recently parsed (or loaded) tree in
 * {@link #bufferTree}; the accessor methods render that tree with different
 * {@link TreePrint} formats. The parser model is held in a static field and is
 * loaded lazily the first time an instance is created.
 *
 * <p>NOTE(review): the lazy initialisation of {@link #lp} is not synchronized;
 * confirm that instances are only created from a single thread.
 */
public class StanfordParser {
    /** Model loaded whenever {@link #lp} has not been initialised yet. */
    private static final String DEFAULT_MODEL_PATH =
            "edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz";
    /** {@link TreePrint} format used by {@link #getPenn()}. */
    private static final String PENN_FORMAT = "penn";
    /** {@link TreePrint} format used by {@link #getDependencies()}. */
    private static final String DEPENDENCIES_FORMAT = "typedDependenciesCollapsed";

    /** Shared parser model, loaded on first use. */
    static LexicalizedParser lp;
    /** Language pack that supplies the tokenizer factory used by {@link #parse(String)}. */
    static TreebankLanguagePack tlp;
    /** Printer used by {@link #getTagged()}. */
    TreePrint tp = new TreePrint("wordsAndTags");

    static {
        tlp = new PennTreebankLanguagePack();
    }

    /** Tagged sentence supplied through {@link #load(String, String)}; not set by parsing. */
    public String tagged_sentence;
    /** Tree produced by the last successful parse or load, or {@code null} if there is none. */
    public Tree bufferTree = null;

    /**
     * Small demo: parses a fixed sentence and prints its tagged form, Penn
     * tree and typed dependencies to standard output.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        StanfordParser parser = new StanfordParser();
        parser.parse("this is a test");
        String tagged = parser.getTagged();
        String dependencies = parser.getDependencies();
        String penn_tree = parser.getPenn();
        System.out.println(tagged);
        System.out.println(penn_tree);
        System.out.println(dependencies);
    }

    /** Creates a parser wrapper, loading the shared model if it is not loaded yet. */
    public StanfordParser() {
        ensureModelLoaded();
    }

    /** Loads the default model into {@link #lp} when it is still {@code null}. */
    private static void ensureModelLoaded() {
        if (lp == null) {
            lp = LexicalizedParser.loadModel(DEFAULT_MODEL_PATH);
        }
    }

    /**
     * Renders the buffered tree in the "wordsAndTags" format.
     *
     * @return the rendered tree, or an empty string when no tree is buffered
     */
    public String getTagged() {
        return render(tp);
    }

    /**
     * Reads one line from the ".pos" companion of a ".txt" file.
     *
     * <p>The companion name is derived with {@code filename.replace(".txt", ".pos")},
     * which replaces every occurrence of ".txt" in the path.
     *
     * @param filename path of the text file
     * @param sentence_line 1-based line number of the sentence
     * @return the requested line, or an empty string when {@code sentence_line}
     *         is smaller than 1 or lies beyond the end of the file
     */
    public static String getTagged(String filename, int sentence_line) {
        // Check the cheap lower bound before touching the file system.
        if (sentence_line < 1) {
            return "";
        }
        List<String> taggs = FileUtil.loadLineByLine(filename.replace(".txt", ".pos"));
        // Mirror the lower-bound behaviour instead of throwing for a line past the end.
        if (taggs == null || sentence_line > taggs.size()) {
            return "";
        }
        return taggs.get(sentence_line - 1);
    }

    /**
     * Tokenizes and parses a sentence, storing the result in {@link #bufferTree}.
     *
     * <p>When the sentence is {@code null}, empty, or yields no usable token,
     * {@link #bufferTree} is reset to {@code null} so that the accessors do not
     * report the tree of a previously parsed sentence.
     *
     * @param sentence the raw sentence text
     */
    public void parse(String sentence) {
        // Drop any earlier result first; every early return below leaves it cleared.
        bufferTree = null;
        if (sentence == null || sentence.equals("")) {
            return;
        }
        // Only relevant if lp was reset after construction; uses the same model
        // as the constructor rather than switching to a different grammar.
        ensureModelLoaded();

        Tokenizer<? extends HasWord> toke =
                tlp.getTokenizerFactory().getTokenizer(new StringReader(sentence));
        List<? extends HasWord> sentence_tokenized = toke.tokenize();
        if (sentence_tokenized.isEmpty()) {
            return;
        }
        // Compare the token text: a HasWord object is never equal to a String.
        if (sentence_tokenized.size() == 1 && "".equals(sentence_tokenized.get(0).word())) {
            return;
        }
        bufferTree = lp.apply(sentence_tokenized);
    }

    /**
     * Renders the buffered tree in the "penn" format.
     *
     * @return the rendered tree, or an empty string when no tree is buffered
     */
    public String getPenn() {
        if (bufferTree == null) {
            return "";
        }
        return render(new TreePrint(PENN_FORMAT));
    }

    /**
     * Renders the buffered tree in the "typedDependenciesCollapsed" format.
     *
     * @return the rendered dependencies, or an empty string when no tree is buffered
     */
    public String getDependencies() {
        if (bufferTree == null) {
            return "";
        }
        return render(new TreePrint(DEPENDENCIES_FORMAT));
    }

    /**
     * Replaces the buffered tree with the first tree read from a Penn-style string.
     *
     * @param penn bracketed tree text
     * @throws IOException if reading the tree fails
     */
    public void load(String penn) throws IOException {
        Reader in = new StringReader(penn);
        PennTreeReader tr = new PennTreeReader(in, new LabeledScoredTreeFactory(),
                new NPTmpRetainingTreeNormalizer());
        try {
            bufferTree = tr.readTree();
        } finally {
            tr.close();
        }
    }

    /**
     * Replaces the buffered tree as {@link #load(String)} does and records the
     * accompanying tagged sentence in {@link #tagged_sentence}.
     *
     * @param penn bracketed tree text
     * @param tagged tagged form of the same sentence, stored as given
     * @throws IOException if reading the tree fails
     */
    public void load(String penn, String tagged) throws IOException {
        load(penn);
        tagged_sentence = tagged;
    }

    /** Prints the buffered tree with the given printer; returns "" when there is no tree. */
    private String render(TreePrint printer) {
        if (bufferTree == null) {
            return "";
        }
        StringWriter buffer = new StringWriter();
        PrintWriter out = new PrintWriter(buffer);
        printer.printTree(bufferTree, out);
        // Make sure everything written through the PrintWriter reaches the buffer.
        out.flush();
        return buffer.toString();
    }
}