/**
*
*/
package edu.berkeley.nlp.PCFGLA;
import java.io.BufferedWriter;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.io.Writer;
import edu.berkeley.nlp.util.Numberer;
/**
 * Command-line utility that loads a serialized grammar and writes its
 * contents to a set of plain-text files sharing a common name prefix.
 *
 * @author petrov
 */
public class DumpGrammar {

    /** Suffix that is removed from state names before they are written. */
    private static final String STATE_SUFFIX = "^g";

    /**
     * Loads the grammar named by {@code args[0]} and dumps it to files whose
     * names start with {@code args[1]}. Any further arguments are ignored.
     * Exits with status 2 on bad usage and status 1 when the grammar cannot
     * be loaded or its lexicon is not a {@link SophisticatedLexicon}.
     *
     * @param args {@code <grammar file> <output file name prefix>}
     */
    public static void main(String[] args) {
        if (args.length < 2) {
            System.out.println(
                "usage: java -cp berkeleyParser.jar edu.berkeley.nlp.PCFGLA.DumpGrammar <grammar> <output file name> \n " +
                "reads in a serialized grammar file and writes it to a text file."
            );
            System.exit(2);
        }
        String inFileName = args[0];
        String outName = args[1];

        System.out.println("Loading grammar from file " + inFileName + ".");
        ParserData pData = ParserData.Load(inFileName);
        if (pData == null) {
            System.out.println("Failed to load grammar from file " + inFileName + ".");
            System.exit(1);
        }

        Grammar grammar = pData.getGrammar();
        // Check the lexicon type explicitly so that an unsupported (or missing)
        // lexicon produces a readable message instead of a ClassCastException
        // here or a NullPointerException later on.
        Object loadedLexicon = pData.getLexicon();
        if (!(loadedLexicon instanceof SophisticatedLexicon)) {
            System.out.println("Grammar file " + inFileName
                + " does not contain a SophisticatedLexicon; cannot dump it.");
            System.exit(1);
        }
        SophisticatedLexicon lexicon = (SophisticatedLexicon) loadedLexicon;

        Numberer.setNumberers(pData.getNumbs());
        dumpGrammar(outName, grammar, lexicon);
    }

    /**
     * Writes the grammar and lexicon to the following text files:
     * {@code .binary} / {@code .num.binary}, {@code .unary} /
     * {@code .num.unary}, {@code .hierarchy}, {@code .numstates},
     * {@code .lexicon} and {@code .words}, each prefixed with
     * {@code outName}.
     *
     * <p>State names are looked up in the global "tags" numberer, so the
     * numberers must already be installed. {@code grammar.splitRules()} is
     * invoked before anything is written. An {@link IOException} aborts the
     * dump and its stack trace is printed; it is not rethrown.
     *
     * @param outName common prefix (path) of all output files
     * @param grammar grammar whose rules, split trees and substate counts are written
     * @param lexicon lexicon whose textual form and word list are written
     */
    public static void dumpGrammar(String outName, Grammar grammar, SophisticatedLexicon lexicon) {
        Numberer tagNumberer = Numberer.getGlobalNumberer("tags");
        grammar.splitRules();

        System.out.println("Writing output to files " + outName + ".xxx");
        try {
            writeBinaryRules(outName, grammar, tagNumberer);
            writeUnaryRules(outName, grammar, tagNumberer);
            writeSplitTrees(outName, grammar);
            writeNumSubStates(outName, grammar, tagNumberer);
            writeLexicon(outName, lexicon);
            writeWords(outName, lexicon);
        } catch (IOException ex) {
            ex.printStackTrace();
        }
    }

    /**
     * Writes every binary rule entry with a positive score to
     * {@code .binary} (one "parent_sub left_sub right_sub score" line each)
     * and, per rule, the number of lines emitted to {@code .num.binary}.
     */
    private static void writeBinaryRules(String outName, Grammar grammar, Numberer tagNumberer)
            throws IOException {
        PrintWriter out = null;
        PrintWriter outN = null;
        try {
            out = openWriter(outName + ".binary");
            outN = openWriter(outName + ".num.binary");
            for (int state = 0; state < grammar.numStates; state++) {
                BinaryRule[] parentRules = grammar.splitRulesWithP(state);
                for (int i = 0; i < parentRules.length; i++) {
                    BinaryRule rule = parentRules[i];
                    double[][][] scores = rule.getScores2();
                    String lState = stateName(tagNumberer, rule.leftChildState);
                    String rState = stateName(tagNumberer, rule.rightChildState);
                    String pState = stateName(tagNumberer, rule.parentState);

                    int number = 0;
                    StringBuilder sb = new StringBuilder();
                    // scores is indexed [left substate][right substate][parent substate].
                    for (int lS = 0; lS < scores.length; lS++) {
                        for (int rS = 0; rS < scores[lS].length; rS++) {
                            if (scores[lS][rS] == null) {
                                continue;
                            }
                            for (int pS = 0; pS < scores[lS][rS].length; pS++) {
                                double p = scores[lS][rS][pS];
                                if (p > 0) {
                                    sb.append(pState).append('_').append(pS).append(' ')
                                      .append(lState).append('_').append(lS).append(' ')
                                      .append(rState).append('_').append(rS).append(' ')
                                      .append(p).append('\n');
                                    number++;
                                }
                            }
                        }
                    }
                    out.print(sb.toString());
                    outN.print(number + "\n");
                }
            }
        } finally {
            // close() also flushes; doing it here releases the files even if a
            // rule lookup or the second open fails part-way through.
            close(out);
            close(outN);
        }
    }

    /**
     * Writes every unary rule entry with a positive score, taken from
     * {@code getClosedViterbiUnaryRulesByParent}, to {@code .unary} (one
     * "parent_sub child_sub score" line each) and, per rule, the number of
     * lines emitted to {@code .num.unary}.
     */
    private static void writeUnaryRules(String outName, Grammar grammar, Numberer tagNumberer)
            throws IOException {
        PrintWriter out = null;
        PrintWriter outN = null;
        try {
            out = openWriter(outName + ".unary");
            outN = openWriter(outName + ".num.unary");
            for (int state = 0; state < grammar.numStates; state++) {
                UnaryRule[] unaries = grammar.getClosedViterbiUnaryRulesByParent(state);
                for (int r = 0; r < unaries.length; r++) {
                    UnaryRule rule = unaries[r];
                    double[][] scores = rule.getScores2();
                    String cState = stateName(tagNumberer, rule.childState);
                    String pState = stateName(tagNumberer, rule.parentState);

                    int number = 0;
                    StringBuilder sb = new StringBuilder();
                    // scores is indexed [child substate][parent substate].
                    for (int cS = 0; cS < scores.length; cS++) {
                        if (scores[cS] == null) {
                            continue;
                        }
                        for (int pS = 0; pS < scores[cS].length; pS++) {
                            double p = scores[cS][pS];
                            if (p > 0) {
                                sb.append(pState).append('_').append(pS).append(' ')
                                  .append(cState).append('_').append(cS).append(' ')
                                  .append(p).append('\n');
                                number++;
                            }
                        }
                    }
                    out.print(sb.toString());
                    outN.print(number + "\n");
                }
            }
        } finally {
            close(out);
            close(outN);
        }
    }

    /** Lets the grammar write its split trees to {@code .hierarchy}. */
    private static void writeSplitTrees(String outName, Grammar grammar) throws IOException {
        Writer hierarchy = new BufferedWriter(new FileWriter(outName + ".hierarchy"));
        try {
            grammar.writeSplitTrees(hierarchy);
        } finally {
            // Whether Grammar.writeSplitTrees closes the writer itself is not
            // visible from this class. Closing here guarantees the data is
            // flushed and the file released; closing an already-closed writer
            // has no effect.
            hierarchy.close();
        }
    }

    /** Writes one "tag&lt;TAB&gt;number of substates" line per state to {@code .numstates}. */
    private static void writeNumSubStates(String outName, Grammar grammar, Numberer tagNumberer)
            throws IOException {
        PrintWriter out = openWriter(outName + ".numstates");
        try {
            for (int state = 0; state < grammar.numStates; state++) {
                String tag = stateName(tagNumberer, state);
                out.write(tag + "\t" + grammar.numSubStates[state] + "\n");
            }
        } finally {
            close(out);
        }
    }

    /** Writes the lexicon's {@code toString()} form to {@code .lexicon}. */
    private static void writeLexicon(String outName, SophisticatedLexicon lexicon)
            throws IOException {
        PrintWriter out = openWriter(outName + ".lexicon");
        try {
            out.write(lexicon.toString());
        } finally {
            close(out);
        }
    }

    /** Writes every key of the lexicon's word counter, one per line, to {@code .words}. */
    private static void writeWords(String outName, SophisticatedLexicon lexicon)
            throws IOException {
        PrintWriter out = openWriter(outName + ".words");
        try {
            for (String word : lexicon.wordCounter.keySet()) {
                out.write(word + "\n");
            }
        } finally {
            close(out);
        }
    }

    /** Opens a buffered text writer for the given file, replacing any existing file. */
    private static PrintWriter openWriter(String fileName) throws IOException {
        return new PrintWriter(new BufferedWriter(new FileWriter(fileName)));
    }

    /** Closes (and thereby flushes) the writer if it was successfully opened. */
    private static void close(PrintWriter writer) {
        if (writer != null) {
            writer.close();
        }
    }

    /** Returns the name registered for {@code state}, without a trailing "^g". */
    private static String stateName(Numberer tagNumberer, int state) {
        String name = (String) tagNumberer.object(state);
        if (name.endsWith(STATE_SUFFIX)) {
            name = name.substring(0, name.length() - STATE_SUFFIX.length());
        }
        return name;
    }
}