package edu.berkeley.nlp.PCFGLA;
import edu.berkeley.nlp.PCFGLA.smoothing.SmoothAcrossParentBits;
import edu.berkeley.nlp.PCFGLA.smoothing.SmoothAcrossParentSubstate;
import edu.berkeley.nlp.PCFGLA.smoothing.Smoother;
import edu.berkeley.nlp.io.PTBLineLexer;
import edu.berkeley.nlp.io.PTBTokenizer;
import edu.berkeley.nlp.io.PTBLexer;
import edu.berkeley.nlp.syntax.StateSet;
import edu.berkeley.nlp.syntax.Tree;
import edu.berkeley.nlp.syntax.Trees;
import edu.berkeley.nlp.ui.TreeJPanel;
import edu.berkeley.nlp.util.CommandLineUtils;
import edu.berkeley.nlp.util.Numberer;
import edu.berkeley.nlp.util.Pair;
import java.awt.AlphaComposite;
import java.awt.Graphics2D;
import java.awt.geom.Rectangle2D;
import java.awt.image.BufferedImage;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import javax.imageio.ImageIO;
import javax.swing.JFrame;
/**
 * Command-line tool that parses sentences with a grammar loaded from a
 * {@link ParserData} file and prints, for every input line, the log-likelihood
 * reported by the parser followed by one score per requested span.
 *
 * <p>Input format (one record per line, fields separated by TAB, at least three
 * fields; lines with fewer fields are silently ignored):
 * <ol>
 *   <li>the number of spans {@code n} listed in the second field,</li>
 *   <li>the spans, written as {@code (a b)(c d)...}; only the first {@code n}
 *       are used,</li>
 *   <li>(last field) the sentence text.</li>
 * </ol>
 *
 * <p>Output format: one line per processed input line containing the overall
 * log-likelihood and then each span score, every value followed by a single
 * space. Ignored and skipped (too long) input lines produce no output line, so
 * output lines are not guaranteed to align one-to-one with input lines.
 *
 * @author Slav Petrov
 */
public class SentenceSegmenter {
  /** Sentences with at least this many tokens are skipped instead of parsed. */
  private static final int MAX_SENTENCE_LENGTH = 200;

  /**
   * Panel used by {@link #writeTreeToImage(Tree, String)}. Nothing in this class
   * assigns it; it must be set by the caller before rendering.
   */
  static TreeJPanel tjp;
  static JFrame frame;

  /** Command-line options, populated reflectively by {@link OptionParser}. */
  public static class Options {
    @Option(name = "-gr", required = true, usage = "Grammarfile (Required)\n")
    public String grFileName;
    @Option(name = "-tokenize", usage = "Tokenize input first. (Default: false=text is already tokenized)")
    public boolean tokenize;
    @Option(name = "-accurate", usage = "Set thresholds for accuracy. (Default: set thresholds for efficiency)")
    public boolean accurate;
    @Option(name = "-constituent", usage = "Instead of sentence probabilities return constituent probabilities")
    public boolean constituent = false;
    @Option(name = "-inputFile", usage = "Read input from this file instead of reading it from STDIN.")
    public String inputFile;
    @Option(name = "-outputFile", usage = "Store output in this file instead of printing it to STDOUT.")
    public String outputFile;
  }

  /**
   * Entry point: loads the grammar, then scores every input line.
   *
   * <p>Terminates the JVM with status 0 on success and status 1 if the grammar
   * cannot be loaded or any error occurs while reading, parsing or writing.
   *
   * @param args command-line arguments, see {@link Options}
   */
  @SuppressWarnings("unchecked")
  public static void main(String[] args) {
    OptionParser optParser = new OptionParser(Options.class);
    Options opts = (Options) optParser.parse(args, true);

    double threshold = 1.0;

    String inFileName = opts.grFileName;
    ParserData pData = ParserData.Load(inFileName);
    if (pData == null) {
      System.out.println("Failed to load grammar from file " + inFileName + ".");
      System.exit(1);
    }
    Grammar grammar = pData.getGrammar();
    Lexicon lexicon = pData.getLexicon();
    Numberer.setNumberers(pData.getNumbs());

    CoarseToFineMaxRuleParser parser = new CoarseToFineMaxRuleParser(grammar, lexicon, threshold, -1,
        false, false, false, opts.accurate, false, true, true);
    parser.binarization = pData.getBinarization();

    boolean succeeded = false;
    BufferedReader inputData = null;
    PrintWriter outputData = null;
    try {
      inputData = openInput(opts.inputFile);
      outputData = openOutput(opts.outputFile);
      scoreLines(inputData, outputData, parser, opts);
      // PrintWriter never throws on write errors; checkError() flushes and
      // reports whether any write failed.
      if (outputData.checkError()) {
        throw new IOException("Failed to write output.");
      }
      succeeded = true;
    } catch (Exception ex) {
      ex.printStackTrace();
    } finally {
      // Closing also flushes, so results computed before a failure are kept.
      if (outputData != null) {
        outputData.close();
      }
      if (inputData != null) {
        try {
          inputData.close();
        } catch (IOException closeEx) {
          System.err.println("Failed to close input: " + closeEx.getMessage());
        }
      }
    }
    System.exit(succeeded ? 0 : 1);
  }

  /** Opens the given file as UTF-8 text, or STDIN when {@code fileName} is null. */
  private static BufferedReader openInput(String fileName) throws IOException {
    if (fileName == null) {
      return new BufferedReader(new InputStreamReader(System.in));
    }
    return new BufferedReader(new InputStreamReader(new FileInputStream(fileName), "UTF-8"));
  }

  /** Opens the given file for UTF-8 output, or STDOUT when {@code fileName} is null. */
  private static PrintWriter openOutput(String fileName) throws IOException {
    if (fileName == null) {
      return new PrintWriter(new OutputStreamWriter(System.out));
    }
    return new PrintWriter(new OutputStreamWriter(new FileOutputStream(fileName), "UTF-8"), true);
  }

  /** Reads records from {@code inputData} until EOF and writes one score line per parsed record. */
  private static void scoreLines(BufferedReader inputData, PrintWriter outputData,
      CoarseToFineMaxRuleParser parser, Options opts) throws IOException {
    PTBLineLexer tokenizer = opts.tokenize ? new PTBLineLexer() : null;
    // No POS tag constraints are supplied; kept as a typed null for the parser call.
    List<String> posTags = null;

    String line;
    while ((line = inputData.readLine()) != null) {
      String[] parts = line.split("\t");
      if (parts.length < 3) {
        continue;
      }

      int nPoints = Integer.parseInt(parts[0]);
      List<Pair<Integer, Integer>> points = parsePoints(parts[1], nPoints);

      String text = parts[parts.length - 1];
      List<String> sentence;
      if (opts.tokenize) {
        sentence = tokenizer.tokenizeLine(text);
      } else {
        sentence = Arrays.asList(text.split(" "));
      }

      if (sentence.size() >= MAX_SENTENCE_LENGTH) {
        System.err.println("Skipping sentence with " + sentence.size() + " words since it is too long.");
        continue;
      }

      Tree<String> parsedTree = parser.getBestConstrainedParse(sentence, posTags, null);
      // A tree without children is treated as a parse failure.
      double allLL = parsedTree.getChildren().isEmpty()
          ? Double.NEGATIVE_INFINITY
          : parser.getLogLikelihood();
      outputData.write(allLL + " ");

      for (Pair<Integer, Integer> point : points) {
        double partLL = parser.getSentenceProbability(point.getFirst(), point.getSecond(), opts.constituent);
        outputData.write(partLL + " ");
      }
      outputData.write("\n");
    }
    outputData.flush();
  }

  /**
   * Parses the first {@code nPoints} pairs of a span list written as
   * {@code (a b)(c d)...}.
   */
  private static List<Pair<Integer, Integer>> parsePoints(String spec, int nPoints) {
    List<Pair<Integer, Integer>> points = new ArrayList<Pair<Integer, Integer>>(nPoints);
    // Splitting on '(' leaves an empty element at index 0, so pairs start at index 1.
    String[] segments = spec.split("\\(");
    for (int i = 1; i <= nPoints; i++) {
      String[] numbers = segments[i].split(" ");
      String first = numbers[0];
      // Drop the trailing ')' from the second number.
      String second = numbers[1].substring(0, numbers[1].length() - 1);
      points.add(new Pair<Integer, Integer>(Integer.parseInt(first), Integer.parseInt(second)));
    }
    return points;
  }

  /**
   * Renders {@code tree} with the shared {@link #tjp} panel onto a transparent
   * image and stores it as a PNG file.
   *
   * @param tree the tree to draw
   * @param fileName path of the PNG file to write
   * @throws IOException if the image cannot be written
   * @throws IllegalStateException if {@link #tjp} has not been assigned
   */
  public static void writeTreeToImage(Tree<String> tree, String fileName) throws IOException {
    if (tjp == null) {
      throw new IllegalStateException("tjp must be initialized before calling writeTreeToImage.");
    }
    tjp.setTree(tree);

    BufferedImage bi = new BufferedImage(tjp.width(), tjp.height(), BufferedImage.TYPE_INT_ARGB);
    Graphics2D g2 = bi.createGraphics();
    try {
      // Clear the whole canvas to fully transparent before painting.
      g2.setComposite(AlphaComposite.getInstance(AlphaComposite.CLEAR, 1.0f));
      Rectangle2D.Double rect = new Rectangle2D.Double(0, 0, tjp.width(), tjp.height());
      g2.fill(rect);

      g2.setComposite(AlphaComposite.getInstance(AlphaComposite.SRC_OVER, 1.0f));
      tjp.paintComponent(g2); // paint the graphic to the offscreen image
    } finally {
      g2.dispose();
    }

    ImageIO.write(bi, "png", new File(fileName)); // save as png format
  }
}