package net.sf.jlinkgrammar; /* * Parser.java * * Created on October 20, 2006, 3:02 PM * * To change this template, choose Tools | Options and locate the template under * the Source Creation and Management node. Right-click the template and choose * Open. You can then make changes to the template in the Source Editor. */ import java.io.FileInputStream; import java.io.FileOutputStream; import java.io.IOException; import java.io.PrintStream; /** * This class is meant to be a bean type interface to link grammar. All the * options are preset and specific actions have to be taken to override them. * @author johnryan */ public class Parser { private static Dictionary dict; private static Sentence sent; private static String dictionary_file = null; private static String post_process_knowledge_file = null; private static String constituent_knowledge_file = null; private static String affix_file = null; private static boolean pp_on = true; private static boolean af_on = true; private static boolean cons_on = true; private static int num_linkages; private static StringBuffer input_string = new StringBuffer(); private static int label = GlobalBean.NOT_LABEL; private static ParseOptions opts; /** * Creates a new instance of Parser */ public Parser() { String[] args = new String[1]; args[0] = new String("parseit"); InitializeVars(args); } public static void InitializeVars(String arg[]) { int i = 0; if (arg.length > 1 && (arg[0].charAt(0) != '-')) { /* the dictionary is the first argument if it doesn't begin with "-" */ dictionary_file = arg[0]; i++; } opts = new ParseOptions(); GlobalBean.opts = opts; // opts = new ParseOptions(); opts.parse_options_set_max_sentence_length(70); opts.parse_options_set_linkage_limit(1000); opts.parse_options_set_short_length(10); for (; i < arg.length; i++) { if (arg[i].charAt(0) == '-') { if (arg[i].equals("-pp")) { if ((post_process_knowledge_file != null) || (i + 1 == arg.length)) print_usage(arg[0]); post_process_knowledge_file = arg[i + 1]; i++; } else if (arg[i].equals("-c")) { if ((constituent_knowledge_file != null) || (i + 1 == arg.length)) print_usage(arg[0]); constituent_knowledge_file = arg[i + 1]; i++; } else if (arg[i].equals("-a")) { if ((affix_file != null) || (i + 1 == arg.length)) print_usage(arg[0]); affix_file = arg[i + 1]; i++; } else if (arg[i].equals("-ppoff")) { pp_on = false; } else if (arg[i].equals("-coff")) { cons_on = false; } else if (arg[i].equals("-aoff")) { af_on = false; } else if (arg[i].equals("-batch")) { if ((opts.input != System.in) || (i + 1 == arg.length)) print_usage(arg[0]); try { opts.input = new FileInputStream(arg[i + 1]); } catch (IOException ex ) { // TODO - Do something } i++; } else if (arg[i].equals("-out")) { if ((opts.out != System.out) || (i + 1 == arg.length)) print_usage(arg[0]); try { opts.out = new PrintStream(new FileOutputStream(arg[i + 1])); } catch (IOException ex ) { // TODO - Do something } i++; } else if (arg[i].charAt(1) == '!') { } else { print_usage(arg[0]); } } else { // TODO - print_usage(arg[0]); } } if (!pp_on && post_process_knowledge_file != null) print_usage(arg[0]); if (dictionary_file == null) { // dictionary_file = defaultDataDir + "/link/4.0.dict"; dictionary_file = "4.0.dict"; // TODO: logging System.err.println("No dictionary file specified. Using " + dictionary_file + "."); } if (af_on && affix_file == null) { // affix_file = defaultDataDir + "/link/4.0.affix"; affix_file = "4.0.affix"; // TODO: logging System.err.println("No affix file specified. Using " + affix_file + "."); } if (pp_on && post_process_knowledge_file == null) { // post_process_knowledge_file = defaultDataDir + "/link/4.0.knowledge"; post_process_knowledge_file = "4.0.knowledge"; // TODO: logging System.err.println("No post process knowledge file specified. Using " + post_process_knowledge_file + "."); } if (cons_on && constituent_knowledge_file == null) { // constituent_knowledge_file = defaultDataDir + "/link/4.0.constituent-knowledge"; constituent_knowledge_file = "4.0.constituent-knowledge"; // TODO: logging System.err.println("No constituent knowledge file specified. Using " + constituent_knowledge_file + "."); } try { dict = new Dictionary(opts, dictionary_file, post_process_knowledge_file, constituent_knowledge_file, affix_file); } catch (IOException ex ) { // TODO - Do something } /* process the command line like commands */ for (i = 1; i < arg.length; i++) { if (!arg[i].equals("-pp") && !arg[i].equals("-c") && !arg[i].equals("-a")) { i++; } else if ( arg[i].charAt(0) == '-' && !arg[i].equals("-ppoff") && !arg[i].equals("-coff") && !arg[i].equals("-aoff")) { opts.issue_special_command(arg[i].substring(1), dict); } } } public static void doIt(String arg[]) throws IOException { InitializeVars(arg); /* This section is a simple example of the API for those trying to figure out how to * incorporate it into their own program. Un-comment it to see the results */ { String testString = "Which camera is small?"; // a simple test sentence int rWordIndex; int lWordIndex; String leftWord; String rightWord; String linkLabel; // Set up a quick test sent = new Sentence(testString, dict, opts); // First parse with cost 0 or 1 and no null links opts.parse_options_set_disjunct_cost(2); opts.parse_options_set_min_null_count(0); opts.parse_options_set_max_null_count(0); opts.parse_options_reset_resources(); num_linkages = sent.sentence_parse(opts); if ( num_linkages == 0) { // O.K. we have a null link (i.e. word without a link) // so allow one and try again opts.parse_options_set_min_null_count(1); opts.parse_options_set_max_null_count(sent.sentence_length()); num_linkages = sent.sentence_parse(opts); } //This is an example of the API uncomment it to see it work. // Normally you loop over linkages, here we only choose the first Linkage myLinkage = new Linkage(0, sent, opts); // Normally you loop through sublinkages myLinkage.linkage_get_num_sublinkages(); // Only choose the first sublinkage myLinkage.linkage_set_current_sublinkage(0); int numLinks = myLinkage.linkage_get_num_links(); for (int linkIndex = 0; linkIndex < numLinks; linkIndex++) { rWordIndex = myLinkage.linkage_get_link_rword(linkIndex); lWordIndex = myLinkage.linkage_get_link_lword(linkIndex); rightWord = myLinkage.word.get(rWordIndex); leftWord = myLinkage.word.get(lWordIndex); linkLabel = myLinkage.linkage_get_link_label(linkIndex); opts.out.println(leftWord + "---" + linkLabel + "---" + rightWord); } } /* * This is the standard command line parser reading from the standard input and * displaying on the standard output. */ while (GlobalBean.fget_input_string(input_string, opts.input, opts.out, opts)) { if (input_string.length() == 0) { continue; } if (input_string.equals("quit\n") || input_string.equals("exit\n")) break; if (GlobalBean.special_command(input_string, dict)) continue; if (opts.parse_options_get_echo_on()) { opts.out.println(input_string); } if (opts.parse_options_get_batch_mode()) { label = GlobalBean.strip_off_label(input_string); } /** * Create the sentence object with the inputs "sentence string, dictionary to use, options" */ sent = new Sentence(input_string.toString(), dict, opts); if (sent.sentence_length() > opts.parse_options_get_max_sentence_length()) { if (opts.verbosity > 0) { opts.out.println( "Sentence length (" + sent.sentence_length() + " words) exceeds maximum allowable (" + opts.parse_options_get_max_sentence_length() + " words)"); } continue; } /* First parse with cost 0 or 1 and no null links */ opts.parse_options_set_disjunct_cost(2); opts.parse_options_set_min_null_count(0); opts.parse_options_set_max_null_count(0); opts.parse_options_reset_resources(); num_linkages = sent.sentence_parse(opts); /* Now parse with null links */ if (num_linkages == 0 && !opts.parse_options_get_batch_mode()) { if (opts.verbosity > 0) opts.out.println("No complete linkages found."); if (opts.parse_options_get_allow_null()) { opts.parse_options_set_min_null_count(1); opts.parse_options_set_max_null_count(sent.sentence_length()); num_linkages = sent.sentence_parse(opts); } } opts.print_total_time(); if (opts.parse_options_get_batch_mode()) { GlobalBean.batch_process_some_linkages(label, sent, opts); } else { GlobalBean.process_some_linkages(sent, opts); } } if (opts.parse_options_get_batch_mode()) { opts.print_time("Total"); opts.out.println("" + GlobalBean.batch_errors + " error" + ((GlobalBean.batch_errors == 1) ? "" : "s") + "."); } } /** * Instead of printing a link diagram print an XML tree * * @param sent the sentence to print. */ public void printWordsLabelsAndLinks(Sentence sent) { CNode root, current, next, previous; Linkage linkage; int numLinkages; int num_to_query; int i; if (sent.sentence_num_linkages_found() > 0) { // We have to walk all the linakges throwing away the bad ones. num_to_query = Math.min(sent.sentence_num_linkages_post_processed(), 1000); for (i = 0; i < num_to_query; ++i) { if ((sent.sentence_num_violations(i) > 0) && (!opts.parse_options_get_display_bad())) { continue; } // O.K. we have our fisrt valid linkage. Do we want to print them all? No just one. // TODO - optimize this somehow linkage = new Linkage(i, sent, opts); // linkage = new Linkage(0, sent, opts); int j, first_sublinkage; // In effect we are saying display sublinkages linkage.linkage_compute_union(); numLinkages = linkage.linkage_get_num_sublinkages(); first_sublinkage = numLinkages - 1; for (j = first_sublinkage; j < numLinkages; ++j) { linkage.linkage_set_current_sublinkage(j); root = linkage.linkage_constituent_tree(); // Now we can walk the linkage and print the structure current = root; int w = 0; do { opts.out.println(linkage.word.get(w++).toString()); displayCNode(current); } while (current.next != null); // string = linkage_print_diagram(); // opts.out.println(string); } } } } private static void print_usage(String arg){ System.out.println(arg); } private static void displayCNode(CNode current){ System.out.println(current.label); } }