package net.sf.jlinkgrammar;
import java.io.IOException;
import java.io.InputStream;
import java.io.PrintStream;
/**
* This is a hacked construct. This bean holds several constructs and variables
* needed across the entire link grammar program. This should be a parent and all
* link grammar objects should derive from it. That way we better encapsulate the
* variables.
*/
public class GlobalBean {
    /**
     * This is a hack that allows one to discard disjuncts containing
     * connectors whose cost is greater than a given bound. This number plus
     * the cost of any connectors on a disjunct must remain negative, and
     * this number multiplied by the number of costly connectors on any
     * disjunct must fit into an integer.
     */
    public static final int NEGATIVECOST = -1000000;
    /**
     * No connector will have a cost this high.
     */
    public static final int NOCUTOFF = 1000;
    /**
     * The string to use to show the left wall.
     */
    public static final String LEFT_WALL_DISPLAY = "LEFT-WALL";
    /**
     * If this connector is used on the wall, then suppress the display of
     * the wall. (The inherited comment also mentions using a bogus name here
     * to prevent ever suppressing.)
     */
    public static final String LEFT_WALL_SUPPRESS = "Wd";
    /**
     * The string to use to show the right wall.
     */
    public static final String RIGHT_WALL_DISPLAY = "RIGHT-WALL";
    /**
     * Suppress the wall if this connector is used on it.
     */
    static final String RIGHT_WALL_SUPPRESS = "RW";
    /**
     * Defines the name of a special string in the dictionary.
     */
    public static final String LEFT_WALL_WORD = "LEFT-WALL";
    /**
     * Defines the name of a special string in the dictionary.
     */
    public static final String RIGHT_WALL_WORD = "RIGHT-WALL";
    /**
     * Defines the name of a special string in the dictionary.
     */
    public static final String POSTPROCESS_WORD = "POSTPROCESS";
    /**
     * Defines the name of a special string in the dictionary.
     */
    public static final String ANDABLE_CONNECTORS_WORD = "ANDABLE-CONNECTORS";
    /**
     * Defines the name of a special string in the dictionary.
     */
    public static final String UNLIMITED_CONNECTORS_WORD = "UNLIMITED-CONNECTORS";
    /**
     * Defines the name of a special string in the dictionary.
     */
    public static final String PROPER_WORD = "CAPITALIZED-WORDS";
    /**
     * Defines the name of a special string in the dictionary.
     */
    public static final String PL_PROPER_WORD = "PL-CAPITALIZED-WORDS";
    /**
     * Defines the name of a special string in the dictionary.
     */
    public static final String HYPHENATED_WORD = "HYPHENATED-WORDS";
    /**
     * Defines the name of a special string in the dictionary.
     */
    public static final String NUMBER_WORD = "NUMBERS";
    /**
     * Defines the name of a special string in the dictionary.
     */
    public static final String ING_WORD = "ING-WORDS";
    /**
     * Defines the name of a special string in the dictionary.
     */
    public static final String S_WORD = "S-WORDS";
    /**
     * Defines the name of a special string in the dictionary.
     */
    public static final String ED_WORD = "ED-WORDS";
    /**
     * Defines the name of a special string in the dictionary.
     */
    public static final String LY_WORD = "LY-WORDS";
    /**
     * Defines the name of a special string in the dictionary.
     */
    public static final String UNKNOWN_WORD = "UNKNOWN-WORD";
    /**
     * File names (including paths) should not be longer than this.
     */
    public static final int MAX_PATH_NAME = 400;

    /*
     * Some size definitions. Reduce these for small machines - left over
     * from C, not needed in Java.
     */

    /**
     * Maximum number of chars in a word.
     */
    public static final int MAX_WORD = 60;
    /**
     * Maximum number of chars in a sentence.
     */
    public static final int MAX_LINE = 1500;
    /**
     * Maximum number of words in a sentence.
     */
    public static final int MAX_SENTENCE = 250;
    /**
     * This is the maximum number of links allowed.
     *
     * Inherited comment: it cannot be more than 254, because word
     * MAX_SENTENCE+1 is used to indicate that nothing can connect to this
     * connector, and this should fit in one byte (if the word field of a
     * connector is an unsigned char).
     *
     * NOTE(review): with MAX_SENTENCE = 250 this evaluates to 497, so the
     * "254" limit above appears to be stale - confirm before relying on it.
     */
    public static final int MAX_LINKS = (2 * MAX_SENTENCE - 3);
    /**
     * Maximum number of chars in a token.
     */
    public static final int MAX_TOKEN_LENGTH = 50;
    /**
     * Max disjunct cost to allow.
     */
    public static final int MAX_DISJUNCT_COST = 10000;

    public static final int DOWN_priority = 2;
    public static final int UP_priority = 1;
    public static final int THIN_priority = 0;

    /** The labels >= 0 are used by fat links while -1 is used for normal connectors. */
    public static final int NORMAL_LABEL = (-1);
    public static final int UNLIMITED_LEN = 255;
    public static final int SHORT_LEN = 6;
    public static final int NO_WORD = 255;

    /* Here are the types */
    public static final int OR_type = 0;
    public static final int AND_type = 1;
    public static final int CONNECTOR_type = 2;

    /**
     * These parameters tell power_pruning whether this is before or after
     * generating "and" disjuncts. GENTLE is before, RUTHLESS is after.
     */
    public static final int GENTLE = 1;
    public static final int RUTHLESS = 0;

    public static final int PP_LEXER_MAX_LABELS = 512;
    /** CostModel: sort by Violations, Disjunct cost, And cost, Link cost. */
    public static final int VDAL = 1;
    public static final int HT_SIZE = (1 << 10);
    /**
     * Size of the random table for computing the hash functions.
     * Must be a power of 2.
     */
    public static final int RTSIZE = 256;

    public static final int NODICT = 1;
    public static final int DICTPARSE = 2;
    public static final int WORDFILE = 3;
    public static final int SEPARATE = 4;
    public static final int NOTINDICT = 5;
    public static final int BUILDEXPR = 6;
    public static final int INTERNALERROR = 7;

    public static final int LINKSET_SPARSENESS = 2;
    public static final int LINKSET_MAX_SETS = 512;
    public static final int LINKSET_DEFAULT_SEED = 37;

    public static final int PP_FIRST_PASS = 1;
    public static final int PP_SECOND_PASS = 2;

    public static final int MAXINPUT = 1024;
    public static final int DISPLAY_MAX = 1024;

    /** Input lines beginning with this are ignored. */
    public static final char COMMENT_CHAR = '%';
    public static final char UNGRAMMATICAL = '*';
    public static final char PARSE_WITH_DISJUNCT_COST_GT_0 = ':';
    public static final char NO_LABEL = ' ';

    /**
     * The indication in a word field that this connector cannot be used --
     * is obsolete.
     */
    public static final int BAD_WORD = (MAX_SENTENCE + 1);
    public static final int PP_MAX_DOMAINS = 128;
    public static final int LINKSET_SEED_VALUE = 37;
    /** Just needs to be approximate. */
    public static final int PP_MAX_UNIQUE_LINK_NAMES = 1024;
    public static final int LINE_LIMIT = 70;
    public static final String DEFAULTPATH = ".:./data:./data/link:/home/liferay/linkgrammar:/home/liferay/linkgrammar/data:/home/liferay/linkgrammar/data/link";
    public static final int MAX_STRIP = 10;
    public static final int MAX_HEIGHT = 30;

    /** To hook the comma to the following "and". */
    public static final int COMMA_LABEL = (-2);
    /** To connect the "either" to the following "or". */
    public static final int EITHER_LABEL = (-3);
    /** To connect the "neither" to the following "nor". */
    public static final int NEITHER_LABEL = (-4);
    /** To connect the "not" to the following "but". */
    public static final int NOT_LABEL = (-5);
    /** To connect the "not" to the following "only". */
    public static final int NOTONLY_LABEL = (-6);
    /** To connect the "both" to the following "and". */
    public static final int BOTH_LABEL = (-7);

    public static final int MAXCONSTITUENTS = 1024;
    public static final int MAXSUBL = 16;
    public static final char OPEN_BRACKET = '[';
    public static final char CLOSE_BRACKET = ']';

    public static final int CType_OPEN = 0;
    public static final int CType_CLOSE = 1;
    public static final int CType_WORD = 2;

    public static final int WType_NONE = 0;
    public static final int WType_STYPE = 1;
    public static final int WType_PTYPE = 2;
    public static final int WType_QTYPE = 3;
    public static final int WType_QDTYPE = 4;

    /** Running count of errors reported by {@link #there_was_an_error}. */
    public static int batch_errors = 0;
    /**
     * True when {@link #input_char} holds a character that was read ahead by
     * {@link #process_some_linkages} and must be consumed first by the next
     * call to {@link #fget_input_string}.
     */
    public static boolean input_pending = false;
    /** The read-ahead character; only meaningful while {@link #input_pending} is true. */
    public static int input_char;
    /** Options used by {@link #special_command} to run "!" commands. */
    public static ParseOptions opts;
    public static int lperrno;
    public static String lperrmsg;
    /** Keeping statistics. */
    public static int STAT_N_disjuncts;
    public static int STAT_calls_to_equality_test;

    /**
     * Appends s, then the last |t|-|s| characters of t, to {@code string}.
     * If s is longer than t, s is truncated to the length of t, so exactly
     * {@code t.length()} characters are always appended.
     *
     * @param string buffer that receives the characters
     * @param s      text that takes precedence at the left
     * @param t      text that supplies the total width and the remaining tail
     */
    public static void left_append_string(StringBuffer string, String s, String t) {
        final int total = t.length();
        final int fromS = Math.min(s.length(), total);
        string.append(s, 0, fromS);
        string.append(t, fromS, total);
    }

    /**
     * Removes a leading batch label from the input, if there is one.
     * When the first character is {@link #UNGRAMMATICAL} or
     * {@link #PARSE_WITH_DISJUNCT_COST_GT_0} it is overwritten in place with
     * a blank and returned; otherwise the input is left untouched.
     *
     * @param input_string the line to inspect; may be modified
     * @return the label character that was stripped, or {@link #NO_LABEL}
     *         when the line is empty or carries no label
     */
    public static int strip_off_label(StringBuffer input_string) {
        // fget_input_string drops the terminating newline, so a blank line
        // arrives here as an empty buffer; it simply has no label.
        if (input_string.length() == 0) {
            return NO_LABEL;
        }
        final char first = input_string.charAt(0);
        switch (first) {
            case UNGRAMMATICAL :
            case PARSE_WITH_DISJUNCT_COST_GT_0 :
                input_string.setCharAt(0, ' ');
                return first;
            default :
                return NO_LABEL;
        }
    }

    /**
     * Decides whether an input line is something other than a sentence.
     * Blank lines and lines starting with {@link #COMMENT_CHAR} are reported
     * as special without further action. A line starting with '!' is handed
     * (minus the '!') to {@code opts.issue_special_command} using the static
     * {@link #opts} field.
     *
     * @param input_string the line just read
     * @param dict         dictionary passed through to the special command
     * @return true if the line was blank, a comment, or a '!' command
     */
    public static boolean special_command(StringBuffer input_string, Dictionary dict) {
        // An empty buffer is a blank line whose newline was already removed
        // by the reader; treat it like the explicit '\n' case below instead
        // of failing in charAt(0).
        if (input_string.length() == 0) {
            return true;
        }
        final char first = input_string.charAt(0);
        if (first == '\n' || first == COMMENT_CHAR) {
            return true;
        }
        if (first == '!') {
            opts.issue_special_command(input_string.substring(1), dict);
            return true;
        }
        return false;
    }

    /**
     * Batch-mode handling of a parsed sentence: when
     * {@link #there_was_an_error} reports a mismatch, the first linkage (if
     * any linkage was found) is processed and the running error count is
     * printed.
     *
     * @param label label returned by {@link #strip_off_label}
     * @param sent  the parsed sentence
     * @param opts  options supplying the output stream
     */
    public static void batch_process_some_linkages(int label, Sentence sent, ParseOptions opts) {
        if (there_was_an_error(label, sent, opts) == 0) {
            return;
        }
        if (sent.sentence_num_linkages_found() > 0) {
            Linkage linkage = new Linkage(0, sent, opts);
            linkage.process_linkage(opts);
        }
        opts.out.println("+++++ error " + batch_errors);
    }

    /**
     * Compares the parse result with the label attached to the sentence.
     * Each detected mismatch prints a message to {@code opts.out} and
     * increments {@link #batch_errors}.
     *
     * @param label label returned by {@link #strip_off_label}
     * @param sent  the parsed sentence
     * @param opts  options supplying the output stream
     * @return 0 when the outcome matches the label; {@link #UNGRAMMATICAL}
     *         when a sentence labelled ungrammatical has a valid linkage or
     *         an unlabelled-as-ungrammatical sentence has none;
     *         {@link #PARSE_WITH_DISJUNCT_COST_GT_0} when a sentence with
     *         that label has a first linkage of disjunct cost 0
     */
    public static int there_was_an_error(int label, Sentence sent, ParseOptions opts) {
        if (sent.sentence_num_valid_linkages() > 0) {
            if (label == UNGRAMMATICAL) {
                opts.out.println("error: parsed ungrammatical sentence");
                batch_errors++;
                return UNGRAMMATICAL;
            }
            if ((sent.sentence_disjunct_cost(0) == 0) && (label == PARSE_WITH_DISJUNCT_COST_GT_0)) {
                opts.out.println("error: cost=0");
                batch_errors++;
                return PARSE_WITH_DISJUNCT_COST_GT_0;
            }
        } else if (label != UNGRAMMATICAL) {
            opts.out.println("error: failed");
            batch_errors++;
            return UNGRAMMATICAL;
        }
        return 0;
    }

    /**
     * Interactive display of the linkages of a sentence. Up to
     * {@link #DISPLAY_MAX} linkages are considered; linkages with violations
     * are skipped unless the display-bad option is on. After each displayed
     * linkage (except when the displayed count reaches the query count) a
     * character is read from {@code System.in}; anything other than a
     * newline or carriage return stops the display and is stored in
     * {@link #input_char} / {@link #input_pending} for the next read.
     *
     * @param sent the parsed sentence
     * @param opts options controlling verbosity, output and bad-linkage display
     * @throws IOException if reading from {@code System.in} fails
     */
    public static void process_some_linkages(Sentence sent, ParseOptions opts) throws IOException {
        if (opts.verbosity > 0) {
            sent.print_parse_statistics(opts);
        }

        final int num_to_query;
        if (!opts.parse_options_get_display_bad()) {
            num_to_query = Math.min(sent.sentence_num_valid_linkages(), DISPLAY_MAX);
        } else {
            num_to_query = Math.min(sent.sentence_num_linkages_post_processed(), DISPLAY_MAX);
        }

        int num_displayed = 0;
        for (int i = 0; i < num_to_query; ++i) {
            if ((sent.sentence_num_violations(i) > 0) && (!opts.parse_options_get_display_bad())) {
                continue;
            }
            Linkage linkage = new Linkage(i, sent, opts);
            if (opts.verbosity > 0) {
                if (sent.sentence_num_valid_linkages() == 1 && (!opts.parse_options_get_display_bad())) {
                    opts.out.print(" Unique linkage, ");
                } else if ((opts.parse_options_get_display_bad()) && (sent.sentence_num_violations(i) > 0)) {
                    opts.out.print(" Linkage " + (i + 1) + " (bad), ");
                } else {
                    opts.out.print(" Linkage " + (i + 1) + ", ");
                }
                if (!linkage.linkage_is_canonical()) {
                    opts.out.print("non-canonical, ");
                }
                if (linkage.linkage_is_improper()) {
                    opts.out.print("improper fat linkage, ");
                }
                if (linkage.linkage_has_inconsistent_domains()) {
                    opts.out.print("inconsistent domains, ");
                }
                opts.out.println(
                    "cost vector = (UNUSED="
                        + linkage.linkage_unused_word_cost()
                        + " DIS="
                        + linkage.linkage_disjunct_cost()
                        + " AND="
                        + linkage.linkage_and_cost()
                        + " LEN="
                        + linkage.linkage_link_cost()
                        + ")");
            }
            linkage.process_linkage(opts);
            if (++num_displayed < num_to_query) {
                if (opts.verbosity > 0) {
                    opts.out.println("Press RETURN for the next linkage.");
                }
                int c = fget_input_char(System.in, opts);
                if (c != '\n' && c != '\r') {
                    // Not a plain RETURN: keep the character (or the -1 end
                    // marker) for the next line read and stop displaying.
                    input_char = c;
                    input_pending = true;
                    break;
                }
            }
        }
    }

    /**
     * Reads a single character, printing the "linkparser> " prompt first
     * when not in batch mode and verbosity is positive. The output stream is
     * always flushed before reading.
     *
     * @param in   stream to read from
     * @param opts options supplying batch mode, verbosity and output stream
     * @return the value of {@code in.read()}: the byte read, or -1 at end of stream
     * @throws IOException if the read fails
     */
    public static int fget_input_char(InputStream in, ParseOptions opts) throws IOException {
        if (!opts.parse_options_get_batch_mode() && (opts.verbosity > 0)) {
            opts.out.print("linkparser> ");
        }
        opts.out.flush();
        return in.read();
    }

    /**
     * Reads one line into {@code input_string}, without the terminating
     * '\n'. The buffer is cleared first. If a read-ahead character is
     * pending (see {@link #input_pending}) it is used as the first character
     * and no prompt is printed; otherwise the prompt is printed when not in
     * batch mode and verbosity is positive.
     *
     * A final line that ends at end of stream without a newline is still
     * delivered; the following call then reports end of input.
     *
     * @param input_string buffer that receives the line
     * @param in           stream to read from
     * @param out          stream used for the prompt
     * @param opts         options supplying batch mode and verbosity
     * @return true if a line was read; false if the stream ended before any
     *         character of a new line was available
     * @throws IOException if the read fails
     */
    public static boolean fget_input_string(StringBuffer input_string, InputStream in, PrintStream out, ParseOptions opts)
        throws IOException {
        int c;
        input_string.setLength(0);
        if (input_pending) {
            input_pending = false;
            c = input_char;
        } else {
            if (!opts.parse_options_get_batch_mode() && opts.verbosity > 0) {
                // NOTE(review): fget_input_char prints this prompt with
                // print(), not println(); confirm which form is intended.
                out.println("linkparser> ");
            }
            out.flush();
            c = in.read();
        }
        while (c != '\n') {
            if (c < 0) {
                // End of stream: hand back whatever was collected so an
                // unterminated last line is not silently discarded.
                return input_string.length() > 0;
            }
            input_string.append((char) c);
            c = in.read();
        }
        return true;
    }
}