package net.sf.jlinkgrammar;
/**
 * A single connector: a labelled connector string together with a priority,
 * a multi-connector flag and a link-length limit. Connectors are chained into
 * singly linked lists through {@link #next}.
 *
 * <p>A connector string is expected to start with zero or more upper case
 * letters (its "type"), followed by subscript characters (lower case letters,
 * {@code '*'} or {@code '^'}). The static matching routines in this class
 * decide whether two connectors may be linked.
 *
 * <p>Method and field names keep the snake_case spelling of the C code this
 * class was ported from, because other classes of the package refer to them.
 */
public class Connector {
    /** Label of the connector; two connectors with different labels never match. */
    int label;

    /**
     * The nearest word to my left (or right) that this could connect to.
     * Computed by power pruning. Note that {@link #match_in_connector_set}
     * compares this field of the set's entries against its direction argument.
     */
    int word;

    /**
     * If this is a length limited connector, this gives the limit of the length
     * of the link that can be used on this connector. Since this is strictly a
     * function of the connector name, efficiency is the only reason to store it.
     * {@link #init_connector()} sets it to {@code GlobalBean.UNLIMITED_LEN}
     * (the original comment states that "no limit" is encoded as 255).
     */
    int length_limit;

    /**
     * Priority of the connector. The matching code distinguishes
     * {@code GlobalBean.THIN_priority}, {@code GlobalBean.UP_priority} and
     * {@code GlobalBean.DOWN_priority}.
     */
    int priority;

    /** True if this is a multi-connector. */
    boolean multi;

    /** Next connector of the list, or {@code null} at the end of the list. */
    Connector next;

    /** The connector string (upper case type letters followed by subscripts). */
    String string;

    /**
     * Shallow copy constructor: every field, including {@link #next} and the
     * (shared) string reference, is copied from {@code c}.
     *
     * @param c the connector to copy; must not be {@code null}
     */
    Connector(Connector c) {
        label = c.label;
        word = c.word;
        length_limit = c.length_limit;
        priority = c.priority;
        multi = c.multi;
        next = c.next;
        string = c.string;
    }

    /** Creates a connector with all fields at their Java default values. */
    Connector() {
    }

    /**
     * Marks this connector as having no length limit.
     *
     * @return this connector, to allow call chaining
     */
    Connector init_connector() {
        length_limit = GlobalBean.UNLIMITED_LEN;
        return this;
    }

    /**
     * Reverses the order of the list {@code e}. Destructive: the {@code next}
     * fields of the existing nodes are rewritten, no nodes are allocated.
     *
     * @param e head of the list to reverse, may be {@code null}
     * @return the head of the reversed list ({@code null} for an empty list)
     */
    static Connector reverse(Connector e) {
        Connector head = null;
        while (e != null) {
            Connector rest = e.next;
            e.next = head;
            head = e;
            e = rest;
        }
        return head;
    }

    /**
     * Exact equality used by pruning: label, multi flag, priority and the whole
     * string must be equal. {@link #connectors_equal_AND} is similar but ignores
     * priorities; this method does not.
     *
     * @param c2 the connector to compare with
     * @return true if the two connectors are exactly equal
     */
    boolean connectors_equal_prune(Connector c2) {
        return label == c2.label
            && multi == c2.multi
            && priority == c2.priority
            && string.equals(c2.string);
    }

    /**
     * Two connectors are said to be of the same type if they have the same
     * label and the initial upper case letters of their strings match.
     *
     * <p>The upper case prefixes must have the same length: "S" and "SI" are
     * different types. This is the same rule that {@link #match} and
     * {@link #prune_match} apply, and it agrees with
     * {@link #and_connector_hash}, which hashes the complete upper case prefix.
     *
     * @param c2 the connector to compare with
     * @return true if both connectors have the same label and upper case prefix
     */
    boolean connector_types_equal(Connector c2) {
        if (label != c2.label) {
            return false;
        }
        return upperCasePrefixEnd(string, c2.string) >= 0;
    }

    /**
     * Two connectors are said to be equal if they are of the same type
     * (see {@link #connector_types_equal}), they have the same multi field,
     * and they have exactly the same string (including lower case chars).
     * Priorities are ignored.
     *
     * @param c2 the connector to compare with
     * @return true if label, multi flag and string are equal
     */
    boolean connectors_equal_AND(Connector c2) {
        return label == c2.label && multi == c2.multi && string.equals(c2.string);
    }

    /**
     * Returns true if this connector is in the given connector set, false
     * otherwise (also false when {@code conset} is {@code null}).
     *
     * @param sent   the sentence, passed through to {@link #x_match}
     * @param conset the connector set to search, may be {@code null}
     * @param d      {@code '+'} means this connector is on the right side of the
     *               disjunct, {@code '-'} means it is on the left side
     * @return true if an entry of the matching hash bucket matches this
     *         connector and has its {@code word} field equal to {@code d}
     */
    boolean match_in_connector_set(Sentence sent, ConnectorSet conset, int d) {
        if (conset == null) {
            return false;
        }
        int h = conset.connector_set_hash(string, d);
        for (Connector c1 = conset.hash_table[h]; c1 != null; c1 = c1.next) {
            if (x_match(sent, c1, this) && (d == c1.word)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Hash function that takes this connector and a seed value {@code i}.
     * It only looks at the leading upper case letters of the string and at the
     * label. This ensures that if two connectors match, then they must hash to
     * the same place.
     *
     * <p>The index masks presumably rely on {@code GlobalBean.RTSIZE} and
     * {@code GlobalBean.HT_SIZE} being powers of two -- confirm in GlobalBean.
     *
     * @param i seed value
     * @return the hash value masked with {@code GlobalBean.HT_SIZE - 1}
     */
    int and_connector_hash(int i) {
        int h = i;
        h = h + (h << 1) + MyRandom.randtable[(label + h) & (GlobalBean.RTSIZE - 1)];
        for (int j = 0; j < string.length() && Character.isUpperCase(string.charAt(j)); j++) {
            h = h + (h << 1) + MyRandom.randtable[(string.charAt(j) + h) & (GlobalBean.RTSIZE - 1)];
        }
        return h & (GlobalBean.HT_SIZE - 1);
    }

    /**
     * Same as {@link #match} with both word positions set to 0, i.e. without
     * taking the distance between the words into account.
     */
    static boolean x_match(Sentence sent, Connector a, Connector b) {
        return match(sent, a, b, 0, 0);
    }

    /**
     * Returns true if {@code a} and {@code b} match according to the connector
     * matching rules. The connector strings must be properly formed, starting
     * with zero or more upper case letters, followed by a sequence of lower
     * case letters, "*"s or "^"s. The algorithm is symmetric with respect to
     * {@code a} and {@code b}.
     *
     * <p>It works as follows: the labels must match. The sequence of upper case
     * letters must match exactly. The distance between the two words must not
     * exceed the length limit of either connector. The priorities must be
     * compatible (both THIN_priority, or one UP_priority and one
     * DOWN_priority); the way the subscripts are compared depends on which of
     * these cases applies.
     *
     * @param sent the sentence, used for its {@code effective_dist} table
     * @param a    first connector
     * @param b    second connector
     * @param aw   word position of {@code a}
     * @param bw   word position of {@code b}; unless both positions are 0,
     *             {@code aw < bw} is required
     * @return true if the connectors match
     * @throws RuntimeException if the positions are not both 0 and not in
     *         increasing order (only reached when labels and upper case
     *         prefixes already agree)
     */
    static boolean match(Sentence sent, Connector a, Connector b, int aw, int bw) {
        if (a.label != b.label) {
            return false;
        }
        String s = a.string;
        String t = b.string;
        int i = upperCasePrefixEnd(s, t);
        if (i < 0) {
            return false;
        }
        int dist = effectiveDistance(sent, aw, bw, "match()");
        if (dist > a.length_limit || dist > b.length_limit) {
            return false;
        }
        int x = a.priority;
        int y = b.priority;
        if ((x == GlobalBean.THIN_priority) && (y == GlobalBean.THIN_priority)) {
            return thinSubscriptsMatch(s, t, i);
        } else if ((x == GlobalBean.UP_priority) && (y == GlobalBean.DOWN_priority)) {
            return upDownSubscriptsMatch(s, t, i);
        } else if ((y == GlobalBean.UP_priority) && (x == GlobalBean.DOWN_priority)) {
            return upDownSubscriptsMatch(t, s, i);
        } else {
            return false;
        }
    }

    /**
     * Same as {@link #prune_match} with both word positions set to 0, i.e.
     * without taking the distance between the words into account.
     */
    static boolean x_prune_match(Sentence sent, Connector a, Connector b) {
        return prune_match(sent, a, b, 0, 0);
    }

    /**
     * This is almost identical to {@link #match}. Its reason for existence is
     * the rather subtle fact that "and" can transform an "Ss" connector into
     * "Sp". This means that in order for pruning to work, we must allow an "Ss"
     * connector on a word to match an "Sp" connector on a word to its right.
     * This is what this version of match allows. We assume that {@code a} is on
     * a word to the left of {@code b}.
     *
     * @param sent the sentence, used for its {@code effective_dist} table
     * @param a    connector on the left word
     * @param b    connector on the right word
     * @param aw   word position of {@code a}
     * @param bw   word position of {@code b}; unless both positions are 0,
     *             {@code aw < bw} is required
     * @return true if the connectors match for the purposes of pruning
     * @throws RuntimeException if the positions are not both 0 and not in
     *         increasing order (only reached when labels and upper case
     *         prefixes already agree)
     */
    static boolean prune_match(Sentence sent, Connector a, Connector b, int aw, int bw) {
        if (a.label != b.label) {
            return false;
        }
        String s = a.string;
        String t = b.string;
        int i = upperCasePrefixEnd(s, t);
        if (i < 0) {
            return false;
        }
        int dist = effectiveDistance(sent, aw, bw, "prune_match()");
        if (dist > a.length_limit || dist > b.length_limit) {
            return false;
        }
        int x = a.priority;
        int y = b.priority;
        if ((x == GlobalBean.THIN_priority) && (y == GlobalBean.THIN_priority)) {
            /*
             * PLURALIZATION: a kludge to stop pruning from killing off
             * disjuncts which (because of pluralization in "and") might become
             * valid later. Recall that "and" converts a singular subject into a
             * plural one; accepting 'p' on the left as well is so that
             * "he and I are good" doesn't get killed off.
             *
             * The test also accommodates "nor". In particular we need to
             * prevent "neither John nor I likes dogs" from being killed off,
             * and we want this to apply to "are neither a dog nor a cat here"
             * and "is neither a dog nor a cat here", which use the "SI"
             * connector. The last line of the condition ensures that the
             * connector type is either "S" or "SI".
             */
            if (i < s.length()
                && i < t.length()
                && s.charAt(0) == 'S'
                && ((s.charAt(i) == 's') || (s.charAt(i) == 'p'))
                && ((t.charAt(i) == 'p') || (t.charAt(i) == 's'))
                && (i == 1 || (i == 2 && s.charAt(i - 1) == 'I'))) {
                return true;
            }
            return thinSubscriptsMatch(s, t, i);
        } else if ((x == GlobalBean.UP_priority) && (y == GlobalBean.DOWN_priority)) {
            return upDownSubscriptsMatch(s, t, i);
        } else if ((y == GlobalBean.UP_priority) && (x == GlobalBean.DOWN_priority)) {
            return upDownSubscriptsMatch(t, s, i);
        } else {
            return false;
        }
    }

    /**
     * Builds a new copy of the connector list pointed to by {@code c}.
     * Strings, as usual, are not copied (the copies share the string
     * references). Implemented iteratively so that the depth of the call stack
     * does not grow with the length of the list.
     *
     * @param c head of the list to copy, may be {@code null}
     * @return head of the copied list, or {@code null} if {@code c} is null
     */
    static Connector copy_connectors(Connector c) {
        Connector head = null;
        Connector tail = null;
        for (Connector src = c; src != null; src = src.next) {
            Connector copy = new Connector(src);
            // The copy constructor also copied src.next; detach the copy from
            // the original list before linking it into the new one.
            copy.next = null;
            if (tail == null) {
                head = copy;
            } else {
                tail.next = copy;
            }
            tail = copy;
        }
        return head;
    }

    /**
     * Compares the leading upper case letters of two connector strings.
     *
     * @return the index of the first character after the common upper case
     *         prefix, or -1 if the prefixes differ (including the case where
     *         one string ends while the other still has upper case letters)
     */
    private static int upperCasePrefixEnd(String s, String t) {
        int i = 0;
        while (i < s.length()
            && i < t.length()
            && (Character.isUpperCase(s.charAt(i)) || Character.isUpperCase(t.charAt(i)))) {
            if (s.charAt(i) != t.charAt(i)) {
                return -1;
            }
            i++;
        }
        // One string may have ended while the other still continues with
        // upper case letters, e.g. "S" versus "SI": that is a type mismatch.
        if ((i < s.length() && Character.isUpperCase(s.charAt(i)))
            || (i < t.length() && Character.isUpperCase(t.charAt(i)))) {
            return -1;
        }
        return i;
    }

    /**
     * Looks up the effective distance between two word positions.
     *
     * @param caller name used in the exception message, e.g. "match()"
     * @return 0 if both positions are 0, otherwise
     *         {@code sent.effective_dist[aw][bw]}
     * @throws RuntimeException if the positions are not both 0 and
     *         {@code aw < bw} does not hold
     */
    private static int effectiveDistance(Sentence sent, int aw, int bw, String caller) {
        if (aw == 0 && bw == 0) {
            /* probably not necessary, as long as effective_dist[0][0]=0 and is defined */
            return 0;
        }
        if (!(aw < bw)) {
            throw new RuntimeException(caller + " did not receive params in the natural order.");
        }
        return sent.effective_dist[aw][bw];
    }

    /**
     * Subscript comparison for two THIN_priority connectors, starting at index
     * {@code from} and covering the positions present in both strings.
     * Remember that "*" matches anything, and "^" matches nothing (except "*").
     * Otherwise two characters match if and only if they're equal. In
     * particular '^' does not match '^'. ("^" can be used in the dictionary
     * just like any other connector character.)
     */
    private static boolean thinSubscriptsMatch(String s, String t, int from) {
        for (int i = from; i < s.length() && i < t.length(); i++) {
            char cs = s.charAt(i);
            char ct = t.charAt(i);
            boolean ok = cs == '*' || ct == '*' || (cs == ct && cs != '^');
            if (!ok) {
                return false;
            }
        }
        return true;
    }

    /**
     * Subscript comparison for an UP_priority connector string {@code up}
     * against a DOWN_priority connector string {@code down}, starting at index
     * {@code from} and covering the positions present in both strings.
     *
     * <p>As you go up (namely from the UP connector to the DOWN connector) the
     * set of strings that match (in the normal THIN sense) should get no
     * larger; read the comment in and.c of the original C sources to
     * understand this. In other words, the DOWN string must be weaker (or at
     * least no stronger) than the UP string. That '^' should match on the
     * DOWN_priority node is subtle, but correct.
     *
     * <p>This code is only correct if the strings are the same length. This is
     * currently true, but perhaps for safety this assumption should be removed.
     */
    private static boolean upDownSubscriptsMatch(String up, String down, int from) {
        for (int i = from; i < up.length() && i < down.length(); i++) {
            char cu = up.charAt(i);
            char cd = down.charAt(i);
            boolean ok = cu == cd || cu == '*' || cd == '^';
            if (!ok) {
                return false;
            }
        }
        return true;
    }
}