Connector.java example

Explorer
morphadorner-opensource-master
- src
package net.sf.jlinkgrammar;

/**
 * One connector of the link-grammar parser, stored as a node of a singly
 * linked list (chained through {@link #next}).
 *
 * <p>A connector is described by a numeric {@link #label}, a
 * {@link #priority}, a {@link #multi} flag and a name ({@link #string}).
 * The name starts with zero or more upper-case letters, which must agree
 * exactly for two connectors to match, followed by subscript characters
 * (lower-case letters, {@code '*'} or {@code '^'}) that are compared by
 * priority-dependent rules; see
 * {@link #match(Sentence, Connector, Connector, int, int)}.
 *
 * <p>Method and field names keep the snake_case spelling of the C program
 * this class was translated from, because other classes of the package
 * refer to them.
 */
public class Connector {
    /** Numeric label; two connectors with different labels never match. */
    int label;

    /**
     * The nearest word to my left (or right) that this could connect to.
     * Computed by power pruning.  Note that
     * {@link #match_in_connector_set} compares this field of the connectors
     * stored in a {@link ConnectorSet} against the direction argument.
     */
    int word;

    /**
     * If this is a length-limited connector, this gives the limit of the
     * length of the link that can be used on this connector.  Since this is
     * strictly a function of the connector name, efficiency is the only
     * reason to store it.  {@link #init_connector()} sets it to
     * {@code GlobalBean.UNLIMITED_LEN}, meaning "no limit".
     */
    int length_limit;

    /**
     * One of {@code GlobalBean.THIN_priority}, {@code GlobalBean.UP_priority}
     * or {@code GlobalBean.DOWN_priority}.
     */
    int priority;

    /** True if this is a multi-connector. */
    boolean multi;

    /** Next connector in the list, or {@code null} at the end of the list. */
    Connector next;

    /** The connector name. */
    String string;

    /**
     * Copy constructor.  Every field is copied as is, so the new connector
     * shares both the tail of the list ({@link #next}) and the name string
     * with {@code c}.
     *
     * @param c the connector to copy; must not be {@code null}
     */
    Connector(Connector c) {
        label = c.label;
        word = c.word;
        length_limit = c.length_limit;
        priority = c.priority;
        multi = c.multi;
        next = c.next;
        string = c.string;
    }

    /** Creates a connector whose fields all hold their Java default values. */
    Connector() {
    }

    /**
     * Marks this connector as having no length limit.  (The C original also
     * carried a disabled statement that marked the word as unset; nothing
     * of the sort is done here.)
     *
     * @return this connector, to allow call chaining
     */
    Connector init_connector() {
        length_limit = GlobalBean.UNLIMITED_LEN;
        return this;
    }

    /**
     * Reverses the order of the list {@code e} in place.  Destructive: the
     * {@code next} links of the given nodes are rewritten.
     *
     * @param e head of the list to reverse, may be {@code null}
     * @return the head of the reversed list ({@code null} for an empty list)
     */
    static Connector reverse(Connector e) {
        Connector head = null;
        while (e != null) {
            Connector rest = e.next;
            e.next = head;
            head = e;
            e = rest;
        }
        return head;
    }

    /**
     * Tests for exact equality: label, multi flag, priority and name must
     * all be equal.  {@link #connectors_equal_AND} is similar but ignores
     * the priorities.
     *
     * @param c2 the connector to compare with
     * @return true if the two connectors are exactly equal
     */
    boolean connectors_equal_prune(Connector c2) {
        return label == c2.label
            && multi == c2.multi
            && priority == c2.priority
            && string.equals(c2.string);
    }

    /**
     * Two connectors are of the same type if they have the same label and
     * the initial upper-case letters of their names are identical.
     *
     * <p>The whole upper-case prefix is compared: "A" and "AB" are of
     * different types.  (Comparing only up to the end of the shorter name
     * would wrongly accept that pair; the C original got the full
     * comparison for free from the terminating NUL character.)
     *
     * @param c2 the connector to compare with
     * @return true if both connectors have the same type
     */
    boolean connector_types_equal(Connector c2) {
        if (label != c2.label) {
            return false;
        }
        return matchUpperCasePrefix(string, c2.string) >= 0;
    }

    /**
     * Tests for equality while ignoring priorities: the labels, the multi
     * flags and the complete names (including the lower-case subscript
     * characters) must be equal.
     *
     * @param c2 the connector to compare with
     * @return true if the two connectors are equal up to priority
     */
    boolean connectors_equal_AND(Connector c2) {
        return label == c2.label && multi == c2.multi && string.equals(c2.string);
    }

    /**
     * Returns true if this connector is in the given connector set.
     *
     * @param sent   the sentence being parsed, handed on to {@link #x_match}
     * @param conset the set to search; {@code null} is treated as empty
     * @param d      '+' means this connector is on the right side of the
     *               disjunct, '-' means it is on the left side
     * @return true if the bucket selected by hashing this connector's name
     *         and {@code d} holds a connector that matches this one and
     *         whose {@code word} field equals {@code d}
     */
    boolean match_in_connector_set(Sentence sent, ConnectorSet conset, int d) {
        if (conset == null) {
            return false;
        }
        int h = conset.connector_set_hash(string, d);
        for (Connector c1 = conset.hash_table[h]; c1 != null; c1 = c1.next) {
            if (x_match(sent, c1, this) && (d == c1.word)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Hash function that takes this connector and a seed value.  It only
     * looks at the label and at the leading upper-case letters of the name,
     * which ensures that two connectors that match hash to the same place.
     *
     * @param i the seed value
     * @return the hash value, masked with {@code GlobalBean.HT_SIZE - 1}
     */
    int and_connector_hash(int i) {
        String s = string;

        i = i + (i << 1) + MyRandom.randtable[(label + i) & (GlobalBean.RTSIZE - 1)];
        for (int j = 0; j < s.length() && Character.isUpperCase(s.charAt(j)); j++) {
            i = i + (i << 1) + MyRandom.randtable[(s.charAt(j) + i) & (GlobalBean.RTSIZE - 1)];
        }
        return (i & (GlobalBean.HT_SIZE - 1));
    }

    /**
     * Same as {@link #match(Sentence, Connector, Connector, int, int)} with
     * both word indices 0, i.e. without taking any distance into account.
     *
     * @param sent the sentence being parsed
     * @param a    first connector
     * @param b    second connector
     * @return true if the connectors match
     */
    static boolean x_match(Sentence sent, Connector a, Connector b) {
        return match(sent, a, b, 0, 0);
    }

    /**
     * Returns true if {@code a} and {@code b} match according to the
     * connector matching rules.  The connector names must be properly
     * formed: zero or more upper-case letters followed by a sequence of
     * lower-case letters, "*"s or "^"s.
     *
     * <p>It works as follows.  The labels must match.  The sequences of
     * upper-case letters must match exactly.  The distance between the two
     * words must not exceed the length limit of either connector.  The
     * priorities must be compatible (both THIN_priority, or one UP_priority
     * and one DOWN_priority).  The remaining characters are then compared
     * pairwise up to the end of the shorter name; how they are compared
     * depends on which of the two priority cases applies.
     *
     * @param sent the sentence being parsed; its {@code effective_dist}
     *             table is consulted unless {@code aw} and {@code bw} are
     *             both 0
     * @param a    connector on the word with index {@code aw}
     * @param b    connector on the word with index {@code bw}
     * @param aw   index of the word carrying {@code a}
     * @param bw   index of the word carrying {@code b}
     * @return true if the connectors match
     * @throws RuntimeException if the labels and upper-case prefixes agree,
     *         the indices are not both 0, and {@code aw} is not smaller
     *         than {@code bw}
     */
    static boolean match(Sentence sent, Connector a, Connector b, int aw, int bw) {
        if (a.label != b.label) {
            return false;
        }
        int x = a.priority;
        int y = b.priority;
        String s = a.string;
        String t = b.string;

        int i = matchUpperCasePrefix(s, t);
        if (i < 0) {
            return false;
        }
        if (!withinLengthLimits(sent, a, b, aw, bw, "match")) {
            return false;
        }

        if ((x == GlobalBean.THIN_priority) && (y == GlobalBean.THIN_priority)) {
            return thinTailsMatch(s, t, i);
        } else if ((x == GlobalBean.UP_priority) && (y == GlobalBean.DOWN_priority)) {
            /*
               As you go up (namely from x to y) the set of strings that
               match (in the normal THIN sense) should get no larger.
               In other words, the y string (t) must be weaker (or at
               least no stronger) than the x string (s).

               This is only correct if the strings are the same length.
               This is currently true, but perhaps for safety this
               assumption should be removed.
            */
            return upDownTailsMatch(s, t, i);
        } else if ((y == GlobalBean.UP_priority) && (x == GlobalBean.DOWN_priority)) {
            return upDownTailsMatch(t, s, i);
        } else {
            return false;
        }
    }

    /**
     * Same as {@link #prune_match(Sentence, Connector, Connector, int, int)}
     * with both word indices 0, i.e. without taking any distance into
     * account.
     *
     * @param sent the sentence being parsed
     * @param a    first connector
     * @param b    second connector
     * @return true if the connectors match for the purposes of pruning
     */
    static boolean x_prune_match(Sentence sent, Connector a, Connector b) {
        return prune_match(sent, a, b, 0, 0);
    }

    /**
     * This is almost identical to
     * {@link #match(Sentence, Connector, Connector, int, int)}.  Its reason
     * for existence is the rather subtle fact that "and" can transform an
     * "Ss" connector into "Sp".  This means that in order for pruning to
     * work, we must allow an "Ss" connector on a word to match an "Sp"
     * connector on a word to its right.  This is what this version of match
     * allows.  We assume that {@code a} is on a word to the left of
     * {@code b}.
     *
     * @param sent the sentence being parsed; its {@code effective_dist}
     *             table is consulted unless {@code aw} and {@code bw} are
     *             both 0
     * @param a    connector on the word with index {@code aw}
     * @param b    connector on the word with index {@code bw}
     * @param aw   index of the word carrying {@code a}
     * @param bw   index of the word carrying {@code b}
     * @return true if the connectors match for the purposes of pruning
     * @throws RuntimeException if the labels and upper-case prefixes agree,
     *         the indices are not both 0, and {@code aw} is not smaller
     *         than {@code bw}
     */
    static boolean prune_match(Sentence sent, Connector a, Connector b, int aw, int bw) {
        if (a.label != b.label) {
            return false;
        }
        int x = a.priority;
        int y = b.priority;
        String s = a.string;
        String t = b.string;

        int i = matchUpperCasePrefix(s, t);
        if (i < 0) {
            return false;
        }
        if (!withinLengthLimits(sent, a, b, aw, bw, "prune_match")) {
            return false;
        }

        if ((x == GlobalBean.THIN_priority) && (y == GlobalBean.THIN_priority)) {
            /*
               PLURALIZATION.  This is a kludge to stop pruning from
               killing off disjuncts which (because of pluralization in
               "and") might become valid later.  Recall that "and" converts
               a singular subject into a plural one.  Accepting 'p' on the
               left side is so that "he and I are good" doesn't get killed
               off.

               Accepting 's' on the right side accommodates "nor".  In
               particular we need to prevent "neither John nor I likes
               dogs" from being killed off.  We want to allow this to apply
               to "are neither a dog nor a cat here" and "is neither a dog
               nor a cat here".  This uses the "SI" connector.  The last
               condition ensures that the connector is either "S" or "SI".
            */
            if (i < s.length()
                && i < t.length()
                && s.charAt(0) == 'S'
                && ((s.charAt(i) == 's') || (s.charAt(i) == 'p'))
                && ((t.charAt(i) == 'p') || (t.charAt(i) == 's'))
                && (i == 1 || (i == 2 && s.charAt(i - 1) == 'I'))) {
                return true;
            }
            return thinTailsMatch(s, t, i);
        } else if ((x == GlobalBean.UP_priority) && (y == GlobalBean.DOWN_priority)) {
            return upDownTailsMatch(s, t, i);
        } else if ((y == GlobalBean.UP_priority) && (x == GlobalBean.DOWN_priority)) {
            return upDownTailsMatch(t, s, i);
        } else {
            return false;
        }
    }

    /**
     * Builds a new copy of the connector list pointed to by {@code c}.
     * Strings, as usual, are not copied.  The list is walked iteratively so
     * that its length is not bounded by the call stack.
     *
     * @param c head of the list to copy, may be {@code null}
     * @return head of the new list, or {@code null} if {@code c} is null
     */
    static Connector copy_connectors(Connector c) {
        Connector head = null;
        Connector tail = null;
        for (Connector src = c; src != null; src = src.next) {
            Connector copy = new Connector(src);
            /* the copy constructor shares src.next; detach it until the
               copy of the following node (if any) is linked in */
            copy.next = null;
            if (tail == null) {
                head = copy;
            } else {
                tail.next = copy;
            }
            tail = copy;
        }
        return head;
    }

    /**
     * Compares the leading upper-case letters of two connector names.
     *
     * @return the index of the first character after the common upper-case
     *         prefix, or -1 if the two upper-case prefixes differ (which
     *         includes one prefix being longer than the other)
     */
    private static int matchUpperCasePrefix(String s, String t) {
        int i = 0;
        while (i < s.length()
            && i < t.length()
            && (Character.isUpperCase(s.charAt(i)) || Character.isUpperCase(t.charAt(i)))) {
            if (s.charAt(i) != t.charAt(i)) {
                return -1;
            }
            i++;
        }
        /* one name ended (or both left their prefix); an upper-case letter
           still pending in either name means the prefixes differ */
        if ((i < s.length() && Character.isUpperCase(s.charAt(i)))
            || (i < t.length() && Character.isUpperCase(t.charAt(i)))) {
            return -1;
        }
        return i;
    }

    /**
     * Checks the effective distance between the two words against the
     * length limits of both connectors.
     *
     * @param caller name of the public method, used in the error message
     * @return true if neither length limit is exceeded
     * @throws RuntimeException if the indices are not both 0 and
     *         {@code aw} is not smaller than {@code bw}
     */
    private static boolean withinLengthLimits(
            Sentence sent, Connector a, Connector b, int aw, int bw, String caller) {
        int dist;
        if (aw == 0 && bw == 0) {
            /* probably not necessary, as long as effective_dist[0][0]=0 and is defined */
            dist = 0;
        } else {
            if (!(aw < bw)) {
                throw new RuntimeException(caller + "() did not receive params in the natural order.");
            }
            dist = sent.effective_dist[aw][bw];
        }
        return !(dist > a.length_limit || dist > b.length_limit);
    }

    /**
     * Compares the subscripts of two THIN_priority connectors, starting at
     * index {@code from} and stopping at the end of the shorter name.
     * Remember that "*" matches anything, and "^" matches nothing (except
     * "*"); in particular "^" does not match "^".  Otherwise two characters
     * match if and only if they are equal.
     */
    private static boolean thinTailsMatch(String s, String t, int from) {
        for (int i = from; i < s.length() && i < t.length(); i++) {
            char sc = s.charAt(i);
            char tc = t.charAt(i);
            boolean matches = sc == '*' || tc == '*' || (sc == tc && sc != '^');
            if (!matches) {
                return false;
            }
        }
        return true;
    }

    /**
     * Compares the subscripts of an UP_priority connector name ({@code up})
     * and a DOWN_priority connector name ({@code down}), starting at index
     * {@code from} and stopping at the end of the shorter name.  A position
     * matches if the characters are equal, if the UP side has "*", or if
     * the DOWN side has "^" (that '^' should match on the DOWN_priority
     * node is subtle, but correct).
     */
    private static boolean upDownTailsMatch(String up, String down, int from) {
        for (int i = from; i < up.length() && i < down.length(); i++) {
            char uc = up.charAt(i);
            char dc = down.charAt(i);
            boolean matches = uc == dc || uc == '*' || dc == '^';
            if (!matches) {
                return false;
            }
        }
        return true;
    }

}