Word.java example

Explorer
MinorThird-master
package LBJ2.nlp;

import LBJ2.parse.LinkedChild;


/**
  * Implementation of a word for natural language processing.  Please note
  * that in general, one can only count on the <code>form</code> and
  * <code>capitalized</code> fields described below having meaningful values.
  * The <code>form</code> field can be assumed to be filled in because it's
  * hard to imagine a situation in which a <code>Word</code> object should be
  * created without any knowledge of how that word appeared in text.  The
  * <code>capitalized</code> field is computed from the <code>form</code> by
  * this class' constructor.
  *
  * <p> <i>All other fields must be obtained or computed externally.  Space is
  * provided for them in this class' implementation as a convenience, since we
  * expect the user will make frequent use of these fields.</i>
  *
  * <p> This class extends from {@link LBJ2.parse.LinkedChild}.  Of course,
  * this means that objects of this class contain references to both the
  * previous and the next word in the sentence.  Constructors are available
  * that take the previous word as an argument, setting that reference.  Thus,
  * a useful technique for constructing all the words in a sentence will
  * involve code that looks like this (where <code>form</code> is a
  * {@link java.lang.String}):
  *
  * <blockquote>
  *   <code>
  *   Word current = new Word(form);<br>
  *   <i>a loop of some sort</i><br>
  *   {<br>
  *       current.next = new Word(form, current);<br>
  *       current = current.next;<br>
  *   }<br>
  *   </code>
  * </blockquote>
  *
  * @author Nick Rizzolo
 **/
public class Word extends LinkedChild
{
  /** The actual text from the corpus that represents the word. */
  public String form;
  /**
    * Whether or not the word is capitalized.  Determined automatically by
    * the constructor: <code>true</code> iff <code>form</code> is non-null,
    * non-empty, and its first character is upper case.
   **/
  public boolean capitalized;
  /** Names the part of speech of this word; may be <code>null</code>. */
  public String partOfSpeech;
  /** The base form of the word; may be <code>null</code>. */
  public String lemma;
  /**
    * An indication of the meaning or usage of this instance of this word;
    * may be <code>null</code>.
   **/
  public String wordSense;


  /**
    * When all that is known is the spelling of the word.  The part of speech
    * and previous word are left <code>null</code>, and both offsets are set
    * to -1.
    *
    * @param f  The actual text of the word.
   **/
  public Word(String f) { this(f, null, null); }

  /**
    * Sets the actual text and the part of speech.  The previous word is left
    * <code>null</code>, and both offsets are set to -1.
    *
    * @param f    The actual text of the word.
    * @param pos  A token representing the word's part of speech.
   **/
  public Word(String f, String pos) { this(f, pos, null); }

  /**
    * This constructor is useful when the sentence is being parsed forwards.
    * The part of speech is left <code>null</code>, and both offsets are set
    * to -1.
    *
    * @param f  The actual text of the word.
    * @param p  The word that came before this one in the sentence.
   **/
  public Word(String f, Word p) { this(f, null, p); }

  /**
    * This constructor is useful when the sentence is being parsed forwards.
    * Both offsets are set to -1.
    *
    * @param f    The actual text of the word.
    * @param pos  A token representing the word's part of speech.
    * @param p    The word that came before this one in the sentence.
   **/
  public Word(String f, String pos, Word p) { this(f, pos, p, -1, -1); }

  /**
    * When you have offset information.  The part of speech and previous word
    * are left <code>null</code>.
    *
    * @param f      The actual text of the word.
    * @param start  The offset into the parent document at which the first
    *               character of this word is found.
    * @param end    The offset into the parent document at which the last
    *               character of this word is found.
   **/
  public Word(String f, int start, int end) {
    this(f, null, null, start, end);
  }

  /**
    * When you have offset information and the part of speech.  The previous
    * word is left <code>null</code>.
    *
    * @param f      The actual text of the word.
    * @param pos    A token representing the word's part of speech.
    * @param start  The offset into the parent document at which the first
    *               character of this word is found.
    * @param end    The offset into the parent document at which the last
    *               character of this word is found.
   **/
  public Word(String f, String pos, int start, int end) {
    this(f, pos, null, start, end);
  }

  /**
    * This constructor is useful when the sentence is being parsed forwards
    * and offset information is available.  The part of speech is left
    * <code>null</code>.
    *
    * @param f      The actual text of the word.
    * @param p      The word that came before this one in the sentence.
    * @param start  The offset into the parent document at which the first
    *               character of this word is found.
    * @param end    The offset into the parent document at which the last
    *               character of this word is found.
   **/
  public Word(String f, Word p, int start, int end) {
    this(f, null, p, start, end);
  }

  /**
    * This constructor is useful when the sentence is being parsed forwards
    * and offset information is available.  The lemma and word sense are left
    * <code>null</code>.
    *
    * @param f      The actual text of the word.
    * @param pos    A token representing the word's part of speech.
    * @param p      The word that came before this one in the sentence.
    * @param start  The offset into the parent document at which the first
    *               character of this word is found.
    * @param end    The offset into the parent document at which the last
    *               character of this word is found.
   **/
  public Word(String f, String pos, Word p, int start, int end) {
    this(f, pos, null, null, p, start, end);
  }

  /**
    * The most general constructor, to which all the others delegate.  It
    * stores every argument in the corresponding field and computes
    * {@link #capitalized} from <code>f</code>.
    *
    * @param f      The actual text of the word.
    * @param pos    A token representing the word's part of speech.
    * @param l      The base form of the word.
    * @param sense  The sense of the word.
    * @param p      The word that came before this one in the sentence.
    * @param start  The offset into the parent document at which the first
    *               character of this word is found.
    * @param end    The offset into the parent document at which the last
    *               character of this word is found.
   **/
  public Word(String f, String pos, String l, String sense, Word p, int start,
              int end) {
    super(p, start, end);
    form = f;
    // A null or empty form is tolerated; it is simply never capitalized.
    capitalized = f != null && f.length() > 0
                  && Character.isUpperCase(f.charAt(0));
    partOfSpeech = pos;
    // NOTE(review): the value returned by POS.fromToken is discarded.  The
    // call is kept as-is because it may validate or register the token as a
    // side effect -- confirm against the POS class before removing it.
    if (partOfSpeech != null) POS.fromToken(partOfSpeech);
    lemma = l;
    wordSense = sense;
  }


  /**
    * The string representation of a word is its POS bracket form, or, if the
    * part of speech is not available, it is just the spelling of the word.
    * Note that the POS bracket form of a word also entails displaying left
    * brackets (<code>"("</code>, <code>"["</code>, and <code>"{"</code>) as
    * <code>"-LRB-"</code> and right brackets (<code>")"</code>,
    * <code>"]"</code>, <code>"}"</code>) as <code>"-RRB-"</code>.
    *
    * <p> If the part of speech is available but <code>form</code> is
    * <code>null</code>, no bracket substitution is attempted and the text
    * <code>"null"</code> appears in place of the spelling.
    *
    * @return The POS bracket form of this word, or just the spelling of the
    *         word (possibly <code>null</code>) if the part of speech is not
    *         available.
   **/
  public String toString() {
    if (partOfSpeech == null) return form;

    String text = form;

    // Guard against a null form: the constructor accepts one, so printing
    // such a word must not throw.
    if (text != null && text.length() == 1) {
      char c = text.charAt(0);
      if ("([{".indexOf(c) != -1) text = "-LRB-";
      else if (")]}".indexOf(c) != -1) text = "-RRB-";
    }

    return "(" + partOfSpeech + " " + text + ")";
  }
}