package LBJ2.nlp;
import LBJ2.parse.LinkedVector;
import LBJ2.parse.Parser;
/**
* This parser takes the plain, unannotated {@link Sentence}s returned by
* another parser (e.g., {@link SentenceSplitter}) and splits them into
* {@link Word} objects. Entire sentences now represented as
* {@link LBJ2.parse.LinkedVector}s are then returned one at a time by calls
* to the <code>next()</code> method.
*
* <p> A {@link #main(String[])} method is also implemented which applies
* this class to plain text in a straightforward way.
*
* @author Nick Rizzolo
**/
public class WordSplitter implements Parser
{
  /**
    * Run this program on a file containing plain text, and it will produce
    * the same text on <code>STDOUT</code> rearranged so that each line
    * contains exactly one sentence, and so that character sequences deemed to
    * be "words" are delimited by whitespace.
    *
    * <p> Exactly one command line argument is accepted.  With any other
    * number of arguments a usage message is written to <code>STDERR</code>
    * and the program exits with status <code>1</code>.
    *
    * <p> Usage:
    * <code> java LBJ2.nlp.WordSplitter &lt;file name&gt; </code>
    *
    * @param args  The command line arguments.
   **/
  public static void main(String[] args) {
    // Check the argument count explicitly instead of provoking and catching
    // an exception from args[0].
    if (args == null || args.length != 1) {
      System.err.println("usage: java LBJ2.nlp.WordSplitter <file name>");
      System.exit(1);
      return;
    }

    WordSplitter splitter = new WordSplitter(new SentenceSplitter(args[0]));

    try {
      for (LinkedVector s = (LinkedVector) splitter.next(); s != null;
           s = (LinkedVector) splitter.next()) {
        if (s.size() > 0) {
          // The first word is printed bare; every following word, reached
          // through the next links, is preceded by a single space.
          Word w = (Word) s.get(0);
          System.out.print(w.form);
          for (w = (Word) w.next; w != null; w = (Word) w.next)
            System.out.print(" " + w.form);
        }

        // One output line per sentence, even when the sentence is empty.
        System.out.println();
      }
    }
    finally {
      // Release the underlying parser even if processing fails part way.
      splitter.close();
    }
  }


  /** The {@link Sentence} returning parser. */
  protected Parser parser;


  /**
    * Initializing constructor.
    *
    * @param p  The {@link Sentence} returning parser.  Every object it
    *           returns from <code>next()</code> is cast to {@link Sentence}.
   **/
  public WordSplitter(Parser p) { parser = p; }


  /**
    * Returns {@link LBJ2.parse.LinkedVector}s of {@link Word} objects one at
    * a time.
    *
    * @return The result of calling <code>wordSplit()</code> on the next
    *         {@link Sentence} from the wrapped parser, or <code>null</code>
    *         once the wrapped parser returns <code>null</code>.
   **/
  public Object next() {
    Sentence sentence = (Sentence) parser.next();
    if (sentence == null) return null;
    return sentence.wordSplit();
  }


  /**
    * Sets this parser back to the beginning of the raw data by resetting the
    * wrapped parser.
   **/
  public void reset() { parser.reset(); }


  /**
    * Frees any resources this parser may be holding by closing the wrapped
    * parser.
   **/
  public void close() { parser.close(); }
}