package LBJ2.nlp;
import LBJ2.parse.LinkedVector;
/**
* Use this class to represent a natural language document.
* <code>SentenceSplitter</code> and <code>Sentence.wordSplit()</code> are
* used to represent the text of the document internally as a collection of
* vectors of words. As such, the text of the document is assumed plain,
* i.e. there should not be any mark-up.
*
* @author Nick Rizzolo
**/
public class NLDocument extends LinkedVector
{
/** The name of the file this document came from. */
private String fileName;
/**
* This constructor takes the entire text of the document in a String array
* as input and initializes the representation.
*
* @param text The entire text of the document. Each element of this array
* should represent a line of input without any line
* termination characters.
**/
public NLDocument(String[] text) { this(null, text); }
/**
* This constructor takes the entire text of the document in a String array
* as input and initializes the representation.
*
* @param p The previous child in the parent vector.
* @param text The entire text of the document. Each element of this array
* should represent a line of input without any line
* termination characters.
**/
public NLDocument(NLDocument p, String[] text) {
super(p);
addAll(new SentenceSplitter(text));
}
/**
* Creates a document from the contents of the named file.
*
* @param file The name of the file containing a natural language, plain
* text document.
**/
public NLDocument(String file) { this(null, file); }
/**
* Creates a document from the contents of the named file.
*
* @param p The previous child in the parent vector.
* @param file The name of the file containing a natural language, plain
* text document.
**/
public NLDocument(NLDocument p, String file) {
super(p);
fileName = file;
addAll(new SentenceSplitter(file));
}
/**
* Returns the name of the file this document came from, or
* <code>null</code> if one was not specified.
**/
public String getFileName() { return fileName; }
/**
* Adds all the sentences that come from the argument sentence splitter to
* this document after using a word splitter to chop them up.
*
* @param splitter A sentence splitter.
**/
public void addAll(SentenceSplitter splitter) {
Sentence[] rawSentences = splitter.splitAll();
for (int i = 0; i < rawSentences.length; ++i)
add(rawSentences[i].wordSplit());
}
}