package plume;
import java.io.*;
import java.util.*;
import java.util.regex.*;
import java.nio.CharBuffer;
// TODO:
// EntryReader has a public concept of "short entry", but I don't think that
// concept is logically part of EntryReader. I think it would be better for
// Lookup to make this decision itself, for instance by checking whether there
// are any line separators in the entry that it gets back.
//
// Here are some useful features that EntryReader should have.
// * It should implement some unimplemented methods from LineNumberReader (see
// "not yet implemented" in this file).
// * It should have constructors that take an InputStream or Reader
// (in addition to the current BufferedReader, File, and String versions).
// * It should have a close method.
// * It should automatically close the underlying file/etc. when the
// iterator gets to the end (or the end is otherwise reached).
/**
* Class that reads "entries" from a file. In the simplest case, entries
* can be lines. It supports:
* include files,
* comments, and
* multi-line entries (paragraphs).
* The syntax of each of these is customizable.
* <p>
*
* Example use:
* <pre>
* try {
* // args are filename, comment regexp, include regexp
* er = new EntryReader(filename, "^#.*", null);
* } catch (IOException e) {
* System.err.println("Unable to read " + filename);
* System.exit(2);
* throw new Error("This can't happen"); // for definite assignment check
* }
* for (String line : er) {
* ...
* }
* </pre>
*
* @see #get_entry()
* @see #set_entry_start_stop(String,String)
*/
public class EntryReader extends LineNumberReader implements Iterable<String>, Iterator<String> {
///
/// User configuration variables
///
/**
* Regular expression that specifies an include file. When null, readLine()
* performs no include processing.
**/
private final /*@Nullable*/ Pattern include_re;
/**
* Regular expression that matches a comment. When null, readLine()
* performs no comment removal.
**/
private final /*@Nullable*/ Pattern comment_re;
/**
* Regular expression that starts a long entry.
* <p>
* If the first line of an entry matches this regexp, then the entry is
* terminated by: a line matching entry_stop_re, another match of
* entry_start_re (even one not following a newline), or the end of the
* current file.
* <p>
* Otherwise, the first line of an entry does NOT match this regexp (or
* the regexp is null), in which case the entry is terminated by a blank
* line or the end of the current file.
*/
public /*@LazyNonNull*/ Pattern entry_start_re = null;
/**
* Regular expression that ends a long entry. get_entry() asserts that
* this is non-null whenever entry_start_re matched.
* @see #entry_start_re
*/
public /*@LazyNonNull*/ Pattern entry_stop_re = null;
///
/// Internal implementation variables
///
/**
* Stack of readers. Used to support include files: the top of the stack
* is the file currently being read.
*/
private Stack<FlnReader> readers = new Stack<FlnReader>();
/**
* Line that is pushed back to be reread, or null if no line is pending.
* readLine() returns (and clears) this before reading any new input.
**/
/*@Nullable*/ String pushback_line = null;
/** Platform-specific line separator; appended after each line of an entry body. **/
@SuppressWarnings("nullness") // line.separator property always exists
private static final String lineSep = System.getProperty("line.separator");
///
/// Helper classes
///
/**
 * Like LineNumberReader, but also records the name of the source being read.
 * "FlnReader" stands for "Filename and Line Number Reader".
 */
private static class FlnReader extends LineNumberReader {
  /**
   * Name associated with this reader. It is set once at construction and
   * only ever read afterwards, so it is declared final.
   */
  public final String filename;

  /**
   * Wraps an existing reader.
   *
   * @param reader source of characters
   * @param filename must be non-null; if there isn't a name, clients
   *        should provide a dummy value
   */
  public FlnReader (Reader reader, String filename) {
    super(reader);
    this.filename = filename;
  }

  /**
   * Opens the named file via UtilMDE.fileReader.
   *
   * @param filename name of the file to open; also recorded as the reader's name
   * @throws IOException if the file cannot be opened
   */
  public FlnReader (String filename) throws IOException {
    super(UtilMDE.fileReader(filename));
    this.filename = filename;
  }
}
/** Descriptor for an entry (paragraph) read by get_entry(). **/
public static class Entry {
  /** First line of the entry */
  public String first_line;
  /** Complete body of the entry including the first line **/
  public String body;
  /** True if this is a short entry (blank line separated) **/
  boolean short_entry;
  /** Filename in which the entry was found **/
  String filename;
  /** Line number of first line of entry **/
  long line_number;

  /**
   * Create an entry from its already-parsed pieces.
   *
   * @param first_line first line of the entry
   * @param body full text of the entry
   * @param filename name of the file the entry came from
   * @param line_number line number recorded for the entry's first line
   * @param short_entry true for a blank-line-separated entry
   **/
  Entry (String first_line, String body, String filename, long line_number,
         boolean short_entry) {
    this.short_entry = short_entry;
    this.line_number = line_number;
    this.filename = filename;
    this.body = body;
    this.first_line = first_line;
  }

  /**
   * Return the first substring of the entry body that matches the given
   * regular expression. Falls back to first_line when the pattern is
   * null or does not occur in the body.
   *
   * @param re pattern to search for in the body, or null
   * @return the matched text, or first_line if there is no match
   */
  String get_description (/*@Nullable*/ Pattern re) {
    if (re != null) {
      Matcher bodyMatcher = re.matcher (body);
      if (bodyMatcher.find()) {
        return bodyMatcher.group();
      }
    }
    return first_line;
  }
}
///
/// Constructors
///
/// Inputstream and charset constructors
/**
* Create a EntryReader that reads from an InputStream, decoding its bytes
* with the named charset.
*
* @param in Initial source
* @param charsetName Name of the charset handed to InputStreamReader
* @param filename Non-null file name for stream being read
* @param comment_re_string Regular expression that matches comments,
* or null for no comment processing
* @param include_re_string Regular expression that matches include
* directives, or null for no include processing
* @throws UnsupportedEncodingException if the named charset is not supported
*/
public EntryReader (InputStream in,
String charsetName,
String filename,
/*@Nullable*/ String comment_re_string,
/*@Nullable*/ String include_re_string) throws UnsupportedEncodingException {
this(new InputStreamReader(in, charsetName),
filename, comment_re_string, include_re_string);
}
/**
* Create a EntryReader that decodes the stream with the named charset and
* does not support comments or include directives.
* @see #EntryReader(InputStream,String,String,String,String)
**/
public EntryReader (InputStream in, String charsetName, String filename) throws UnsupportedEncodingException {
this (in, charsetName, filename, null, null);
}
/// Inputstream (no charset) constructors
/**
* Create a EntryReader that reads from an InputStream. The stream is
* wrapped in an InputStreamReader constructed without a charset argument,
* i.e. it uses the default character set.
*
* @param in Initial source
* @param filename Non-null file name for stream being read
* @param comment_re_string Regular expression that matches comments.
* Any text that matches comment_re is removed.
* A line that is entirely a comment is ignored
* @param include_re_string Regular expression that matches include directives.
* The expression should define one group that contains
* the include file name
*/
public EntryReader (InputStream in, String filename,
/*@Nullable*/ String comment_re_string,
/*@Nullable*/ String include_re_string) {
this(new InputStreamReader(in),
filename, comment_re_string, include_re_string);
}
/**
* Create a EntryReader that uses the default character set and does not
* support comments or include directives.
* @see #EntryReader(InputStream,String,String,String) **/
public EntryReader (InputStream in, String filename) {
this (in, filename, null, null);
}
/**
* Create a EntryReader that does not support comments or include directives.
* Because no name is supplied, the placeholder "(InputStream)" is used
* as the file name.
* @see #EntryReader(InputStream,String,String,String) **/
public EntryReader (InputStream in) {
this (in, "(InputStream)", null, null);
}
/**
 * Placeholder Reader handed to the LineNumberReader superclass constructor.
 * It is never meant to be read from: every operation throws an Error whose
 * message is "DummyReader".
 */
private static class DummyReader extends Reader {
  /** Message carried by every Error this class throws. */
  private static final String MESSAGE = "DummyReader";

  @Override
  public void close() {
    throw new Error(MESSAGE);
  }

  @Override
  public void mark(int readAheadLimit) {
    throw new Error(MESSAGE);
  }

  @Override
  public boolean markSupported() {
    throw new Error(MESSAGE);
  }

  @Override
  public int read() {
    throw new Error(MESSAGE);
  }

  @Override
  public int read(char[] cbuf) {
    throw new Error(MESSAGE);
  }

  @Override
  public int read(char[] cbuf, int off, int len) {
    throw new Error(MESSAGE);
  }

  @Override
  public int read(CharBuffer target) {
    throw new Error(MESSAGE);
  }

  @Override
  public boolean ready() {
    throw new Error(MESSAGE);
  }

  @Override
  public void reset() {
    throw new Error(MESSAGE);
  }

  @Override
  public long skip(long n) {
    throw new Error(MESSAGE);
  }
}
/**
 * Create a EntryReader. All other constructors eventually delegate here.
 *
 * @param reader Initial source
 * @param filename Name recorded for the initial source
 * @param comment_re_string Regular expression that matches comments.
 *        Any text that matches comment_re is removed.
 *        A line that is entirely a comment is ignored.
 *        Null disables comment handling.
 * @param include_re_string Regular expression that matches include directives.
 *        The expression should define one group that contains
 *        the include file name. Null disables include handling.
 */
public EntryReader (Reader reader, String filename, /*@Nullable*/ String comment_re_string,
                    /*@Nullable*/ String include_re_string) {
  // The superclass's own stream is never used, but it rejects a null
  // reader with a NullPointerException, so give it a placeholder.
  super(new DummyReader());
  readers.push (new FlnReader (reader, filename));
  comment_re = (comment_re_string == null) ? null : Pattern.compile (comment_re_string);
  include_re = (include_re_string == null) ? null : Pattern.compile (include_re_string);
}
/**
* Create a EntryReader that does not support comments or include directives.
* The reader's toString() value is used as the file name.
* @see #EntryReader(Reader,String,String,String) **/
public EntryReader (Reader reader) {
this (reader, reader.toString(), null, null);
}
/// File Constructors
/**
* Create a EntryReader that reads from a file opened with
* UtilMDE.fileReader. The file's toString() value is used as the file name.
*
* @param file Initial file to read.
* @param comment_re Regular expression that matches comments.
* Any text that matches comment_re is removed.
* A line that is entirely a comment is ignored.
* @param include_re Regular expression that matches include directives.
* The expression should define one group that contains
* the include file name.
* @throws IOException if the file cannot be opened
*/
public EntryReader (File file, /*@Nullable*/ String comment_re,
/*@Nullable*/ String include_re) throws IOException {
this (UtilMDE.fileReader (file),
file.toString(), comment_re, include_re);
}
/**
* Create a EntryReader for a file that does not support comments or
* include directives.
* @throws IOException if the file cannot be opened
* @see #EntryReader(File,String,String) **/
public EntryReader (File file) throws IOException {
this (file, null, null);
}
/**
* Create a EntryReader that does not support comments or include directives.
* The file is opened with UtilMDE.fileInputStream and its bytes are decoded
* with the named charset.
*
* @param file Initial file to read.
* @param charsetName Name of the charset used to decode the file
* @throws IOException if the file cannot be opened or the charset is not supported
* @see #EntryReader(File,String,String) **/
public EntryReader (File file, String charsetName) throws IOException {
this (UtilMDE.fileInputStream (file),
charsetName, file.toString(), null, null);
}
/// Filename constructors
/**
* Create a new EntryReader starting with the specified file. The name is
* converted to a File and handled by the File-based constructor.
*
* @param filename Name of the initial file to read.
* @param comment_re Regular expression that matches comments, or null
* @param include_re Regular expression that matches include directives, or null
* @throws IOException if the file cannot be opened
* @see #EntryReader(File,String,String)
*/
public EntryReader (String filename, /*@Nullable*/ String comment_re,
/*@Nullable*/ String include_re) throws IOException {
this (new File(filename), comment_re, include_re);
}
/**
* Create a EntryReader for the named file that does not support comments
* or include directives.
* @throws IOException if the file cannot be opened
* @see #EntryReader(String,String,String) **/
public EntryReader (String filename) throws IOException {
this (filename, null, null);
}
/**
 * Create a EntryReader for the named file, decoded with the named charset,
 * that does not support comments or include directives.
 * <p>
 * This delegates to the File-based charset constructor, in the same way
 * that {@link #EntryReader(String,String,String)} delegates to its File
 * counterpart. Every file-opening constructor therefore goes through the
 * same UtilMDE helper instead of this one alone using a raw FileInputStream.
 *
 * @param filename Name of the initial file to read.
 * @param charsetName Name of the charset used to decode the file
 * @throws IOException if the file cannot be opened or the charset is not supported
 * @see #EntryReader(File,String)
 **/
public EntryReader (String filename, String charsetName) throws IOException {
  this (new File(filename), charsetName);
}
///
/// Methods
///
/**
 * Read a line, ignoring comments and processing includes. Note that
 * a line that is completely a comment is completely ignored (and
 * not returned as a blank line). Returns null at end of file.
 * <p>
 * A line previously handed to putback() is returned first, unchanged.
 * When a line matches include_re, the named file is pushed onto the reader
 * stack and reading continues from it. Non-absolute include names are
 * resolved relative to the directory of the including file.
 *
 * @return the next line, or null if there is no more input
 * @throws IOException if reading fails or an include file cannot be opened
 * @throws Error if include_re matches a line but does not capture the
 *         include file name in group 1
 */
@Override
public /*@Nullable*/ String readLine() throws IOException {
  // If a line has been pushed back, return it instead
  String pending = pushback_line;
  if (pending != null) {
    pushback_line = null;
    return pending;
  }

  String line = get_next_line();
  if (comment_re != null) {
    // Strip the first comment match from each line; keep reading while
    // the stripped line is empty (i.e. the line was entirely a comment).
    while (line != null) {
      Matcher commentMatcher = comment_re.matcher (line);
      if (!commentMatcher.find()) {
        break;
      }
      line = commentMatcher.replaceFirst ("");
      if (line.length() > 0) {
        break;
      }
      line = get_next_line();
    }
  }
  if (line == null) {
    return null;
  }

  // Handle include files. Non-absolute pathnames are relative
  // to the including file (the current file)
  if (include_re == null) {
    return line;
  }
  Matcher includeMatcher = include_re.matcher (line);
  if (!includeMatcher.matches()) {
    return line;
  }

  // Matcher.group(1) throws IndexOutOfBoundsException when the pattern has
  // no capturing group at all, so check groupCount() first. Both "no group"
  // and "group did not participate" then report the same diagnostic.
  String includeName = (includeMatcher.groupCount() >= 1) ? includeMatcher.group (1) : null;
  if (includeName == null) {
    throw new Error(String.format("include_re (%s) does not capture group 1 in %s",
                                  include_re, line));
  }

  File includeFile = new File (UtilMDE.expandFilename(includeName));
  if (!includeFile.isAbsolute()) {
    File includingFile = new File (readers.peek().filename);
    includeFile = new File (includingFile.getParentFile(), includeFile.toString());
  }
  readers.push (new FlnReader (includeFile.getAbsolutePath()));
  // The include directive itself is not returned; read from the new file.
  return readLine();
}
/**
* Returns a line-by-line iterator for this file.
* <p>
*
* <b>Warning:</b> This does not return a fresh iterator each time. The
* iterator is a singleton (this EntryReader itself), the same one is
* returned each time, and a new one can never be created after it is
* exhausted.
**/
public Iterator<String> iterator() {
return this;
}
/**
 * Reports whether another line is available. To find out, this may read
 * one line ahead and push it back so that the following next() or
 * readLine() call returns it. Any IOException is rethrown as an Error,
 * because Iterator.hasNext() cannot throw checked exceptions.
 *
 * @return true if a line can still be read, false at end of input
 **/
public boolean hasNext() {
  if (pushback_line == null) {
    final String peeked;
    try {
      peeked = readLine();
    } catch (IOException ioe) {
      throw new Error ("unexpected IOException: ", ioe);
    }
    if (peeked == null) {
      return false;
    }
    putback (peeked);
  }
  return true;
}
/**
 * Returns the next line in the multi-file. An IOException raised while
 * reading is rethrown as an Error.
 *
 * @return the next line, never null
 * @throws NoSuchElementException at end of file
 **/
public String next() {
  final String line;
  try {
    line = readLine();
  } catch (IOException ioe) {
    throw new Error ("unexpected IOException", ioe);
  }
  if (line == null) {
    throw new NoSuchElementException();
  }
  return line;
}
/**
* remove() is not supported.
* @throws UnsupportedOperationException always
**/
public void remove() {
throw new UnsupportedOperationException ("can't remove lines from file");
}
/**
 * Returns the next entry (paragraph) in the file. Entries are separated
 * by blank lines unless the entry started with {@link #entry_start_re}
 * (see {@link #set_entry_start_stop}). If no more entries are
 * available, returns null.
 * <p>
 * An entry never spans input files: a change in the current file name
 * also terminates the entry, and the line that triggered the change is
 * pushed back for the next call.
 *
 * @return the next entry, or null at end of input
 * @throws IOException if reading a line fails
 */
public /*@Nullable*/ Entry get_entry() throws IOException {
  // Skip any preceding blank lines
  String line = readLine();
  while ((line != null) && (line.trim().length() == 0))
    line = readLine();
  if (line == null)
    return (null);
  StringBuilder body = new StringBuilder(10000);
  Entry entry = null;
  // File name and line number are captured right after the first line is read.
  String filename = get_filename();
  long line_number = get_line_number();
  // If first line matches entry_start_re, this is a long entry.
  Matcher entry_match = null;
  if (entry_start_re != null)
    entry_match = entry_start_re.matcher (line);
  if ((entry_match != null) && entry_match.find()) {
    assert entry_start_re != null : "@SuppressWarnings(nullness): dependent: entry_match != null";
    assert entry_stop_re != null : "@SuppressWarnings(nullness): dependent: entry_start_re != null";
    // Remove entry match from the line
    if (entry_match.groupCount() > 0) {
      @SuppressWarnings("nullness") // dependent: just checked that group 1 exists in match
      /*@NonNull*/ String match_group_1 = entry_match.group(1);
      // The captured text comes from the input and must be inserted
      // literally. Without quoting, a '$' or '\' in it would be interpreted
      // as replacement syntax (group reference / escape) by replaceFirst.
      line = entry_match.replaceFirst (Matcher.quoteReplacement (match_group_1));
    }
    // Description is the first line
    String description = line;
    // Read until we find the termination of the entry.
    // On the first pass entry_match is still the matcher for the first
    // line, so find() looks for a further start match on that same line;
    // on later passes it is a fresh matcher for the line just read.
    Matcher end_entry_match = entry_stop_re.matcher(line);
    while ((line != null) && !entry_match.find() &&
           !end_entry_match.find() && filename.equals (get_filename())) {
      body.append (line);
      body.append (lineSep);
      line = readLine();
      if (line == null) {
        break; // end of file serves as entry terminator
      }
      entry_match = entry_start_re.matcher(line);
      end_entry_match = entry_stop_re.matcher(line);
    }
    // If this entry was terminated by the start of the next one,
    // put that line back. find(0) restarts the search from the beginning
    // of the line, since the loop condition already advanced the matcher.
    // NOTE(review): if the loop body never ran (e.g. the first line itself
    // matches entry_stop_re), entry_match still refers to the first line
    // here, so that line is put back -- confirm this is intended.
    if ((line != null) && (entry_match.find(0)
                           || !filename.equals (get_filename())))
      putback (line);
    entry = new Entry (description, body.toString(), filename,
                       line_number, false);
  } else { // blank-separated entry
    String description = line;
    // Read until we find another blank line
    while ((line != null) && (line.trim().length() != 0)
           && filename.equals (get_filename())) {
      body.append (line);
      body.append (lineSep);
      line = readLine();
    }
    // If this entry was terminated by the start of a new input file
    // put that line back
    if ((line != null) && !filename.equals (get_filename()))
      putback (line);
    entry = new Entry (description, body.toString(), filename, line_number,
                       true);
  }
  return (entry);
}
/**
 * Reads the next line from the current reader. If EOF is encountered,
 * pops out to the next reader on the stack. Returns null if there is no
 * more input.
 * <p>
 * Readers above the bottom of the stack are only ever pushed by
 * readLine() when it opens an include file, so they are closed here once
 * exhausted; otherwise every included file would leak its handle. The
 * bottom reader was supplied through a constructor and is left open.
 *
 * @return the next raw line (no comment or include processing), or null
 * @throws IOException if reading a line or closing an included file fails
 */
private /*@Nullable*/ String get_next_line() throws IOException {
  if (readers.empty()) {
    return (null);
  }
  String line = readers.peek().readLine();
  while (line == null) {
    FlnReader finished = readers.pop();
    if (readers.empty()) {
      // That was the initial reader: all input is consumed.
      return (null);
    }
    // An include file has been fully read; release it and resume the
    // including file where it left off.
    finished.close();
    line = readers.peek().readLine();
  }
  return (line);
}
/**
* Returns the current filename by delegating to getFileName().
* @deprecated Use {@link #getFileName} instead.
* @see #getFileName
*/
@Deprecated
public String get_filename() {
return getFileName();
}
/**
 * Returns the current filename, i.e. the name recorded for the reader on
 * top of the reader stack.
 *
 * @return name of the file currently being read
 * @throws Error if all input has been consumed (no reader remains)
 **/
public String getFileName() {
  // Stack.peek() throws EmptyStackException on an empty stack and never
  // returns null, so emptiness must be tested explicitly to report the
  // intended error.
  if (readers.empty()) {
    throw new Error("Past end of input");
  }
  return readers.peek().filename;
}
/**
* Returns the current line number by delegating to getLineNumber()
* (the int result is widened to long).
* @deprecated Use {@link #getLineNumber} instead.
* @see #getLineNumber
*/
@Deprecated
public long get_line_number() {
return getLineNumber();
}
/**
 * Return the current line number in the current file, as tracked by the
 * reader on top of the reader stack.
 *
 * @return line number reported by the current reader
 * @throws Error if all input has been consumed (no reader remains)
 **/
@Override
public int getLineNumber() {
  // Stack.peek() throws EmptyStackException on an empty stack and never
  // returns null, so emptiness must be tested explicitly to report the
  // intended error.
  if (readers.empty()) {
    throw new Error("Past end of input");
  }
  return readers.peek().getLineNumber();
}
/**
 * Set the current line number in the current file, i.e. on the reader on
 * top of the reader stack.
 *
 * @param lineNumber new line number for the current reader
 * @throws Error if all input has been consumed (no reader remains)
 **/
@Override
public void setLineNumber(int lineNumber) {
  // Stack.peek() throws EmptyStackException on an empty stack and never
  // returns null, so emptiness must be tested explicitly to report the
  // intended error.
  if (readers.empty()) {
    throw new Error("Past end of input");
  }
  readers.peek().setLineNumber(lineNumber);
}
/**
 * Set the regular expressions for the start and stop of long
 * entries (multiple lines that are read as a group by get_entry()).
 * <p>
 * Both expressions are compiled before either field is assigned. An
 * invalid stop expression therefore leaves the previous pair intact,
 * rather than a new start pattern paired with a stale (or null) stop pattern.
 *
 * @param entry_start_re regular expression that starts a long entry
 * @param entry_stop_re regular expression that ends a long entry
 * @throws java.util.regex.PatternSyntaxException if either expression is invalid
 */
public void set_entry_start_stop (String entry_start_re,
                                  String entry_stop_re) {
  Pattern start_pattern = Pattern.compile (entry_start_re);
  Pattern stop_pattern = Pattern.compile (entry_stop_re);
  this.entry_start_re = start_pattern;
  this.entry_stop_re = stop_pattern;
}
/**
* Set the regular expressions for the start and stop of long
* entries (multiple lines that are read as a group by get_entry()).
* The patterns are stored as given.
*
* @param entry_start_re pattern that starts a long entry
* @param entry_stop_re pattern that ends a long entry
*/
public void set_entry_start_stop (Pattern entry_start_re,
Pattern entry_stop_re) {
this.entry_start_re = entry_start_re;
this.entry_stop_re = entry_stop_re;
}
/**
* Puts the specified line back in the input, so that the next readLine()
* returns it. Only one line can be put back: an assertion checks that no
* line is already pending. With assertions disabled, a second call simply
* overwrites the pending line.
*
* @param line the line to return from the next readLine()
*/
// TODO: This would probably be better implemented with the "mark"
// mechanism of BufferedReader (which is also in LineNumberReader and FlnReader).
public void putback (String line) {
assert pushback_line == null : "push back '" + line + "' when '"
+ pushback_line + "' already back";
pushback_line = line;
}
/**
* Mark the present position in the stream. Not yet implemented:
* always throws an Error.
*/
@Override
public void mark(int readAheadLimit) {
throw new Error("not yet implemented");
}
/**
* Read a single character. Not yet implemented: always throws an Error.
* Use readLine() to obtain input from this reader.
*/
@Override
public int read() {
throw new Error("not yet implemented");
}
/**
* Read characters into a portion of an array. Not yet implemented:
* always throws an Error.
*/
@Override
public int read(char[] cbuf, int off, int len) {
throw new Error("not yet implemented");
}
/**
* Reset the stream to the most recent mark. Not yet implemented:
* always throws an Error.
*/
@Override
public void reset() {
throw new Error("not yet implemented");
}
/** Skip characters. Not yet implemented: always throws an Error. */
@Override
public long skip(long n) {
throw new Error("not yet implemented");
}
/**
 * Simple example: prints every line of a file, prefixed with the file
 * name and line number reported by the reader.
 *
 * @param args file name, comment regexp, include regexp (in that order)
 * @throws IOException if the file cannot be opened or read
 **/
public static void main (String[] args) throws IOException {
  EntryReader reader = new EntryReader (args[0], args[1], args[2]);
  for (String line = reader.readLine(); line != null; line = reader.readLine()) {
    System.out.printf ("%s: %d: %s%n", reader.getFileName(),
                       reader.getLineNumber(), line);
  }
}
}