/* This file is part of the Joshua Machine Translation System.
*
* Joshua is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2.1
* of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free
* Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
* MA 02111-1307 USA
*/
package joshua.util.io;
import java.util.Iterator;
import java.util.NoSuchElementException;
import java.nio.charset.Charset;
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.InputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.zip.GZIPInputStream;
/**
 * This class provides an Iterator interface to a BufferedReader.
 * This covers the most common use-cases for reading from files
 * without ugly code to check whether we got a line or not.
 *
 * <p>Because {@link Iterator} methods cannot throw checked
 * exceptions, an IOException raised while iterating is cached and
 * the iteration simply ends. The cached exception is thrown by the
 * next call to {@link #readLine()} or {@link #close()}, whichever
 * comes first.
 *
 * <p>Once {@link #close()} has been called the reader behaves as
 * an exhausted source: {@link #hasNext()} and {@link #ready()}
 * return <code>false</code> and {@link #readLine()} returns
 * <code>null</code>.
 *
 * @author wren ng thornton <wren@users.sourceforge.net>
 * @version $LastChangedDate: 2009-03-26 15:06:57 -0400 (Thu, 26 Mar 2009) $
 */
public class LineReader implements Reader<String> {

    /* Note: charset name is case-agnostic
     * "UTF-8" is the canonical name
     * "UTF8", "unicode-1-1-utf-8" are aliases
     * Java doesn't distinguish utf8 vs UTF-8 like Perl does
     */
    private static final Charset FILE_ENCODING = Charset.forName("UTF-8");

    /** The underlying reader; null once this object is closed. */
    private BufferedReader reader;

    /** One line of look-ahead filled by hasNext(); null if empty. */
    private String buffer;

    /** An error caught during iteration, waiting to be rethrown. */
    private IOException error;


//===============================================================
// Constructors and destructors
//===============================================================

    /**
     * Opens a file for iterating line by line. If the file
     * name ends in ".gz" then we automatically open it with
     * GZIP. File encoding is assumed to be UTF-8.
     *
     * @param filename the file to be opened
     * @throws IOException if the file cannot be opened
     */
    public LineReader(String filename) throws IOException {
        this(LineReader.getInputStream(filename));
    }


    /**
     * Wraps an InputStream for iterating line by line. Stream
     * encoding is assumed to be UTF-8.
     *
     * @param in the stream to read lines from
     */
    public LineReader(InputStream in) {
        this.reader = new BufferedReader(
            new InputStreamReader(in, FILE_ENCODING));
    }


    /**
     * Uses a BufferedReader for iterating line by line.
     *
     * @param reader the reader to take lines from
     */
    public LineReader(BufferedReader reader) {
        this.reader = reader;
    }


    /**
     * Returns an InputStream for a filename, using Joshua's
     * canonical means for interpreting that name (e.g. detecting
     * gzipped files). This is used by the LineReader constructor
     * that accepts a String argument.
     *
     * @param filename the file to open; names ending in ".gz"
     *        are wrapped in a {@link GZIPInputStream}
     * @return an open stream on the (possibly decompressed) file
     * @throws IOException if the file cannot be opened or its
     *         GZIP header cannot be read
     *
     * @deprecated This method is provided in order for
     * {@link joshua.decoder.DecoderThread} to open files in
     * the canonical way for handing off to
     * {@link joshua.decoder.segment_file.SegmentFileParser}.
     * The <code>SegmentFileParser</code> interface can't be
     * made more liberal (e.g. to accept a {@link java.io.Reader})
     * because {@link javax.xml.parsers.SAXParser} can't parse
     * that argument and no common {@link java.io.Reader} gives
     * access to the underlying <code>InputStream</code>. This
     * method is considered a hack which should be removed once
     * a better solution presents itself.
     */
    @Deprecated
    public static final InputStream getInputStream(String filename)
    throws IOException {
        FileInputStream fis = new FileInputStream(filename);
        if (!filename.endsWith(".gz")) {
            return fis;
        }

        // The GZIPInputStream constructor reads the header and may
        // throw. If it does, nobody else holds a reference to the
        // file stream, so we must release it here.
        boolean wrapped = false;
        try {
            InputStream gzip = new GZIPInputStream(fis);
            wrapped = true;
            return gzip;
        } finally {
            if (!wrapped) {
                try {
                    fis.close();
                } catch (IOException ignored) {
                    // The failure to wrap is the error worth reporting.
                }
            }
        }
    }


    /**
     * This method will close the file handle, and will raise
     * any exceptions that occurred during iteration. The method
     * is idempotent, and all calls after the first are no-ops
     * (unless the thread was interrupted or killed). For
     * correctness, you <b>must</b> call this method before the
     * object falls out of scope.
     *
     * @throws IOException if closing the underlying reader fails,
     *         or else if an error was cached during iteration
     */
    public void close() throws IOException {
        this.buffer = null; // Just in case it's a large string

        // Detach the reader first so that we end up closed no
        // matter how the call below terminates.
        BufferedReader closing = this.reader;
        this.reader = null;

        if (null != closing) {
            try {
                // We assume the wrappers will percolate this down.
                closing.close();
            } catch (IOException e) {
                // We need to trash our cached error for idempotence.
                // Presumably the closing error is the more important
                // one to throw.
                this.error = null;
                throw e;
            }
        }

        this.throwPendingError();
    }


    /**
     * We attempt to avoid leaking file descriptors if you fail
     * to call close before the object falls out of scope.
     * However, the language spec makes <b>no guarantees</b>
     * about timeliness of garbage collection. It is a bug to
     * rely on this method to release the resources. Also, the
     * garbage collector will discard any exceptions that have
     * queued up, without notifying the application in any way.
     *
     * Having a finalizer means the JVM can't do "fast allocation"
     * of LineReader objects (or subclasses). This isn't too
     * important due to disk latency, but may be worth noting.
     *
     * @see <a href="http://java2go.blogspot.com/2007/09/javaone-2007-performance-tips-2-finish.html">Performance Tips</a>
     * @see <a href="http://www.javaworld.com/javaworld/jw-06-1998/jw-06-techniques.html?page=1">Techniques</a>
     */
    @Override
    protected void finalize() throws Throwable {
        try {
            this.close();
        } catch (IOException e) {
            // Do nothing. The GC will discard the exception
            // anyways, but it may cause us to linger on the heap.
        } finally {
            super.finalize();
        }
    }


//===============================================================
// Reader
//===============================================================

    // Copied from interface documentation.
    /**
     * Determine if the reader is ready to read a line.
     *
     * <p>A line already buffered by {@link #hasNext()} counts as
     * ready, since it can be returned without touching the
     * underlying reader. A closed reader is never ready.
     *
     * @return <code>true</code> if a line is buffered or the
     *         underlying reader reports that it is ready
     * @throws IOException if the underlying reader fails
     */
    public boolean ready() throws IOException {
        if (null != this.buffer) {
            return true;
        }
        if (null == this.reader) {
            return false;
        }
        return this.reader.ready();
    }


    /**
     * This method is like next() except that it throws the
     * IOException directly. If there are no lines to be read
     * then null is returned.
     *
     * @return the next line, or <code>null</code> at end of input
     * @throws IOException if reading the line failed
     */
    public String readLine() throws IOException {
        if (this.hasNext()) {
            return this.takeBuffered();
        }
        this.throwPendingError();
        return null;
    }


//===============================================================
// Iterable -- because sometimes Java can be very stupid
//===============================================================

    /** Return self as an iterator. */
    public Iterator<String> iterator() {
        return this;
    }


//===============================================================
// Iterator
//===============================================================

    // Copied from interface documentation.
    /**
     * Returns <code>true</code> if the iteration has more
     * elements. (In other words, returns <code>true</code> if
     * <code>next</code> would return an element rather than
     * throwing an exception.)
     *
     * <p>Returns <code>false</code> when an error is pending or
     * when this reader has already been closed.
     */
    public boolean hasNext() {
        if (null != this.buffer) {
            return true;
        }
        if (null != this.error || null == this.reader) {
            // Either a failure is waiting to be reported, or we
            // were closed and there is nothing left to read from.
            return false;
        }

        // We're not allowed to throw IOException from within Iterator
        try {
            this.buffer = this.reader.readLine();
        } catch (IOException e) {
            this.buffer = null;
            this.error = e;
            return false;
        }
        return (null != this.buffer);
    }


    /**
     * Return the next line of the file. If an error is
     * encountered, NoSuchElementException is thrown. The actual
     * IOException encountered will be thrown later, when the
     * LineReader is closed. Also if there is no line to be
     * read then NoSuchElementException is thrown.
     *
     * @return the next line, without its line terminator
     * @throws NoSuchElementException if no line is available
     */
    public String next() throws NoSuchElementException {
        if (!this.hasNext()) {
            throw new NoSuchElementException();
        }
        return this.takeBuffered();
    }


    /** Unsupported. */
    public void remove() throws UnsupportedOperationException {
        throw new UnsupportedOperationException();
    }


    /**
     * Iterates over all lines, ignoring their contents, and
     * returns the count of lines. If some lines have already
     * been read, this will return the count of remaining lines.
     * Because no lines will remain after calling this method,
     * we implicitly call close.
     *
     * @return the number of lines read
     * @throws IOException if reading or closing failed
     */
    public int countLines() throws IOException {
        int lines = 0;
        while (this.hasNext()) {
            this.next();
            lines++;
        }
        // close() also reports any error that cut the loop short.
        this.close();
        return lines;
    }


//===============================================================
// Private helpers
//===============================================================

    /** Hands out the buffered line and empties the buffer. */
    private String takeBuffered() {
        String line = this.buffer;
        this.buffer = null;
        return line;
    }


    /** Throws the cached iteration error, if any, exactly once. */
    private void throwPendingError() throws IOException {
        if (null != this.error) {
            IOException e = this.error;
            this.error = null;
            throw e;
        }
    }


//===============================================================
// Main
//===============================================================

    /** Example usage code. */
    public static void main(String[] args) {
        if (1 != args.length) {
            System.out.println("Usage: java LineReader filename");
            System.exit(1);
        }

        try {
            LineReader in = new LineReader(args[0]);
            try {
                for (String line : in) {
                    System.out.println(line);
                }
            } finally {
                // Also surfaces any error hidden by the for-each loop.
                in.close();
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}