//
// RandomAccessStream.java
//
/*
LOCI Bio-Formats package for reading and converting biological file formats.
Copyright (C) 2005-@year@ Melissa Linkert, Curtis Rueden, Chris Allan,
Eric Kjellman and Brian Loranger.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU Library General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Library General Public License for more details.
You should have received a copy of the GNU Library General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
package loci.formats;
import java.io.*;
import java.util.*;
import java.util.zip.*;
import loci.formats.codec.CBZip2InputStream;
/**
* RandomAccessStream provides methods for "intelligent" reading of files and
* byte arrays. It also automagically deals with closing and reopening files
* to prevent an IOException caused by too many open files.
*
* <dl><dt><b>Source code:</b></dt>
* <dd><a href="https://skyking.microscopy.wisc.edu/trac/java/browser/trunk/loci/formats/RandomAccessStream.java">Trac</a>,
* <a href="https://skyking.microscopy.wisc.edu/svn/java/trunk/loci/formats/RandomAccessStream.java">SVN</a></dd></dl>
*
* @author Melissa Linkert linkert at wisc.edu
*/
public class RandomAccessStream extends InputStream implements DataInput {
// -- Constants --
/**
* Maximum size of the buffer used by the DataInputStream. The same value caps
* the in-memory copy of the start of an uncompressed file and is the distance
* between stream marks.
*/
// 256 KB - please don't change this
protected static final int MAX_OVERHEAD = 262144;
/** Maximum number of buffer sizes to keep. */
protected static final int MAX_HISTORY = 50;
/** Maximum number of open files. */
protected static final int MAX_FILES = 100;
// Indicators for most efficient method of reading, as returned by
// checkEfficiency(int).
/** Read sequentially through the DataInputStream. */
protected static final int DIS = 0;
/** Seek and read through the random access handle. */
protected static final int RAF = 1;
/** Copy directly out of the in-memory starting buffer. */
protected static final int ARRAY = 2;
// -- Static fields --
/**
* Hashtable of all files that have been opened at some point. Keys are the
* stream instances; the value is Boolean.TRUE while a stream is open and
* Boolean.FALSE once it has been closed.
*/
// NOTE(review): nothing in this class removes entries, so every file-backed
// stream stays referenced from here - confirm this is intended.
private static Hashtable fileCache = new Hashtable();
/** Number of currently open files. */
private static int openFiles = 0;
/**
* Recent buffer sizes. This history is shared by all streams, whereas the
* count of valid entries (lastValid) is kept per instance.
*/
private static int[] bufferSizes = new int[MAX_HISTORY];
// -- Fields --
/**
* Random access handle: an RAFile for local files, an RAUrl for URLs, or an
* RABytes for byte arrays. Null after close().
*/
protected IRandomAccess raf;
/**
* Sequential (possibly decompressing) stream over a local file. Never
* assigned for URL and byte array sources; null after close().
*/
protected DataInputStream dis;
/** Length of the file. For compressed files, the uncompressed length. */
protected long length;
/** The file pointer within the DIS. */
protected long fp;
/** The "absolute" file pointer. */
protected long afp;
/** Most recent mark. */
protected long mark;
/** Next place to mark. */
protected long nextMark;
/** The file name. */
protected String file;
/**
* Starting buffer: the first MAX_OVERHEAD bytes (or the whole file, if
* shorter) of an uncompressed local file.
*/
protected byte[] buf;
/** Endianness of the stream. */
protected boolean littleEndian = false;
/** Number of bytes by which to extend the stream. */
protected int ext = 0;
/** Number of valid entries in the buffer size array. */
protected int lastValid = 0;
/** Flag indicating this file has been compressed. */
protected boolean compressed = false;
// -- Constructors --
/**
 * Constructs a hybrid RandomAccessFile/DataInputStream
 * around the given file.
 *
 * Local files whose names end in .gz, .zip or .bz2 are read through a
 * decompressing stream; their uncompressed length is determined by
 * decompressing the data once up front. For other local files the first
 * MAX_OVERHEAD bytes are cached in memory. A name that does not exist
 * locally but starts with "http" is opened as a URL.
 *
 * @param file name of the file or URL; mapped through Location.getMappedId
 * @throws IOException if the file is not found or cannot be read
 */
public RandomAccessStream(String file) throws IOException {
  String id = Location.getMappedId(file);
  File f = new File(id).getAbsoluteFile();
  if (f.exists()) {
    raf = new RAFile(f, "r");
    String path = f.getPath().toLowerCase();
    if (path.endsWith(".gz")) {
      compressed = true;
      // first pass: decompress and discard to learn the uncompressed length
      DataInputStream counter = new DataInputStream(new GZIPInputStream(
        new BufferedInputStream(new FileInputStream(id), MAX_OVERHEAD)));
      try {
        length = 0;
        while (counter.available() != 0) {
          length += counter.skipBytes(1024);
        }
      }
      finally {
        // release the file descriptor used by the counting pass
        counter.close();
      }
      dis = new DataInputStream(new GZIPInputStream(
        new BufferedInputStream(new FileInputStream(id), MAX_OVERHEAD)));
    }
    else if (path.endsWith(".zip")) {
      compressed = true;
      ZipFile counterZip = new ZipFile(id);
      try {
        InputStream zip = counterZip.getInputStream(
          (ZipEntry) counterZip.entries().nextElement());
        length = 0;
        while (zip.available() != 0) {
          if (zip.read() == -1) break;
          length++;
        }
      }
      finally {
        // closing the ZipFile also closes the entry stream obtained from it
        counterZip.close();
      }
      // this ZipFile must stay open for as long as dis is in use
      ZipFile zf = new ZipFile(id);
      InputStream zip = new BufferedInputStream(zf.getInputStream(
        (ZipEntry) zf.entries().nextElement()), MAX_OVERHEAD);
      dis = new DataInputStream(zip);
    }
    else if (path.endsWith(".bz2")) {
      compressed = true;
      BufferedInputStream bis = new BufferedInputStream(
        new FileInputStream(id), MAX_OVERHEAD);
      DataInputStream counter = null;
      try {
        // skip the two leading bytes before handing over to the decoder
        bis.skip(2);
        counter = new DataInputStream(new CBZip2InputStream(bis));
        length = 0;
        // count only bytes actually read; the terminating -1 is not data
        while (counter.read() != -1) {
          length++;
        }
      }
      finally {
        if (counter != null) counter.close();
        else bis.close();
      }
      bis = new BufferedInputStream(new FileInputStream(id), MAX_OVERHEAD);
      bis.skip(2);
      dis = new DataInputStream(new CBZip2InputStream(bis));
    }
    else {
      dis = new DataInputStream(new BufferedInputStream(
        new FileInputStream(id), MAX_OVERHEAD));
    }

    if (!compressed) {
      length = raf.length();
      // cache the start of the file for direct array reads
      buf = new byte[(int) (length < MAX_OVERHEAD ? length : MAX_OVERHEAD)];
      raf.readFully(buf);
      raf.seek(0);
      bufferSizes[0] = MAX_OVERHEAD / 2;
      lastValid = 1;
      nextMark = MAX_OVERHEAD;
    }
  }
  else if (file.startsWith("http")) {
    raf = new RAUrl(id, "r");
    length = raf.length();
  }
  else throw new IOException("File not found : " + file);

  this.file = file;
  fp = 0;
  afp = 0;
  // register with the open-file cache so idle streams can be closed later
  fileCache.put(this, Boolean.TRUE);
  openFiles++;
  if (openFiles > MAX_FILES) cleanCache();
}
/**
 * Constructs a random access stream that reads from the given byte array.
 * No file descriptor is involved, so the stream is not registered in the
 * file cache.
 */
public RandomAccessStream(byte[] array) throws IOException {
  this.raf = new RABytes(array);
  this.length = this.raf.length();
  this.afp = 0;
  this.fp = 0;
}
// -- RandomAccessStream API methods --
/**
 * Returns the underlying DataInputStream, reopening the file first if this
 * stream has been closed. Returns null if reopening fails.
 */
public DataInputStream getInputStream() {
  boolean closed = fileCache.get(this) == Boolean.FALSE;
  if (closed) {
    try {
      reopen();
    }
    catch (IOException e) {
      return null;
    }
  }
  return dis;
}
/**
 * Sets how many extra bytes the stream pretends to have beyond its real
 * end. Only the InputStream API methods honor this value.
 */
public void setExtend(int extend) {
  this.ext = extend;
}
/**
 * Moves the absolute file pointer to the given offset. No I/O is performed
 * here; the underlying stream is repositioned on the next read.
 */
public void seek(long pos) throws IOException {
  this.afp = pos;
}
/**
 * Alias for readByte(): reads one byte and returns it widened to an int.
 * When the byte is -1, the pointer is at or past the end of the stream and
 * an extension has been set, 0 is returned instead.
 */
// NOTE(review): the value is sign-extended (-128..127), not 0..255 as the
// InputStream contract describes - confirm callers expect this.
public int read() throws IOException {
  int value = readByte();
  boolean pad = value == -1 && afp >= length() && ext > 0;
  return pad ? 0 : value;
}
/**
 * Returns the number of bytes in the file, reopening the file first if
 * this stream has been closed.
 */
public long length() throws IOException {
  boolean closed = fileCache.get(this) == Boolean.FALSE;
  if (closed) {
    reopen();
  }
  return this.length;
}
/** Returns the current absolute position within the stream. */
public long getFilePointer() {
  return this.afp;
}
/**
 * Closes the underlying streams and releases the starting buffer.
 *
 * Both handles are always released, even if closing the first one fails.
 * The open-file count is only decremented for streams that are currently
 * registered as open, so closing a byte array stream (which is never
 * registered) or closing twice does not corrupt the count.
 *
 * @throws IOException if closing either underlying handle fails
 */
public void close() throws IOException {
  try {
    if (raf != null) raf.close();
  }
  finally {
    raf = null;
    try {
      if (dis != null) dis.close();
    }
    finally {
      dis = null;
      buf = null;
      if (Boolean.TRUE.equals(fileCache.get(this))) {
        fileCache.put(this, Boolean.FALSE);
        openFiles--;
      }
    }
  }
}
/** Selects the byte order: true for little-endian, false for big-endian. */
public void order(boolean little) {
  this.littleEndian = little;
}
/** Returns true if the stream is currently set to little-endian order. */
public boolean isLittleEndian() {
  return this.littleEndian;
}
// -- DataInput API methods --
/** Reads one byte and reports whether it is nonzero. */
public boolean readBoolean() throws IOException {
  byte value = readByte();
  return value != 0;
}
/**
 * Reads the byte at the current absolute position using whichever source
 * checkEfficiency(int) selects. The absolute pointer advances by one,
 * except that it is never moved past the last byte of the stream.
 */
public byte readByte() throws IOException {
  int mode = checkEfficiency(1);
  long position = afp;
  // the pointer is advanced before the read and stops at length - 1
  if (afp < length - 1) afp++;

  if (mode == ARRAY) {
    return buf[(int) position];
  }
  if (mode == DIS) {
    byte value = dis.readByte();
    fp++;
    return value;
  }
  return raf.readByte();
}
/**
 * Reads a single byte and casts it to a char. Only one byte is consumed.
 */
public char readChar() throws IOException {
  byte value = readByte();
  return (char) value;
}
/** Reads eight bytes via readLong() and reinterprets them as a double. */
public double readDouble() throws IOException {
  long bits = readLong();
  return Double.longBitsToDouble(bits);
}
/** Reads four bytes via readInt() and reinterprets them as a float. */
public float readFloat() throws IOException {
  int bits = readInt();
  return Float.intBitsToFloat(bits);
}
/** Reads four bytes as a signed int, honoring the stream's byte order. */
public int readInt() throws IOException {
  boolean little = littleEndian;
  return DataTools.read4SignedBytes(this, little);
}
/**
 * Reads characters up to, but not including, the next '\n' and returns them
 * as a string. The newline itself is consumed.
 *
 * If the end of the stream is reached before a newline is found, the
 * characters read so far (including the final byte) are returned. Without
 * this check the loop would never end, because readByte() stops advancing
 * the file pointer at the last byte and keeps returning it.
 *
 * @return the text of the line, without the terminating newline
 * @throws IOException if reading from the underlying source fails
 */
public String readLine() throws IOException {
  StringBuffer sb = new StringBuffer();
  while (true) {
    long before = afp;
    char c = readChar();
    if (c == '\n') break;
    sb.append(c);
    // pointer did not move: that was the last byte of the stream
    if (afp == before) break;
  }
  return sb.toString();
}
/**
 * Reads up to n bytes into a new n-byte array and decodes the whole array
 * with the platform default charset. The count returned by read(byte[]) is
 * not checked.
 */
public String readString(int n) throws IOException {
  byte[] bytes = new byte[n];
  read(bytes);
  String text = new String(bytes);
  return text;
}
/** Reads eight bytes as a signed long, honoring the stream's byte order. */
public long readLong() throws IOException {
  boolean little = littleEndian;
  return DataTools.read8SignedBytes(this, little);
}
/** Reads two bytes as a signed short, honoring the stream's byte order. */
public short readShort() throws IOException {
  boolean little = littleEndian;
  return DataTools.read2SignedBytes(this, little);
}
/** Reads one byte through DataTools and returns it as an unsigned value. */
public int readUnsignedByte() throws IOException {
  int value = DataTools.readUnsignedByte(this);
  return value;
}
/**
 * Reads two bytes, honoring the stream's byte order, and returns them as an
 * int in the range 0 through 65535.
 */
public int readUnsignedShort() throws IOException {
  boolean little = littleEndian;
  return DataTools.read2UnsignedBytes(this, little);
}
/**
* Read a string that has been encoded using a modified UTF-8 format.
* Not implemented: this method reads nothing and always returns null.
*/
public String readUTF() throws IOException {
return null; // not implemented yet...we don't really need this
}
/**
 * Advances the absolute file pointer by n bytes and returns n. No bounds
 * check is made and no I/O happens until the next read.
 */
public int skipBytes(int n) throws IOException {
  this.afp = this.afp + n;
  return n;
}
/**
 * Reads bytes from the stream into the given array, filling as much of it
 * as possible.
 *
 * This is exactly read(array, 0, array.length). Delegating removes a
 * duplicated copy of that logic, an unreachable file pointer update, and a
 * second efficiency check on the DataInputStream path.
 *
 * @param array destination buffer
 * @return the number of bytes read, as reported by
 *   read(byte[], int, int)
 * @throws IOException if reading from the underlying source fails
 */
public int read(byte[] array) throws IOException {
  return read(array, 0, array.length);
}
/**
 * Read n bytes from the stream into the given array at the specified offset.
 *
 * The source (input stream, starting buffer or random access handle) is
 * chosen by checkEfficiency(int). If fewer bytes than requested are
 * available and an extension has been set with setExtend(int), the returned
 * count is padded from that extension, up to at most the requested count;
 * the padding does not write to the array or advance the file pointer.
 *
 * @param array destination buffer
 * @param offset index in array at which to store the first byte
 * @param n maximum number of bytes to read
 * @return the number of bytes read (including any extension padding), or
 *   -1 if the end of the stream was reached and there is nothing to pad
 * @throws IOException if reading from the underlying source fails
 */
public int read(byte[] array, int offset, int n) throws IOException {
  int toRead = n;
  int status = checkEfficiency(n);

  if (status == DIS) {
    int p = dis.read(array, offset, n);
    if (p == -1) return -1;
    if ((p >= 0) && ((fp + p) < length)) {
      // a single read may return early; keep going until n bytes or EOF
      int k = p;
      while ((k >= 0) && (p < n) && ((afp + p) <= length) &&
        ((offset + p) < array.length))
      {
        k = dis.read(array, offset + p, n - p);
        if (k >= 0) p += k;
      }
    }
    n = p;
  }
  else if (status == ARRAY) {
    if ((buf.length - afp) < n) n = buf.length - (int) afp;
    System.arraycopy(buf, (int) afp, array, offset, n);
  }
  else {
    n = raf.read(array, offset, n);
    if (n < 0) {
      // end of stream: never move the file pointer backwards
      if (ext <= 0) return -1;
      n = 0;
    }
  }

  afp += n;
  if (status == DIS) fp += n;

  // pad from the extension, but never report more than was requested
  while (n < toRead && ext > 0) {
    n++;
    ext--;
  }
  return n;
}
/**
 * Reads exactly array.length bytes from the stream into the given array.
 *
 * This delegates to readFully(array, 0, array.length), which performs the
 * read and advances the file pointers exactly once. Advancing them again
 * here would skip array.length bytes after every read that goes through
 * the DataInputStream.
 *
 * @param array destination buffer, filled completely
 * @throws IOException if the bytes cannot be read
 */
public void readFully(byte[] array) throws IOException {
  readFully(array, 0, array.length);
}
/**
 * Reads exactly n bytes into the given array starting at the given offset,
 * using the source selected by checkEfficiency(int), then advances the
 * absolute pointer (and the stream pointer, when the stream was used).
 */
public void readFully(byte[] array, int offset, int n) throws IOException {
  int mode = checkEfficiency(n);
  boolean viaStream = mode == DIS;

  if (viaStream) {
    dis.readFully(array, offset, n);
    afp += n;
    fp += n;
    return;
  }

  if (mode == ARRAY) {
    System.arraycopy(buf, (int) afp, array, offset, n);
  }
  else {
    raf.readFully(array, offset, n);
  }
  afp += n;
}
// -- InputStream API methods --
/**
 * Estimates how many bytes can still be read. With an input stream this is
 * the stream's own estimate plus the extension; otherwise it is the
 * distance from the file pointer to the end. A negative result is reported
 * as Integer.MAX_VALUE.
 */
public int available() throws IOException {
  if (fileCache.get(this) == Boolean.FALSE) reopen();
  int remaining;
  if (dis == null) {
    remaining = (int) (length() - getFilePointer());
  }
  else {
    remaining = dis.available() + ext;
  }
  return remaining < 0 ? Integer.MAX_VALUE : remaining;
}
/**
 * Marks the current position of the underlying input stream, reopening the
 * file first if this stream has been closed.
 *
 * Nothing happens for compressed files or for streams that have no input
 * stream (byte arrays and URLs). A failure to reopen is logged rather than
 * thrown, since this method cannot declare IOException.
 *
 * @param readLimit number of bytes that may be read before the mark
 *   becomes invalid
 */
public void mark(int readLimit) {
  try {
    if (fileCache.get(this) == Boolean.FALSE) reopen();
  }
  catch (IOException e) {
    LogTools.trace(e);
  }
  if (!compressed && dis != null) dis.mark(readLimit);
}
/** Reports mark/reset support: available only for uncompressed sources. */
public boolean markSupported() {
  return compressed ? false : true;
}
/**
 * Resets the underlying input stream to its last mark (reopening the file
 * first if needed) and recomputes the stream pointer from the number of
 * bytes the stream reports as still available.
 */
public void reset() throws IOException {
  boolean closed = fileCache.get(this) == Boolean.FALSE;
  if (closed) {
    reopen();
  }
  dis.reset();
  long total = length();
  fp = total - dis.available();
}
// -- Helper methods - I/O --
/**
 * Naive heuristic for choosing a buffer size for the DIS: a weighted
 * average of the recorded sizes, where the weight grows by one for every
 * MAX_HISTORY / 5 entries, capped at MAX_OVERHEAD. The result is recorded
 * in the history (in the next free slot, or slot 0 once full) and returned.
 */
protected int determineBuffer() {
  int bucket = MAX_HISTORY / 5;
  int weightedTotal = 0;
  int weightTotal = 0;
  for (int i = 0; i < lastValid && i < MAX_HISTORY; i++) {
    int weight = (i / bucket) + 1;
    weightedTotal += bufferSizes[i] * weight;
    weightTotal += weight;
  }

  int newSize = Math.min(weightedTotal / weightTotal, MAX_OVERHEAD);

  int slot = 0;
  if (lastValid < MAX_HISTORY) {
    slot = lastValid;
    lastValid++;
  }
  bufferSizes[slot] = newSize;
  return newSize;
}
/**
* Determine whether it is more efficient to use the DataInputStream or
* RandomAccessFile for reading (based on the current file pointers).
* Returns 0 if we should use the DataInputStream, 1 if we should use the
* RandomAccessFile, and 2 for a direct array access.
*
* Besides choosing a source, this method positions that source at the
* absolute file pointer (afp): the input stream is skipped forward, reset
* to its mark, or (for compressed files) reopened from the start, and the
* random access handle is seeked. A closed stream is reopened first.
*
* @param toRead number of bytes the caller is about to read
* @return DIS, RAF or ARRAY
* @throws IOException if reopening or repositioning a source fails
*/
protected int checkEfficiency(int toRead) throws IOException {
if (fileCache.get(this) == Boolean.FALSE) reopen();
// NOTE(review): this always reads the last slot of the shared history, not
// the most recently written one (lastValid - 1) - confirm this is intended.
int oldBufferSize = bufferSizes[bufferSizes.length - 1];
if (compressed) {
// can only read from the input stream
if (afp < fp) {
// the stream cannot go backwards: start over from the beginning
dis.close();
BufferedInputStream bis = new BufferedInputStream(
new FileInputStream(Location.getMappedId(file)), MAX_OVERHEAD);
String path = Location.getMappedId(file).toLowerCase();
if (path.endsWith(".gz")) {
dis = new DataInputStream(new GZIPInputStream(bis));
}
else if (path.endsWith(".zip")) {
// NOTE(review): bis is not used or closed on this path
ZipFile zf = new ZipFile(Location.getMappedId(file));
InputStream zip = new BufferedInputStream(zf.getInputStream(
(ZipEntry) zf.entries().nextElement()), MAX_OVERHEAD);
dis = new DataInputStream(zip);
}
else if (path.endsWith(".bz2")) {
bis.skip(2);
dis = new DataInputStream(new CBZip2InputStream(bis));
}
fp = 0;
}
// skip forward until the stream pointer catches up with afp
// NOTE(review): if skipBytes returns 0 (e.g. afp is past the end of the
// data) this loop does not terminate - confirm callers never seek past EOF.
while (fp < afp) {
fp += dis.skipBytes((int) (afp - fp));
}
return DIS;
}
if (dis != null) {
// bring the stream forward so that its real position (length minus
// available bytes) is not behind the recorded stream pointer
while (fp > (length() - dis.available())) {
while (fp - length() + dis.available() > Integer.MAX_VALUE) {
dis.skipBytes(Integer.MAX_VALUE);
}
dis.skipBytes((int) (fp - (length() - dis.available())));
}
}
if (dis != null && raf != null &&
afp + toRead < MAX_OVERHEAD && afp + toRead < raf.length())
{
// this is a really special case that allows us to read directly from
// an array when working with the first MAX_OVERHEAD bytes of the file
// ** also note that it doesn't change the stream
return ARRAY;
}
else if (afp >= fp && dis != null) {
// target is at or ahead of the stream position: skip forward to it
while (fp < afp) {
while (afp - fp > Integer.MAX_VALUE) {
fp += dis.skipBytes(Integer.MAX_VALUE);
}
int skip = dis.skipBytes((int) (afp - fp));
if (skip == 0) break;
fp += skip;
}
// record MAX_OVERHEAD in the buffer size history
if (lastValid < MAX_HISTORY) {
bufferSizes[lastValid] = MAX_OVERHEAD;
lastValid++;
}
else {
bufferSizes[0] = MAX_OVERHEAD;
}
// re-mark the stream once the previous mark window has been passed
if (fp >= nextMark) {
dis.mark(MAX_OVERHEAD);
}
nextMark = fp + MAX_OVERHEAD;
mark = fp;
return DIS;
}
else {
// target is behind the stream position (or there is no stream)
if (dis != null && afp >= mark && fp < mark + oldBufferSize) {
// the target may still be reachable by resetting to the mark
int newBufferSize = determineBuffer();
boolean valid = true;
try {
dis.reset();
}
catch (IOException io) {
// the mark is no longer valid; fall back to random access below
valid = false;
}
if (valid) {
dis.mark(newBufferSize);
//fp = mark;
fp = length() - dis.available();
while (fp < afp) {
while (afp - fp > Integer.MAX_VALUE) {
fp += dis.skipBytes(Integer.MAX_VALUE);
}
int skip = dis.skipBytes((int) (afp - fp));
if (skip == 0) break;
fp += skip;
}
if (fp >= nextMark) {
dis.mark(newBufferSize);
}
nextMark = fp + newBufferSize;
mark = fp;
return DIS;
}
else {
raf.seek(afp);
return RAF;
}
}
else {
// we don't want this to happen very often
raf.seek(afp);
return RAF;
}
}
}
// -- Helper methods - cache management --
/**
 * Re-opens a file that has been closed and registers it as open again.
 *
 * The underlying handles are recreated the same way the constructor creates
 * them, except that the length of a compressed file is not recomputed. If
 * the mapped name does not exist locally it is opened as a URL.
 *
 * The stream pointer is reset to 0 because the new input stream starts at
 * the beginning of the data; the absolute pointer is left alone, so the
 * next read skips forward to it. Keeping the old stream pointer would make
 * reads from a reopened compressed file return data from the wrong offset.
 *
 * @throws IOException if the file or URL cannot be opened
 */
private void reopen() throws IOException {
  String id = Location.getMappedId(file);
  File f = new File(id).getAbsoluteFile();
  if (f.exists()) {
    raf = new RAFile(f, "r");
    String path = f.getPath().toLowerCase();
    if (path.endsWith(".zip")) {
      // zip entries are read through ZipFile; no raw file stream is needed
      ZipFile zf = new ZipFile(id);
      InputStream zip = new BufferedInputStream(zf.getInputStream(
        (ZipEntry) zf.entries().nextElement()), MAX_OVERHEAD);
      dis = new DataInputStream(zip);
      compressed = true;
    }
    else {
      BufferedInputStream bis = new BufferedInputStream(
        new FileInputStream(id), MAX_OVERHEAD);
      if (path.endsWith(".gz")) {
        dis = new DataInputStream(new GZIPInputStream(bis));
        compressed = true;
      }
      else if (path.endsWith(".bz2")) {
        bis.skip(2);
        dis = new DataInputStream(new CBZip2InputStream(bis));
        compressed = true;
      }
      else dis = new DataInputStream(bis);
    }

    if (!compressed) {
      length = raf.length();
      buf = new byte[(int) (length < MAX_OVERHEAD ? length : MAX_OVERHEAD)];
      raf.readFully(buf);
      raf.seek(0);
    }
  }
  else {
    raf = new RAUrl(id, "r");
    length = raf.length();
  }

  // the fresh input stream is positioned at the start of the data
  fp = 0;

  fileCache.put(this, Boolean.TRUE);
  openFiles++;
  if (openFiles > MAX_FILES) cleanCache();
}
/**
 * If we have too many open files, close most of them.
 *
 * Closes up to MAX_FILES - 10 streams that are currently registered as
 * open, never this one. The scan stops at the end of the cache, so having
 * fewer closable streams than the target is not an error. Failures to
 * close an individual stream are logged and the stream still counts as
 * handled.
 */
private void cleanCache() {
  int toClose = MAX_FILES - 10;
  RandomAccessStream[] files = (RandomAccessStream[])
    fileCache.keySet().toArray(new RandomAccessStream[0]);
  int closed = 0;
  for (int ndx = 0; ndx < files.length && closed < toClose; ndx++) {
    RandomAccessStream candidate = files[ndx];
    if (this.equals(candidate)) continue;
    // skip streams that are already closed
    if (Boolean.FALSE.equals(fileCache.get(candidate))) continue;
    try {
      candidate.close();
    }
    catch (IOException exc) {
      LogTools.trace(exc);
    }
    closed++;
  }
}
}