package com.tom_roush.pdfbox.io;
import java.io.EOFException;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.RandomAccessFile;
import java.util.LinkedHashMap;
import java.util.Map;
/**
* Provides {@link InputStream} access to portions of a file combined with
* buffered reading of content. Start of next bytes to read can be set via seek
* method.
*
* File is accessed via {@link RandomAccessFile} and is read in byte chunks
* which are cached.
*
* @author Timo Boehme
*/
public class RandomAccessBufferedFileInputStream
extends InputStream implements RandomAccessRead
{
    /**
     * The prefix for the temp file being used.
     */
    private static final String TMP_FILE_PREFIX = "tmpPDFBox";

    /** Number of bits used for the offset within a page (page size is 2^pageSizeShift bytes). */
    private int pageSizeShift = 12;
    /** Size of a single cached page in bytes. */
    private int pageSize = 1 << pageSizeShift;
    /** Mask which maps a file offset to the offset of the page start containing it. */
    private long pageOffsetMask = -1L << pageSizeShift;
    /** Maximum number of pages kept in the cache before the eldest one is evicted. */
    private int maxCachedPages = 1000;

    /** Temporary file backing this stream; only set if the instance was created from an InputStream. */
    private File tempFile;

    /** Buffer of the page evicted last from the cache; it is recycled by {@link #readPage()}. */
    private byte[] lastRemovedCachePage = null;

    /** Create a LRU page cache. */
    private final Map<Long, byte[]> pageCache =
        new LinkedHashMap<Long, byte[]>( maxCachedPages, 0.75f, true )
        {
            private static final long serialVersionUID = -6302488539257741101L;

            @Override
            protected boolean removeEldestEntry( Map.Entry<Long, byte[]> eldest )
            {
                final boolean doRemove = size() > maxCachedPages;
                if (doRemove)
                {
                    // remember the evicted buffer so that the next page read can reuse it
                    lastRemovedCachePage = eldest.getValue();
                }
                return doRemove;
            }
        };

    /** File offset of the first byte of the current page, -1 as long as no page was loaded. */
    private long curPageOffset = -1;
    /** Content of the current page. */
    private byte[] curPage = new byte[pageSize];
    /** Index within {@link #curPage} of the next byte to be read. */
    private int offsetWithinPage = 0;

    private final RandomAccessFile raFile;
    private final long fileLength;
    /** Offset in file at which the next byte would be read. */
    private long fileOffset = 0;
    private boolean isClosed;

    /**
     * Create a random access input stream instance for the file with the given name.
     *
     * @param filename the filename of the file to be read.
     * @throws IOException if something went wrong while accessing the given file.
     */
    public RandomAccessBufferedFileInputStream(String filename) throws IOException
    {
        this(new File(filename));
    }

    /**
     * Create a random access input stream instance for the given file.
     *
     * @param file the file to be read.
     * @throws IOException if something went wrong while accessing the given file.
     */
    public RandomAccessBufferedFileInputStream(File file) throws IOException
    {
        raFile = new RandomAccessFile(file, "r");
        fileLength = file.length();
        loadFirstPageOrCleanUp();
    }

    /**
     * Create a random access input stream for the given input stream by copying the data to a
     * temporary file.
     *
     * @param input the input stream to be read.
     * @throws IOException if something went wrong while creating the temporary file.
     */
    public RandomAccessBufferedFileInputStream(InputStream input) throws IOException
    {
        tempFile = createTmpFile(input);
        fileLength = tempFile.length();
        try
        {
            raFile = new RandomAccessFile(tempFile, "r");
        }
        catch (IOException e)
        {
            // nobody will ever be able to call close(), so remove the temp file right away
            deleteTempFile();
            throw e;
        }
        loadFirstPageOrCleanUp();
    }

    /**
     * Positions the stream at the start of the file. If that fails the already opened file
     * handle is closed and the temporary file (if any) is removed before the exception is
     * rethrown, because a caller never gets an instance it could call {@link #close()} on.
     */
    private void loadFirstPageOrCleanUp() throws IOException
    {
        try
        {
            seek(0);
        }
        catch (IOException e)
        {
            try
            {
                raFile.close();
            }
            catch (IOException ignored)
            {
                // the original failure is the one worth reporting
            }
            deleteTempFile();
            throw e;
        }
    }

    /**
     * Copies the content of the given stream to a newly created temporary file. The given stream
     * is always closed. If copying fails the temporary file is removed again.
     *
     * @param input the stream to be copied.
     * @return the temporary file containing the stream content.
     * @throws IOException if the temporary file could not be created or written.
     */
    private File createTmpFile(InputStream input) throws IOException
    {
        File tmpFile = null;
        FileOutputStream fos = null;
        boolean copied = false;
        try
        {
            tmpFile = File.createTempFile(TMP_FILE_PREFIX, ".pdf");
            fos = new FileOutputStream(tmpFile);
            IOUtils.copy(input, fos);
            copied = true;
            return tmpFile;
        }
        finally
        {
            IOUtils.closeQuietly(input);
            IOUtils.closeQuietly(fos);
            if (!copied && tmpFile != null)
            {
                // do not leave a partially written file behind
                tmpFile.delete();
            }
        }
    }

    /**
     * Remove the temporary file. A temporary file is created if this class is instantiated with an InputStream
     */
    private void deleteTempFile()
    {
        if (tempFile != null)
        {
            tempFile.delete();
        }
    }

    /** Returns offset in file at which next byte would be read. */
    @Override
    public long getPosition()
    {
        return fileOffset;
    }

    /**
     * Seeks to new position. If new position is outside of current page the new page is either
     * taken from cache or read from file and added to cache.
     *
     * @param newOffset the position to seek to.
     * @throws java.io.IOException if something went wrong.
     */
    @Override
    public void seek( final long newOffset ) throws IOException
    {
        final long newPageOffset = newOffset & pageOffsetMask;
        if ( newPageOffset != curPageOffset )
        {
            byte[] newPage = pageCache.get( newPageOffset );
            if ( newPage == null )
            {
                raFile.seek( newPageOffset );
                newPage = readPage();
                pageCache.put( newPageOffset, newPage );
            }
            curPageOffset = newPageOffset;
            curPage = newPage;
        }
        offsetWithinPage = (int) ( newOffset - curPageOffset );
        fileOffset = newOffset;
    }

    /**
     * Reads a page with data from current file position. If we have a
     * previously removed page from cache the buffer of this page is reused.
     * Otherwise a new byte buffer is created.
     *
     * @return the buffer holding the page content; if the end of the file is reached before the
     * page is filled the remaining bytes keep whatever the buffer contained before.
     * @throws IOException if reading from the file fails.
     */
    private byte[] readPage() throws IOException
    {
        byte[] page;
        if ( lastRemovedCachePage != null )
        {
            page = lastRemovedCachePage;
            lastRemovedCachePage = null;
        }
        else
        {
            page = new byte[pageSize];
        }

        int readBytes = 0;
        while ( readBytes < pageSize )
        {
            int curBytesRead = raFile.read( page, readBytes, pageSize - readBytes);
            if (curBytesRead < 0)
            {
                // EOF
                break;
            }
            readBytes += curBytesRead;
        }
        return page;
    }

    /**
     * Reads a single byte.
     *
     * @return the byte read as value in the range 0-255 or -1 if the end of the file is reached.
     * @throws IOException if loading the next page fails.
     */
    @Override
    public int read() throws IOException
    {
        if ( fileOffset >= fileLength )
        {
            return -1;
        }

        if ( offsetWithinPage == pageSize )
        {
            // current page is exhausted, load the following one
            seek( fileOffset );
        }

        fileOffset++;
        return curPage[offsetWithinPage++] & 0xff;
    }

    /**
     * Reads up to {@code b.length} bytes, see {@link #read(byte[], int, int)}.
     */
    @Override
    public int read(byte[] b) throws IOException
    {
        return read(b, 0, b.length);
    }

    /**
     * Reads up to {@code len} bytes into the given array. A single call never crosses a page
     * boundary, thus fewer bytes than requested may be returned even if the end of the file is
     * not reached.
     *
     * @param b the target buffer.
     * @param off the index within the buffer at which the first byte is stored.
     * @param len the maximum number of bytes to read.
     * @return the number of bytes read or -1 if the end of the file is reached.
     * @throws IOException if loading the next page fails.
     */
    @Override
    public int read( byte[] b, int off, int len ) throws IOException
    {
        if ( fileOffset >= fileLength )
        {
            return -1;
        }

        if ( offsetWithinPage == pageSize )
        {
            // current page is exhausted, load the following one
            seek( fileOffset );
        }

        int commonLen = Math.min( pageSize - offsetWithinPage, len );
        if ( ( fileLength - fileOffset ) < pageSize )
        {
            // never hand out bytes behind the end of the file
            commonLen = Math.min( commonLen, (int) ( fileLength - fileOffset ) );
        }

        System.arraycopy( curPage, offsetWithinPage, b, off, commonLen );

        offsetWithinPage += commonLen;
        fileOffset += commonLen;

        return commonLen;
    }

    /**
     * Returns the number of bytes left until the end of the file, capped at
     * {@link Integer#MAX_VALUE}.
     */
    @Override
    public int available() throws IOException
    {
        return (int) Math.min( fileLength - fileOffset, Integer.MAX_VALUE );
    }

    /**
     * Moves the read position by the given number of bytes, but not beyond the end of the file.
     * A negative count moves the position backwards.
     *
     * @param n the number of bytes to skip.
     * @return the number of bytes the position was actually moved by.
     * @throws IOException if the page containing the new position could not be loaded.
     */
    @Override
    public long skip( long n ) throws IOException
    {
        // test if we have to reduce skip count because of EOF
        long toSkip = n;
        if ( fileLength - fileOffset < toSkip )
        {
            toSkip = fileLength - fileOffset;
        }

        final long newOffsetWithinPage = offsetWithinPage + toSkip;
        if ( ( toSkip < pageSize ) && ( newOffsetWithinPage >= 0 ) && ( newOffsetWithinPage <= pageSize ) )
        {
            // we can skip within current page
            offsetWithinPage = (int) newOffsetWithinPage;
            fileOffset += toSkip;
        }
        else
        {
            // seek to the page we will get after skipping; this also covers a backward
            // move leaving the current page, which must not end in a negative page index
            seek( fileOffset + toSkip );
        }

        return toSkip;
    }

    /**
     * Returns the length of the underlying file as determined when this instance was created.
     */
    @Override
    public long length() throws IOException
    {
        return fileLength;
    }

    /**
     * Closes the underlying file, removes the temporary file (if any) and drops all cached pages.
     *
     * @throws IOException if closing the underlying file fails.
     */
    @Override
    public void close() throws IOException
    {
        raFile.close();
        deleteTempFile();
        pageCache.clear();
        isClosed = true;
    }

    /**
     * Returns true once {@link #close()} completed.
     */
    @Override
    public boolean isClosed()
    {
        return isClosed;
    }

    /**
     * Returns the next byte without consuming it.
     *
     * @return the next byte or -1 if the end of the file is reached.
     * @throws IOException if reading fails.
     */
    @Override
    public int peek() throws IOException
    {
        int result = read();
        if (result != -1)
        {
            rewind(1);
        }
        return result;
    }

    /**
     * Moves the read position backwards by the given number of bytes.
     *
     * @param bytes the number of bytes to go back.
     * @throws IOException if seeking to the new position fails.
     */
    @Override
    public void rewind(int bytes) throws IOException
    {
        seek(getPosition() - bytes);
    }

    /**
     * Reads exactly the given number of bytes.
     *
     * @param length the number of bytes to read.
     * @return a new array of the given length filled with the bytes read.
     * @throws EOFException if the end of the file is reached before all bytes were read.
     * @throws IOException if reading fails.
     */
    @Override
    public byte[] readFully(int length) throws IOException
    {
        byte[] b = new byte[length];
        int bytesRead = 0;
        while (bytesRead < length)
        {
            int count = read(b, bytesRead, length - bytesRead);
            if (count < 0)
            {
                // adding -1 to the counter would make this loop run (practically) forever
                throw new EOFException("Premature end of file: expected " + length
                        + " bytes but only " + bytesRead + " were available");
            }
            bytesRead += count;
        }
        return b;
    }

    /**
     * Returns true if no more bytes can be read.
     *
     * @throws IOException if reading fails.
     */
    @Override
    public boolean isEOF() throws IOException
    {
        int peek = peek();
        return peek == -1;
    }
}