/*******************************************************************************
* Copyright (c) 2011 The Board of Trustees of the Leland Stanford Junior University
* as Operator of the SLAC National Accelerator Laboratory.
* Copyright (c) 2011 Brookhaven National Laboratory.
* EPICS archiver appliance is distributed subject to a Software License Agreement found
* in file LICENSE that is included with this distribution.
*******************************************************************************/
package edu.stanford.slac.archiverappliance.PB.utils;
import java.io.ByteArrayOutputStream;
import java.io.Closeable;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.channels.SeekableByteChannel;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
import org.apache.log4j.Logger;
import org.epics.archiverappliance.ByteArray;
import org.epics.archiverappliance.utils.nio.ArchPaths;
/**
 * Reads newline-delimited records from a {@link SeekableByteChannel} opened on a {@link Path}
 * and hands each line back as raw bytes, without the terminating newline.
 * <p>
 * The channel is read in batches of {@link #MAX_LINE_SIZE} bytes. The stream tracks the file offset
 * at which the current batch starts and the read position inside the batch, so that
 * {@link #getCurrentPosition()} can report the offset of the next unread byte.
 * After a line has been read, that offset is just after the line's newline.
 * <p>
 * Nothing in this class synchronizes access to its state.
 *
 * @author mshankar
 */
public class LineByteStream implements Closeable {
    private static final Logger logger = Logger.getLogger(LineByteStream.class.getName());

    /**
     * Size in bytes of the internal read buffer; also the step used when scanning backwards through the file.
     * Despite the name, lines longer than this are still assembled across several batches.
     */
    public static int MAX_LINE_SIZE = 16 * 1024;

    /** Upper bound on the number of batches / buffer growths / backward steps spent locating one line. */
    public static int MAX_ITERATIONS_TO_DETERMINE_LINE = 16 * 1024;

    private SeekableByteChannel byteChannel = null;
    private Path path = null;
    /** Bytes of the current batch; only indices below {@link #bytesRead} are valid. */
    byte[] buf = null;
    /** Number of usable bytes in {@link #buf}; zero or negative when no data is available. */
    int bytesRead = 0;
    /** Index into {@link #buf} of the next byte to hand out. */
    int currentReadPosition = 0;
    /** File offset at which the current batch starts. */
    long lastReadPointer = 0;
    /** Byte budget for this stream; unlimited unless an end position was supplied. */
    long totalBytesToRead = Long.MAX_VALUE;
    /** Bytes pulled from the channel so far, counted against {@link #totalBytesToRead}. */
    long totalBytesReadSoFar = 0L;
    ByteBuffer byteBuf = null;

    /**
     * Opens the file for reading from the channel's initial position and loads the first batch.
     *
     * @param path file to read
     * @throws IOException if the file cannot be opened or read
     */
    public LineByteStream(Path path) throws IOException {
        init(path, false, 0L, Long.MAX_VALUE);
    }

    /**
     * Opens the file, positions the channel at {@code startPosition} and loads the first batch.
     *
     * @param path file to read
     * @param startPosition file offset at which reading starts
     * @throws IOException if the file cannot be opened, positioned or read
     */
    public LineByteStream(Path path, long startPosition) throws IOException {
        init(path, true, startPosition, Long.MAX_VALUE);
    }

    /**
     * Opens the file, positions the channel at {@code startPosition} and limits reading to the
     * {@code endPosition - startPosition + 1} bytes that follow (that is, {@code endPosition} is inclusive).
     *
     * @param path file to read
     * @param startPosition file offset at which reading starts
     * @param endPosition file offset of the last byte covered by the limit
     * @throws IOException if the file cannot be opened, positioned or read
     */
    public LineByteStream(Path path, long startPosition, long endPosition) throws IOException {
        init(path, true, startPosition, endPosition - startPosition + 1);
    }

    /**
     * Shared constructor body: opens the channel, optionally seeks, allocates the buffers and reads the first batch.
     * If anything fails after the channel has been opened, the channel is closed before the exception propagates
     * so that a failed construction does not leak the underlying file handle.
     */
    private void init(Path path, boolean seek, long startPosition, long bytesToRead) throws IOException {
        this.path = path;
        this.byteChannel = ArchPaths.newByteChannel(path, StandardOpenOption.READ);
        try {
            if (seek) {
                this.byteChannel.position(startPosition);
            }
            totalBytesToRead = bytesToRead;
            buf = new byte[MAX_LINE_SIZE];
            lastReadPointer = byteChannel.position();
            byteBuf = ByteBuffer.allocate(MAX_LINE_SIZE);
            readNextBatch();
        } catch (IOException | RuntimeException ex) {
            try {
                this.byteChannel.close();
            } catch (IOException closeFailure) {
                ex.addSuppressed(closeFailure);
            }
            this.byteChannel = null;
            throw ex;
        }
    }

    /**
     * Loads the next batch from the channel into {@link #buf} and resets the in-batch read position.
     * Once the byte budget has been used up this only sets {@link #bytesRead} to zero.
     */
    private void readNextBatch() throws IOException {
        if (totalBytesReadSoFar >= totalBytesToRead) {
            bytesRead = 0;
            return;
        }
        // The new batch starts where the previous one ended. read() reports end-of-stream as -1,
        // so only a positive count is allowed to move the offset.
        lastReadPointer += Math.max(bytesRead, 0);
        byteBuf.clear();
        bytesRead = this.byteChannel.read(byteBuf);
        byteBuf.flip();
        if (bytesRead > 0) {
            byteBuf.get(buf, 0, bytesRead);
        }
        currentReadPosition = 0;
        long lastTotalBytes = totalBytesReadSoFar;
        // Same reasoning as above: an end-of-stream marker must not shrink the running total.
        totalBytesReadSoFar += Math.max(bytesRead, 0);
        if (totalBytesReadSoFar >= totalBytesToRead) {
            // The downcast to int is safe: this batch holds at most MAX_LINE_SIZE bytes.
            int resetBytesRead = (int) (totalBytesToRead - lastTotalBytes);
            // Extend the usable region from the limit up to (not including) the first newline in this batch.
            // NOTE(review): because that newline is excluded, both readLine variants treat a line that
            // straddles the limit as unterminated and drop it - confirm this is the intended contract.
            while (resetBytesRead < bytesRead && buf[resetBytesRead] != LineEscaper.NEWLINE_CHAR) {
                resetBytesRead++;
            }
            if (resetBytesRead <= bytesRead) {
                bytesRead = resetBytesRead;
            } else {
                if (logger.isDebugEnabled()) {
                    logger.debug("Cannot find newline at tail end of file. resetBytesRead = " + resetBytesRead + " bytesRead=" + bytesRead + " totalBytesReadSoFar=" + totalBytesReadSoFar + "totalBytesToRead=" + totalBytesToRead);
                }
            }
        }
        // totalBytesReadSoFar is deliberately left at the higher value so that the next call stops at the first check.
    }

    /**
     * Reads the next line into a freshly allocated array.
     *
     * @return the bytes of the line without its newline, or {@code null} when no data is left or
     *         the data ends before a newline is found (a partial line is never returned)
     * @throws IOException if reading fails; a {@link LineTooLongException} if no newline is found
     *         within {@link #MAX_ITERATIONS_TO_DETERMINE_LINE} batches
     */
    public byte[] readLine() throws IOException {
        if (bytesRead <= 0) {
            return null;
        }
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        // Each pass consumes the rest of one batch; counting the passes is what bounds the search.
        for (int batches = 0; batches < MAX_ITERATIONS_TO_DETERMINE_LINE; batches++) {
            int start = currentReadPosition;
            int newlineAt = -1;
            while (currentReadPosition < bytesRead) {
                if (buf[currentReadPosition++] == LineEscaper.NEWLINE_CHAR) {
                    newlineAt = currentReadPosition - 1;
                    break;
                }
            }
            if (newlineAt >= 0) {
                out.write(buf, start, newlineAt - start);
                return out.toByteArray();
            }
            // No newline in what is left of this batch: keep it and carry on with the next batch.
            out.write(buf, start, bytesRead - start);
            readNextBatch();
            if (bytesRead <= 0) {
                // End of data reached without a newline.
                // We cannot return what we have as we'll get PBParseExceptions upstream.
                return null;
            }
        }
        throw new LineTooLongException("Unable to determine end of line within iteration count " + MAX_ITERATIONS_TO_DETERMINE_LINE);
    }

    /**
     * Reads the next line into a caller-supplied buffer so the same allocation can be reused across calls.
     * The bytes are copied as they are; no unescaping is done here.
     * {@code bar} is reset first and is returned in that reset state when no data is left or when
     * the data ends before a newline is found.
     *
     * @param bar buffer that receives the line; it is doubled in size as often as needed
     * @return the same {@code bar} instance that was passed in
     * @throws IOException if reading fails; a {@link LineTooLongException} if the buffer had to be
     *         grown more than {@link #MAX_ITERATIONS_TO_DETERMINE_LINE} times
     */
    public ByteArray readLine(ByteArray bar) throws IOException {
        bar.reset();
        if (bytesRead <= 0 || currentReadPosition >= bytesRead) {
            return bar;
        }
        int growths = 0;
        while (currentReadPosition < bytesRead) {
            assert (currentReadPosition < buf.length);
            byte b = buf[currentReadPosition];
            boolean lastByteOfBatch = currentReadPosition >= bytesRead - 1;
            if (b == LineEscaper.NEWLINE_CHAR) {
                // Step over the newline; at the end of a batch that means loading the next one.
                if (lastByteOfBatch) {
                    readNextBatch();
                } else {
                    currentReadPosition++;
                }
                return bar;
            }
            // Make room before storing, rather than relying on an ArrayIndexOutOfBoundsException.
            while (bar.len >= bar.data.length) {
                if (growths++ >= MAX_ITERATIONS_TO_DETERMINE_LINE) {
                    throw new LineTooLongException("Unable to determine end of line within iteration count " + MAX_ITERATIONS_TO_DETERMINE_LINE);
                }
                logger.debug("ByteBuffer is too small, doubling it to accomodate longer lines.");
                bar.doubleBufferSize();
            }
            bar.data[bar.len++] = b;
            if (lastByteOfBatch) {
                readNextBatch();
                if (bytesRead <= 0 || currentReadPosition >= bytesRead) {
                    // We have not found a new line; we cannot return what we have as we'll get PBParseExceptions upstream.
                    bar.reset();
                    return bar;
                }
            } else {
                currentReadPosition++;
            }
        }
        throw new LineTooLongException("Unable to determine end of line within iteration count " + MAX_ITERATIONS_TO_DETERMINE_LINE);
    }

    /**
     * Skips forward to just after the next newline by reading and discarding one line.
     * Does nothing when the current batch starts at the beginning of the file.
     *
     * @throws IOException if reading fails
     */
    public void seekToFirstNewLine() throws IOException {
        if (lastReadPointer < 1L) {
            // If we are at the start of the file then we return right away.
            return;
        }
        readLine();
    }

    /**
     * Positions the stream just before the last line of the file so that the next readLine returns that line.
     * About the only thing one can do after this is to read a line and stop.
     *
     * @throws IOException if seeking or reading fails; a {@link LineTooLongException} if no line start is found
     *         within {@link #MAX_ITERATIONS_TO_DETERMINE_LINE} backward steps
     */
    public void seekToBeforeLastLine() throws IOException {
        seekBackwardsToLineStart(this.byteChannel.size(), false);
    }

    /**
     * Positions the stream at the start of the line that precedes {@code posn} so that the next readLine returns it.
     * Note that this method is not efficient at all; so use with care.
     *
     * @param posn file offset before which the line is looked for
     * @throws IOException if seeking or reading fails; a {@link LineTooLongException} if no line start is found
     *         within {@link #MAX_ITERATIONS_TO_DETERMINE_LINE} backward steps
     */
    public void seekToBeforePreviousLine(long posn) throws IOException {
        seekBackwardsToLineStart(posn, true);
    }

    /**
     * Scans backwards from {@code endPosn} in steps of {@link #MAX_LINE_SIZE} for a newline and leaves the
     * read position just after it. If the start of the file is reached first, the stream is left at offset zero.
     *
     * @param endPosn file offset at which the backward scan starts
     * @param clampToEndPosn when true, a block read from the start of the file is cut off at {@code endPosn}
     */
    private void seekBackwardsToLineStart(long endPosn, boolean clampToEndPosn) throws IOException {
        buf = new byte[MAX_LINE_SIZE];
        long seekPos = endPosn - MAX_LINE_SIZE;
        for (int steps = 0; steps < MAX_ITERATIONS_TO_DETERMINE_LINE; steps++) {
            if (seekPos < 0) {
                seekPos = 0L;
            }
            this.byteChannel.position(seekPos);
            readNextBatch();
            // Must come after readNextBatch(), which advances lastReadPointer by the size of the batch
            // that was loaded before this seek.
            lastReadPointer = seekPos;
            // If we are reading the first block, we may have read more than we need; cut it back to where we need to be.
            if (clampToEndPosn && endPosn < bytesRead) {
                bytesRead = (int) endPosn;
            }
            // We are shaving off 2 bytes from the end to skip the last newline if indeed the last line is terminated by a newline.
            // NOTE(review): this also applies to blocks further back, where a newline in the final byte
            // of the block is not looked at - confirm for lines longer than one block.
            for (int i = bytesRead - 2; i >= 0; i--) {
                if (buf[i] == LineEscaper.NEWLINE_CHAR) {
                    currentReadPosition = i + 1;
                    return;
                }
            }
            if (seekPos == 0) {
                logger.debug("Is it possible that the file has only line? We have come to the beginning of the file and this should be definitely before the last line.");
                return;
            }
            seekPos = seekPos - MAX_LINE_SIZE;
        }
        throw new LineTooLongException("Unable to determine end of line within iteration count " + MAX_ITERATIONS_TO_DETERMINE_LINE);
    }

    /**
     * Returns the file offset of the next unread byte: the start offset of the current batch plus the
     * read position inside it.
     *
     * @return file offset of the next byte to be handed out
     * @throws IOException declared for interface compatibility; this implementation performs no I/O
     */
    public long getCurrentPosition() throws IOException {
        return lastReadPointer + currentReadPosition;
    }

    /**
     * Closes the stream without ever throwing; a failure is only logged at debug level.
     */
    public void safeClose() {
        try {
            this.close();
        } catch (Throwable t) {
            // Best effort by design: report the problem but never propagate it.
            logger.debug("Ignoring failure while closing " + path, t);
        }
    }

    /**
     * @return the absolute path of the file this stream was opened on, as a string
     */
    public String getAbsolutePath() {
        return this.path.toAbsolutePath().toString();
    }

    /**
     * Closes the underlying channel, if still open, and drops the buffered data so that
     * subsequent reads report no data.
     *
     * @throws IOException if closing the channel fails
     */
    @Override
    public void close() throws IOException {
        if (this.byteChannel != null) {
            this.byteChannel.close();
        }
        this.byteChannel = null;
        buf = null;
        bytesRead = 0;
        currentReadPosition = 0;
    }
}