package com.linkedin.databus.core;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import org.apache.log4j.Logger;
/**
*
* A single-file InputStream implementation that is capable of reading a file which is being concurrently appended to.
*
* For static files, this acts as a regular FileInputStream.
* For concurrently appended files, this acts like a stream that returns EOF only when the concurrent writer completes and notifies.
*
* Concurrent updates (non-append type) are not supported and the behavior is undefined.
*/
public class ConcurrentAppendableSingleFileInputStream
    extends InputStream
{
  public static final String MODULE = ConcurrentAppendableSingleFileInputStream.class.getName();
  public static final Logger LOG = Logger.getLogger(MODULE);

  /** Value returned by the read methods when the end of the stream is reached. */
  public static final int EOF = -1;

  /**
   * NULL byte. A read that yields this value is not handed to the caller right away; it triggers a
   * re-sync and a re-read instead (presumably such bytes have not been filled in by the writer yet).
   */
  public static final int NULL_CHAR = 0x0;

  /**
   * Behavior when EOF is received from the underlying stream.
   */
  public static enum EOFSyncBehavior
  {
    /** Return EOF if received from the underlying stream, without blocking. */
    NO_SYNC,
    /** Sync once to check for more data. If EOF is seen again, return EOF. */
    SYNC_ONCE,
    /** Block and periodically sync until newly appended data is seen in the stream. */
    SYNC_TILL_NEW_DATA
  }

  /** File representation */
  private final File _file;

  /** ReSync interval in Ms */
  private final long _resyncIntervalMs;

  /** Byte Offset from which to start reading **/
  private final long _byteOffset;

  /** Underlying File Input Stream */
  private FileInputStream _inputStream = null;

  /** Behavior for sync on EOF */
  private volatile EOFSyncBehavior _eofSync = null;

  /** Flag to indicate the channel is closed **/
  private volatile boolean _closed = false;

  /** Cached last Modified timestamp */
  private long _lastModifiedTimestamp;

  /** File position after the most recent read; needed for resync */
  private long _currOffset = 0;

  /** File position before the most recent read; a resync rewinds the stream to this position */
  private long _prevOffset = 0;

  /** Number of read calls for this stream */
  private volatile long _numReadCalls = 0;

  /** Number of read calls that resulted in valid data being returned **/
  private volatile long _numReadCallsWithData = 0;

  /**
   * Factory to create an inputStream from a file that is static (do not wait for more input if EOF is reached).
   *
   * @param fileName : Absolute path to the file
   * @param byteOffset : Byte Offset within the file where the read has to start
   * @return InputStream corresponding to the file
   *
   * @throws IOException if unable to access the file
   */
  public static ConcurrentAppendableSingleFileInputStream createStaticFileInputStream(String fileName,
                                                                                      long byteOffset)
      throws IOException
  {
    return new ConcurrentAppendableSingleFileInputStream(fileName, EOFSyncBehavior.NO_SYNC, byteOffset, 0);
  }

  /**
   * Factory to create an inputStream from a file that is concurrently getting appended. The client is
   * responsible for calling the API {@link #appendDone()} for the stream to detect end of the stream
   * and return EOF.
   *
   * @param fileName : Absolute path to the file
   * @param byteOffset : Byte Offset within the file where the read has to start
   * @param syncInterval : interval (ms) between syncs when EOF is detected by the underlying stream.
   *                       The value is passed to {@link Object#wait(long)}, so 0 means "wait until
   *                       notified by {@link #appendDone()} or {@link #close()}" and a negative value
   *                       makes the wait fail with an IllegalArgumentException.
   *
   * @return InputStream corresponding to the file
   *
   * @throws IOException if unable to access the file
   */
  public static ConcurrentAppendableSingleFileInputStream createAppendingFileInputStream(String fileName,
                                                                                         long byteOffset,
                                                                                         long syncInterval)
      throws IOException
  {
    return new ConcurrentAppendableSingleFileInputStream(fileName,
                                                         EOFSyncBehavior.SYNC_TILL_NEW_DATA,
                                                         byteOffset,
                                                         syncInterval);
  }

  /**
   * Construct a single File input-stream.
   *
   * @param fileName : Absolute file path
   * @param eofSyncBehavior : EOF Sync behavior
   * @param byteOffset : ByteOffset from which read has to start (values &lt;= 0 start at the beginning)
   * @param resyncIntervalMs : Resync Interval on reaching EOF
   * @throws IOException if the file cannot be opened or positioned
   */
  private ConcurrentAppendableSingleFileInputStream(String fileName,
                                                    EOFSyncBehavior eofSyncBehavior,
                                                    long byteOffset,
                                                    long resyncIntervalMs)
      throws IOException
  {
    _byteOffset = byteOffset;
    _file = new File(fileName);
    _eofSync = eofSyncBehavior;
    _resyncIntervalMs = resyncIntervalMs;
    _lastModifiedTimestamp = _file.lastModified();

    // Opens the underlying stream; _prevOffset is still 0 here so the stream starts at the beginning.
    syncStreamOnce(true);

    if (_byteOffset > 0)
    {
      _inputStream.getChannel().position(_byteOffset);
    }

    _currOffset = _inputStream.getChannel().position();
    _prevOffset = _currOffset;
  }

  /**
   * Remembers the position before the read that just completed (used to rewind on resync) and
   * captures the current channel position.
   *
   * @throws IOException if the channel position cannot be obtained
   */
  private void recordOffsets()
      throws IOException
  {
    _prevOffset = _currOffset;
    _currOffset = _inputStream.getChannel().position();
  }

  /**
   * Reads a single byte. EOF or a NULL byte from the underlying stream is handled according to the
   * current {@link EOFSyncBehavior}; the call may block while waiting for appended data.
   *
   * @return the byte read, or {@link #EOF} when the end of the stream is reached or the stream got
   *         closed while this call was waiting for more data
   * @throws IOException if the underlying stream fails
   */
  @Override
  public synchronized int read()
      throws IOException
  {
    int retVal = _inputStream.read();
    recordOffsets();
    _numReadCalls++;

    boolean isDebugEnabled = LOG.isDebugEnabled();

    // if valid byte, return immediately
    if ((retVal != EOF) && (retVal != NULL_CHAR))
    {
      _numReadCallsWithData++;
      if (isDebugEnabled)
      {
        LOG.debug("Byte returned (non-EOF) is :" + retVal + ", State is :" + toString());
      }
      return retVal;
    }

    boolean done = false;
    while (((retVal == EOF) || (retVal == NULL_CHAR))
           && (!_closed) && (!done))
    {
      if (retVal == NULL_CHAR)
      {
        // Force sync using prevOffset
        syncOnceAndSleep();
        if (_closed)
        {
          // close() ran while we were waiting; the underlying stream can no longer be read.
          retVal = EOF;
          break;
        }
        retVal = _inputStream.read();
        recordOffsets();
        continue;
      }

      switch (_eofSync)
      {
        case NO_SYNC:
        {
          retVal = _inputStream.read();
          recordOffsets();
          if (isDebugEnabled)
          {
            LOG.debug("Byte returned (NO_SYNC) is :" + retVal + ", State is :" + toString());
          }
          done = true;
          break;
        }
        case SYNC_ONCE:
        {
          // sync and set behavior to NO_SYNC; the next iteration performs the final read
          syncStreamOnce(true);
          _eofSync = EOFSyncBehavior.NO_SYNC;
          break;
        }
        case SYNC_TILL_NEW_DATA:
        {
          // sync and read the next byte, unless the stream got closed while waiting
          syncOnceAndSleep();
          if (!_closed)
          {
            retVal = _inputStream.read();
            recordOffsets();
          }
          break;
        }
      }
    }

    if (retVal != EOF)
    {
      _numReadCallsWithData++;
    }

    if (isDebugEnabled)
    {
      LOG.debug("Byte returned is :" + retVal + ", State is :" + toString());
    }
    return retVal;
  }

  /**
   * Verifies if the byte-array contains a NULL character in the range [offset, offset + len).
   * A non-positive len (e.g. EOF) scans nothing and yields false.
   *
   * @param b Data buffer
   * @param offset start location for scanning
   * @param len number of bytes to scan
   * @return true if NULL character is found, otherwise false
   */
  private boolean isNullByteSeen(byte[] b, int offset, int len)
  {
    int limit = offset + len;
    for (int i = offset; i < limit; i++)
    {
      if (b[i] == NULL_CHAR)
      {
        LOG.warn("Found NULL character from data read from underlying stream. Offset :" + offset + ", Length :" + len + " Re-Syncing");
        return true;
      }
    }
    return false;
  }

  /**
   * Reads up to b.length bytes into the buffer. Equivalent to {@code read(b, 0, b.length)}.
   *
   * @param b destination buffer
   * @return number of bytes read, or {@link #EOF}
   * @throws IOException if the underlying stream fails
   */
  @Override
  public synchronized int read(byte[] b)
      throws IOException
  {
    // Single implementation for both array variants so that their EOF/resync handling cannot diverge.
    return read(b, 0, b.length);
  }

  /**
   * Reads up to len bytes into b starting at off. EOF, or data containing a NULL byte, is handled
   * according to the current {@link EOFSyncBehavior}; the call may block while waiting for appended
   * data.
   *
   * @param b destination buffer
   * @param off start offset in the buffer
   * @param len maximum number of bytes to read
   * @return number of bytes read, or {@link #EOF} when the end of the stream is reached or the stream
   *         got closed while this call was waiting for more data
   * @throws IOException if the underlying stream fails
   */
  @Override
  public synchronized int read(byte[] b, int off, int len)
      throws IOException
  {
    int retVal = _inputStream.read(b, off, len);
    recordOffsets();
    _numReadCalls++;

    boolean isDebugEnabled = LOG.isDebugEnabled();
    boolean nullBytesSeen = false;

    // if not EOF or NULL, return immediately
    if (retVal != EOF)
    {
      nullBytesSeen = isNullByteSeen(b, off, retVal);
      if (!nullBytesSeen)
      {
        _numReadCallsWithData++;
        if (isDebugEnabled)
        {
          LOG.debug("Num Bytes returned (non-EOF) is :" + retVal + ", State is :" + toString());
        }
        return retVal;
      }
    }

    boolean done = false;
    while (((retVal == EOF) || nullBytesSeen)
           && (!_closed) && (!done))
    {
      if (nullBytesSeen)
      {
        // Force sync using prevOffset
        syncOnceAndSleep();
        if (_closed)
        {
          // close() ran while we were waiting; the underlying stream can no longer be read.
          retVal = EOF;
          break;
        }
        retVal = _inputStream.read(b, off, len);
        recordOffsets();
      }
      else
      {
        switch (_eofSync)
        {
          case NO_SYNC:
          {
            retVal = _inputStream.read(b, off, len);
            recordOffsets();
            if (isDebugEnabled)
            {
              LOG.debug("Num bytes returned (NO_SYNC) is :" + retVal + ", State is :" + toString());
            }
            done = true;
            // Must not fall through to SYNC_ONCE: a forced sync here would rewind the stream to
            // _prevOffset and the bytes just read would be returned again by the next read call.
            break;
          }
          case SYNC_ONCE:
          {
            // sync and set behavior to NO_SYNC; the next iteration performs the final read
            syncStreamOnce(true);
            _eofSync = EOFSyncBehavior.NO_SYNC;
            break;
          }
          case SYNC_TILL_NEW_DATA:
          {
            // sync and read the next bytes, unless the stream got closed while waiting
            syncOnceAndSleep();
            if (!_closed)
            {
              retVal = _inputStream.read(b, off, len);
              recordOffsets();
            }
            break;
          }
        }
      }
      nullBytesSeen = isNullByteSeen(b, off, retVal);
    }

    if (retVal != EOF)
    {
      _numReadCallsWithData++;
    }

    if (isDebugEnabled)
    {
      LOG.debug("Num Bytes returned is :" + retVal + ", State is :" + toString());
    }
    return retVal;
  }

  /**
   * If the behavior is SYNC_TILL_NEW_DATA, tries a non-forced sync; when the file was not refreshed,
   * waits on this object's monitor for up to the resync interval. Does nothing for other behaviors.
   * The wait releases the monitor, so {@link #appendDone()} and {@link #close()} can run (and wake
   * this thread) in the meantime.
   *
   * @throws IOException if re-syncing the underlying stream fails
   */
  private synchronized void syncOnceAndSleep()
      throws IOException
  {
    // Ensure eofSync is SYNC_TILL_NEW_DATA once inside the monitor
    if (_eofSync == EOFSyncBehavior.SYNC_TILL_NEW_DATA)
    {
      boolean refreshed = syncStreamOnce(false);
      if (!refreshed)
      {
        try
        {
          wait(_resyncIntervalMs);
        }
        catch (InterruptedException ignored)
        {
          // Deliberately ignored (original behavior): the callers simply loop and retry.
          // The interrupt flag is not restored because every subsequent wait() would then return
          // immediately and turn the retry loop into a busy spin.
        }
      }
    }
  }

  /**
   * Sync by reopening the underlying stream.
   * If force == false, syncing is done only if the file's lastModified timestamp got updated;
   * otherwise, syncing is done unconditionally. In both cases the stream ends up positioned at
   * _prevOffset, i.e. the position before the most recent read.
   *
   * @param force true to reopen the stream unconditionally
   * @return true if the stream was reopened
   * @throws IOException if the file cannot be reopened or positioned
   */
  private synchronized boolean syncStreamOnce(boolean force)
      throws IOException
  {
    long newLastModifiedTs = _file.lastModified();
    boolean doSync = force || (_lastModifiedTimestamp < newLastModifiedTs);

    if (doSync)
    {
      if (LOG.isDebugEnabled())
      {
        LOG.debug("Syncing from file :" + _file);
      }

      closeStream();
      _inputStream = new FileInputStream(_file);
      _inputStream.getChannel().position(_prevOffset);
      _lastModifiedTimestamp = newLastModifiedTs;
    }
    else
    {
      // Even if sync did not happen, we should align channel position with the offset
      _inputStream.getChannel().position(_prevOffset);
    }

    // Re-align the curOffset
    _currOffset = _inputStream.getChannel().position();
    return doSync;
  }

  /**
   * Marks this stream closed, closes the underlying stream and wakes up any reader waiting for
   * more data.
   *
   * @throws IOException if closing the underlying stream fails
   */
  @Override
  public synchronized void close() throws IOException
  {
    LOG.info("Closing ConcurrentAppendableSingleFileInputStream for file :" + _file);
    _closed = true;
    try
    {
      closeStream();
    }
    finally
    {
      // Wake waiting readers even if closing the underlying stream failed.
      notifyAll();
    }
  }

  /**
   * Closes the underlying file stream, if one is open. Does not mark this stream as closed.
   *
   * @throws IOException if closing the underlying stream fails
   */
  public void closeStream() throws IOException
  {
    if (null != _inputStream)
    {
      _inputStream.close();
    }
  }

  /**
   * @return the current behavior applied when EOF is received from the underlying stream
   */
  public EOFSyncBehavior getEOFSyncBehavior()
  {
    return _eofSync;
  }

  /**
   * Notification by the client that concurrent append had been done and safe for this stream to return EOF if detected.
   */
  public synchronized void appendDone()
  {
    LOG.info("Marking file appending done for inputStream :" + toString());

    // Safe to sync one more time when append is done !!
    this._eofSync = EOFSyncBehavior.SYNC_ONCE;
    notifyAll();
  }

  @Override
  public String toString()
  {
    return "ConcurrentAppendableSingleFileInputStream [_file=" + _file
        + ", _resyncIntervalMs=" + _resyncIntervalMs
        + ", _inputStream=" + _inputStream + ", _eofSync=" + _eofSync
        + ", _closed=" + _closed + ", _lastModifiedTimestamp="
        + _lastModifiedTimestamp + ", _currOffset=" + _currOffset
        + ", _numReadCalls=" + _numReadCalls
        + ", _numReadCallsWithData=" + _numReadCallsWithData + "]";
  }

  /**
   * @return number of read calls made on this stream
   */
  public long getNumReadCalls()
  {
    return _numReadCalls;
  }

  /**
   * Misspelled accessor kept for backward compatibility; same value as
   * {@link #getNumReadCallsWithData()}.
   *
   * @return number of read calls that returned data
   */
  public long geNumReadCallsWithData()
  {
    return _numReadCallsWithData;
  }

  /**
   * @return number of read calls that returned data
   */
  public long getNumReadCallsWithData()
  {
    return _numReadCallsWithData;
  }

  /**
   * @return the file this stream reads from
   */
  public File getFile()
  {
    return _file;
  }

  /**
   * @return the current position of the underlying file channel, or -1 if it cannot be obtained
   *         (the failure is logged)
   */
  public synchronized long getCurrPosition()
  {
    long offset = -1;
    try
    {
      offset = _inputStream.getChannel().position();
    }
    catch (Exception ex)
    {
      LOG.error("Got exception when getting the current position. State :" + toString(), ex);
    }
    return offset;
  }
}