/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.facebook.infrastructure.io;
import java.io.*;
import java.lang.reflect.Method;
import java.nio.ByteBuffer;
import java.nio.MappedByteBuffer;
import java.nio.channels.FileChannel;
import java.security.AccessController;
import java.security.PrivilegedAction;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import org.apache.log4j.Logger;
import com.facebook.infrastructure.config.DatabaseDescriptor;
import com.facebook.infrastructure.db.RowMutation;
import com.facebook.infrastructure.io.IndexHelper.ColumnPositionInfo;
import com.facebook.infrastructure.utils.LogUtil;
/**
* This class writes key/value pairs sequentially to disk. It is
* also used to read sequentially from disk. However one could
* jump to random positions to read data from the file. This class
* also has many implementations of the IFileWriter and IFileReader
* interfaces which are exposed through factory methods.
*
* Author : Avinash Lakshman ( alakshman@facebook.com) & Prashant Malik ( pmalik@facebook.com ) & Karthik Ranganathan ( kranganathan@facebook.com )
*/
public class SequenceFile
{
/**
 * Common base of the writers defined in this file. It only keeps hold
 * of the name of the file that is being written.
 */
public static abstract class AbstractWriter implements IFileWriter
{
    /* name of the file this writer was created for */
    protected String filename_;

    AbstractWriter(String filename)
    {
        this.filename_ = filename;
    }

    /**
     * @return the file name that was handed to the constructor
     */
    public String getFileName()
    {
        return this.filename_;
    }

    /**
     * @return whatever File.lastModified() reports for the file name
     */
    public long lastModified()
    {
        return new File(this.filename_).lastModified();
    }
}
/**
 * Writer that appends entries directly to a RandomAccessFile opened
 * in "rw" mode.
 */
public static class Writer extends AbstractWriter
{
    private RandomAccessFile file_;

    /**
     * Creates the file when it does not exist yet and opens it for
     * reading and writing.
     * @param filename name of the file to write to
     */
    Writer(String filename) throws IOException
    {
        super(filename);
        File file = new File(filename);
        if ( !file.exists() )
        {
            file.createNewFile();
        }
        file_ = new RandomAccessFile(file, "rw");
    }

    public long getCurrentPosition() throws IOException
    {
        return file_.getFilePointer();
    }

    public void seek(long position) throws IOException
    {
        file_.seek(position);
    }

    /**
     * Appends an entry laid out as: int key length, key bytes,
     * int data length, data bytes.
     * @param keyBuffer buffer holding the key
     * @param buffer buffer holding the data
     * @throws IllegalArgumentException if the key buffer is null or empty
     */
    public void append(DataOutputBuffer keyBuffer, DataOutputBuffer buffer) throws IOException
    {
        /* check for null BEFORE asking for the length, otherwise a null key ends in a NullPointerException */
        if ( keyBuffer == null )
            throw new IllegalArgumentException("Key cannot be NULL or of zero length.");
        int keyBufLength = keyBuffer.getLength();
        if ( keyBufLength == 0 )
            throw new IllegalArgumentException("Key cannot be NULL or of zero length.");
        /* NOTE(review): seeking to the current pointer looks like a no-op; kept as in the original */
        file_.seek(file_.getFilePointer());
        file_.writeInt(keyBufLength);
        file_.write(keyBuffer.getData(), 0, keyBufLength);
        int length = buffer.getLength();
        file_.writeInt(length);
        file_.write(buffer.getData(), 0, length);
    }

    /**
     * Appends an entry laid out as: UTF key, int data length, data bytes.
     * @throws IllegalArgumentException if the key is null
     */
    public void append(String key, DataOutputBuffer buffer) throws IOException
    {
        if ( key == null )
            throw new IllegalArgumentException("Key cannot be NULL.");
        file_.seek(file_.getFilePointer());
        file_.writeUTF(key);
        int length = buffer.getLength();
        file_.writeInt(length);
        file_.write(buffer.getData(), 0, length);
    }

    /**
     * Appends an entry laid out as: UTF key, int value length, value bytes.
     * @throws IllegalArgumentException if the key is null
     */
    public void append(String key, byte[] value) throws IOException
    {
        if ( key == null )
            throw new IllegalArgumentException("Key cannot be NULL.");
        file_.seek(file_.getFilePointer());
        file_.writeUTF(key);
        file_.writeInt(value.length);
        file_.write(value);
    }

    /**
     * Appends an entry laid out as: UTF key, long value.
     * @throws IllegalArgumentException if the key is null
     */
    public void append(String key, long value) throws IOException
    {
        if ( key == null )
            throw new IllegalArgumentException("Key cannot be NULL.");
        file_.seek(file_.getFilePointer());
        file_.writeUTF(key);
        file_.writeLong(value);
    }

    /**
     * Be extremely careful while using this API. This currently
     * used to write the commit log header in the commit logs.
     * If not used carefully it could completely screw up reads
     * of other key/value pairs that are written.
     * @param bytes the bytes to write
     * @return the file pointer after the write
     */
    public long writeDirect(byte[] bytes) throws IOException
    {
        file_.write(bytes);
        return file_.getFilePointer();
    }

    public void close() throws IOException
    {
        file_.close();
    }

    /**
     * Writes the marker key, the given size and the footer bytes.
     * This method does not close the underlying file.
     * @param footer footer bytes to write
     * @param size value written as the int that precedes the footer
     */
    public void close(byte[] footer, int size) throws IOException
    {
        file_.writeUTF(SequenceFile.marker_);
        file_.writeInt(size);
        file_.write(footer);
    }

    public String getFileName()
    {
        return filename_;
    }

    public long getFileSize() throws IOException
    {
        return file_.length();
    }
}
/**
 * Writer that appends entries through a BufferedRandomAccessFile
 * opened in "rw" mode with the requested buffer size.
 */
public static class BufferWriter extends AbstractWriter
{
    private BufferedRandomAccessFile file_;

    /**
     * Creates the file when it does not exist yet and opens it for
     * reading and writing.
     * @param filename name of the file to write to
     * @param size size passed on to BufferedRandomAccessFile
     */
    BufferWriter(String filename, int size) throws IOException
    {
        super(filename);
        File file = new File(filename);
        /* create before opening, the same order Writer uses */
        if ( !file.exists() )
        {
            file.createNewFile();
        }
        file_ = new BufferedRandomAccessFile(file, "rw", size);
    }

    public long getCurrentPosition() throws IOException
    {
        return file_.getFilePointer();
    }

    public void seek(long position) throws IOException
    {
        file_.seek(position);
    }

    /**
     * Appends an entry laid out as: int key length, key bytes,
     * int data length, data bytes.
     * @param keyBuffer buffer holding the key
     * @param buffer buffer holding the data
     * @throws IllegalArgumentException if the key buffer is null or empty
     */
    public void append(DataOutputBuffer keyBuffer, DataOutputBuffer buffer) throws IOException
    {
        /* check for null BEFORE asking for the length, otherwise a null key ends in a NullPointerException */
        if ( keyBuffer == null )
            throw new IllegalArgumentException("Key cannot be NULL or of zero length.");
        int keyBufLength = keyBuffer.getLength();
        if ( keyBufLength == 0 )
            throw new IllegalArgumentException("Key cannot be NULL or of zero length.");
        /* NOTE(review): seeking to the current pointer; kept as in the original */
        file_.seek(file_.getFilePointer());
        file_.writeInt(keyBufLength);
        file_.write(keyBuffer.getData(), 0, keyBufLength);
        int length = buffer.getLength();
        file_.writeInt(length);
        file_.write(buffer.getData(), 0, length);
    }

    /**
     * Appends an entry laid out as: UTF key, int data length, data bytes.
     * @throws IllegalArgumentException if the key is null
     */
    public void append(String key, DataOutputBuffer buffer) throws IOException
    {
        if ( key == null )
            throw new IllegalArgumentException("Key cannot be NULL.");
        file_.seek(file_.getFilePointer());
        file_.writeUTF(key);
        int length = buffer.getLength();
        file_.writeInt(length);
        file_.write(buffer.getData(), 0, length);
    }

    /**
     * Appends an entry laid out as: UTF key, int value length, value bytes.
     * @throws IllegalArgumentException if the key is null
     */
    public void append(String key, byte[] value) throws IOException
    {
        if ( key == null )
            throw new IllegalArgumentException("Key cannot be NULL.");
        file_.seek(file_.getFilePointer());
        file_.writeUTF(key);
        file_.writeInt(value.length);
        file_.write(value);
    }

    /**
     * Appends an entry laid out as: UTF key, long value.
     * @throws IllegalArgumentException if the key is null
     */
    public void append(String key, long value) throws IOException
    {
        if ( key == null )
            throw new IllegalArgumentException("Key cannot be NULL.");
        file_.seek(file_.getFilePointer());
        file_.writeUTF(key);
        file_.writeLong(value);
    }

    /**
     * Be extremely careful while using this API. This currently
     * used to write the commit log header in the commit logs.
     * If not used carefully it could completely screw up reads
     * of other key/value pairs that are written.
     * @param bytes the bytes to write
     * @return the file pointer after the write
     */
    public long writeDirect(byte[] bytes) throws IOException
    {
        file_.write(bytes);
        return file_.getFilePointer();
    }

    public void close() throws IOException
    {
        file_.close();
    }

    /**
     * Writes the marker key, the given size and the footer bytes.
     * This method does not close the underlying file.
     * @param footer footer bytes to write
     * @param size value written as the int that precedes the footer
     */
    public void close(byte[] footer, int size) throws IOException
    {
        file_.writeUTF(SequenceFile.marker_);
        file_.writeInt(size);
        file_.write(footer);
    }

    public String getFileName()
    {
        return filename_;
    }

    public long getFileSize() throws IOException
    {
        return file_.length();
    }
}
/**
 * Writer that assembles every entry in a direct ByteBuffer and hands
 * it to the FileChannel of a file opened in "rw" mode.
 */
public static class ConcurrentWriter extends AbstractWriter
{
    private FileChannel fc_;

    public ConcurrentWriter(String filename) throws IOException
    {
        super(filename);
        RandomAccessFile raf = new RandomAccessFile(filename, "rw");
        fc_ = raf.getChannel();
    }

    /**
     * Number of payload bytes SequenceFile.writeUTF produces for the
     * given string (excluding the length prefix). One char can take
     * up to three bytes, so String.length() is not a safe buffer size.
     */
    private static int encodedLength(String str)
    {
        int utflen = 0;
        for ( int i = 0; i < str.length(); i++ )
        {
            int c = str.charAt(i);
            if ( (c >= 0x0001) && (c <= 0x007F) )
                utflen += 1;
            else if ( c > 0x07FF )
                utflen += 3;
            else
                utflen += 2;
        }
        return utflen;
    }

    /* A single FileChannel.write call may write fewer bytes than remain, so loop until the buffer is drained. */
    private void writeFully(ByteBuffer byteBuffer) throws IOException
    {
        while ( byteBuffer.hasRemaining() )
        {
            fc_.write(byteBuffer);
        }
    }

    public long getCurrentPosition() throws IOException
    {
        return fc_.position();
    }

    public void seek(long position) throws IOException
    {
        fc_.position(position);
    }

    /**
     * Appends an entry laid out as: int key length, key bytes,
     * int data length, data bytes.
     * @param keyBuffer buffer holding the key
     * @param buffer buffer holding the data
     * @throws IllegalArgumentException if the key buffer is null or empty
     */
    public void append(DataOutputBuffer keyBuffer, DataOutputBuffer buffer) throws IOException
    {
        /* check for null BEFORE asking for the length, otherwise a null key ends in a NullPointerException */
        if ( keyBuffer == null )
            throw new IllegalArgumentException("Key cannot be NULL or of zero length.");
        int keyBufLength = keyBuffer.getLength();
        if ( keyBufLength == 0 )
            throw new IllegalArgumentException("Key cannot be NULL or of zero length.");
        /* Size allocated "int" for key length + key + "int" for data length + data */
        int length = buffer.getLength();
        ByteBuffer byteBuffer = ByteBuffer.allocateDirect( 4 + keyBufLength + 4 + length );
        byteBuffer.putInt(keyBufLength);
        byteBuffer.put(keyBuffer.getData(), 0, keyBufLength);
        byteBuffer.putInt(length);
        byteBuffer.put(buffer.getData(), 0, length);
        byteBuffer.flip();
        writeFully(byteBuffer);
    }

    /**
     * Appends an entry laid out as: UTF key, int data length, data bytes.
     * @throws IllegalArgumentException if the key is null
     */
    public void append(String key, DataOutputBuffer buffer) throws IOException
    {
        if ( key == null )
            throw new IllegalArgumentException("Key cannot be NULL.");
        int length = buffer.getLength();
        /* Size allocated : utfPrefix_ + encoded key bytes + "int" for data size + data */
        ByteBuffer byteBuffer = ByteBuffer.allocateDirect( SequenceFile.utfPrefix_ + encodedLength(key) + 4 + length );
        SequenceFile.writeUTF(byteBuffer, key);
        byteBuffer.putInt(length);
        byteBuffer.put(buffer.getData(), 0, length);
        byteBuffer.flip();
        writeFully(byteBuffer);
    }

    /**
     * Appends an entry laid out as: UTF key, int value length, value bytes.
     * @throws IllegalArgumentException if the key is null
     */
    public void append(String key, byte[] value) throws IOException
    {
        if ( key == null )
            throw new IllegalArgumentException("Key cannot be NULL.");
        /* Size allocated : utfPrefix_ + encoded key bytes + "int" for data size + data */
        ByteBuffer byteBuffer = ByteBuffer.allocateDirect( SequenceFile.utfPrefix_ + encodedLength(key) + 4 + value.length );
        SequenceFile.writeUTF(byteBuffer, key);
        byteBuffer.putInt(value.length);
        byteBuffer.put(value);
        byteBuffer.flip();
        writeFully(byteBuffer);
    }

    /**
     * Appends an entry laid out as: UTF key, long value.
     * @throws IllegalArgumentException if the key is null
     */
    public void append(String key, long value) throws IOException
    {
        if ( key == null )
            throw new IllegalArgumentException("Key cannot be NULL.");
        /* Size allocated : utfPrefix_ + encoded key bytes + a long */
        ByteBuffer byteBuffer = ByteBuffer.allocateDirect( SequenceFile.utfPrefix_ + encodedLength(key) + 8 );
        SequenceFile.writeUTF(byteBuffer, key);
        byteBuffer.putLong(value);
        byteBuffer.flip();
        writeFully(byteBuffer);
    }

    /**
     * Be extremely careful while using this API. This currently
     * used to write the commit log header in the commit logs.
     * If not used carefully it could completely screw up reads
     * of other key/value pairs that are written.
     * @param bytes the bytes to write
     * @return the channel position after the write
     */
    public long writeDirect(byte[] bytes) throws IOException
    {
        ByteBuffer byteBuffer = ByteBuffer.allocateDirect(bytes.length);
        byteBuffer.put(bytes);
        byteBuffer.flip();
        writeFully(byteBuffer);
        return fc_.position();
    }

    public void close() throws IOException
    {
        fc_.close();
    }

    /**
     * Writes the marker key, the given size and the footer bytes.
     * This method does not close the channel.
     * @param footer footer bytes to write
     * @param size value written as the int that precedes the footer
     */
    public void close(byte[] footer, int size) throws IOException
    {
        /* Size is utfPrefix_ + encoded marker bytes + "int" for size + footer data */
        ByteBuffer byteBuffer = ByteBuffer.allocateDirect( SequenceFile.utfPrefix_ + encodedLength(SequenceFile.marker_) + 4 + footer.length );
        SequenceFile.writeUTF(byteBuffer, SequenceFile.marker_);
        byteBuffer.putInt(size);
        byteBuffer.put(footer);
        byteBuffer.flip();
        writeFully(byteBuffer);
    }

    public String getFileName()
    {
        return filename_;
    }

    public long getFileSize() throws IOException
    {
        return fc_.size();
    }
}
/**
 * Writer that maps the first "size" bytes of the file read/write and
 * puts entries into the mapped buffer. On close the file is truncated
 * to the number of bytes that were put into the buffer.
 */
public static class FastConcurrentWriter extends AbstractWriter
{
    private FileChannel fc_;
    private MappedByteBuffer buffer_;

    /**
     * @param filename name of the file to write to
     * @param size number of bytes to map, starting at offset 0
     */
    public FastConcurrentWriter(String filename, int size) throws IOException
    {
        super(filename);
        fc_ = new RandomAccessFile(filename, "rw").getChannel();
        boolean mapped = false;
        try
        {
            buffer_ = fc_.map( FileChannel.MapMode.READ_WRITE, 0, size );
            buffer_.load();
            mapped = true;
        }
        finally
        {
            /* do not leave the channel open when the mapping could not be set up */
            if ( !mapped )
                fc_.close();
        }
    }

    /**
     * Invokes the "cleaner" of the given buffer through reflection.
     * Any failure is logged as a warning and otherwise ignored.
     * @param buffer the buffer whose cleaner should run
     */
    void unmap(final Object buffer)
    {
        AccessController.doPrivileged( new PrivilegedAction<MappedByteBuffer>()
        {
            public MappedByteBuffer run()
            {
                try
                {
                    Method getCleanerMethod = buffer.getClass().getMethod("cleaner", new Class[0]);
                    getCleanerMethod.setAccessible(true);
                    sun.misc.Cleaner cleaner = (sun.misc.Cleaner)getCleanerMethod.invoke(buffer, new Object[0]);
                    cleaner.clean();
                }
                catch(Throwable e)
                {
                    logger_.warn( LogUtil.throwableToString(e) );
                }
                return null;
            }
        });
    }

    public long getCurrentPosition() throws IOException
    {
        return buffer_.position();
    }

    /**
     * Moves the position of the mapped buffer.
     * @throws IllegalArgumentException if the position lies outside the mapped buffer
     */
    public void seek(long position) throws IOException
    {
        /* reject out-of-range values up front so the int cast below cannot wrap into a valid offset */
        if ( position < 0 || position > buffer_.limit() )
            throw new IllegalArgumentException("Position " + position + " is outside the mapped buffer.");
        buffer_.position((int)position);
    }

    /**
     * Appends an entry laid out as: int key length, key bytes,
     * int data length, data bytes.
     * @param keyBuffer buffer holding the key
     * @param buffer buffer holding the data
     * @throws IllegalArgumentException if the key buffer is null or empty
     */
    public void append(DataOutputBuffer keyBuffer, DataOutputBuffer buffer) throws IOException
    {
        /* check for null BEFORE asking for the length, otherwise a null key ends in a NullPointerException */
        if ( keyBuffer == null )
            throw new IllegalArgumentException("Key cannot be NULL or of zero length.");
        int keyBufLength = keyBuffer.getLength();
        if ( keyBufLength == 0 )
            throw new IllegalArgumentException("Key cannot be NULL or of zero length.");
        int length = buffer.getLength();
        buffer_.putInt(keyBufLength);
        buffer_.put(keyBuffer.getData(), 0, keyBufLength);
        buffer_.putInt(length);
        buffer_.put(buffer.getData(), 0, length);
    }

    /**
     * Appends an entry laid out as: UTF key, int data length, data bytes.
     * @throws IllegalArgumentException if the key is null
     */
    public void append(String key, DataOutputBuffer buffer) throws IOException
    {
        if ( key == null )
            throw new IllegalArgumentException("Key cannot be NULL.");
        int length = buffer.getLength();
        SequenceFile.writeUTF(buffer_, key);
        buffer_.putInt(length);
        buffer_.put(buffer.getData(), 0, length);
    }

    /**
     * Appends an entry laid out as: UTF key, int value length, value bytes.
     * @throws IllegalArgumentException if the key is null
     */
    public void append(String key, byte[] value) throws IOException
    {
        if ( key == null )
            throw new IllegalArgumentException("Key cannot be NULL.");
        SequenceFile.writeUTF(buffer_, key);
        buffer_.putInt(value.length);
        buffer_.put(value);
    }

    /**
     * Appends an entry laid out as: UTF key, long value.
     * @throws IllegalArgumentException if the key is null
     */
    public void append(String key, long value) throws IOException
    {
        if ( key == null )
            throw new IllegalArgumentException("Key cannot be NULL.");
        SequenceFile.writeUTF(buffer_, key);
        buffer_.putLong(value);
    }

    /**
     * Be extremely careful while using this API. This currently
     * used to write the commit log header in the commit logs.
     * If not used carefully it could completely screw up reads
     * of other key/value pairs that are written.
     * @param bytes the bytes to write
     * @return the buffer position after the write
     */
    public long writeDirect(byte[] bytes) throws IOException
    {
        buffer_.put(bytes);
        return buffer_.position();
    }

    /**
     * Forces the mapped content out, unmaps the buffer, truncates the
     * file to the number of bytes written and closes the channel.
     * Calling it again once the channel is closed does nothing.
     */
    public void close() throws IOException
    {
        /* a second pass would touch the unmapped buffer and truncate the file to zero bytes */
        if ( !fc_.isOpen() )
            return;
        try
        {
            buffer_.flip();
            /* remember the size before the buffer is unmapped */
            int bytesWritten = buffer_.limit();
            buffer_.force();
            unmap(buffer_);
            fc_.truncate(bytesWritten);
        }
        finally
        {
            /* release the file descriptor; the original left the channel open */
            fc_.close();
        }
    }

    /**
     * Writes the marker key, the given size and the footer bytes and
     * then closes this writer.
     * @param footer footer bytes to write
     * @param size value written as the int that precedes the footer
     */
    public void close(byte[] footer, int size) throws IOException
    {
        SequenceFile.writeUTF(buffer_, SequenceFile.marker_);
        buffer_.putInt(size);
        buffer_.put(footer);
        close();
    }

    public String getFileName()
    {
        return filename_;
    }

    /**
     * @return the current position of the mapped buffer
     */
    public long getFileSize() throws IOException
    {
        return buffer_.position();
    }
}
public static abstract class AbstractReader implements IFileReader
{
private static final short utfPrefix_ = 2;
protected RandomAccessFile file_;
protected String filename_;
AbstractReader(String filename)
{
filename_ = filename;
}
public String getFileName()
{
return filename_;
}
/**
* Return the position of the given key from the block index.
* @param key the key whose offset is to be extracted from the current block index
*/
public long getPositionFromBlockIndex(String key) throws IOException
{
long position = -1L;
/* note the beginning of the block index */
long blockIndexPosition = file_.getFilePointer();
/* read the block key. */
String blockIndexKey = file_.readUTF();
if ( !blockIndexKey.equals(SSTable.blockIndexKey_) )
throw new IOException("Unexpected position to be reading the block index from.");
/* read the size of the block index */
int size = file_.readInt();
/* Read the entire block index. */
byte[] bytes = new byte[size];
file_.readFully(bytes);
DataInputBuffer bufIn = new DataInputBuffer();
bufIn.reset(bytes, bytes.length);
/* Number of keys in the block. */
int keys = bufIn.readInt();
for ( int i = 0; i < keys; ++i )
{
String keyInBlock = bufIn.readUTF();
if ( keyInBlock.equals(key) )
{
position = bufIn.readLong();
break;
}
else
{
/*
* This is not the key we are looking for. So read its position
* and the size of the data associated with it. This was strored
* as the BlockMetadata.
*/
bufIn.readLong();
bufIn.readLong();
}
}
/* we do this because relative position of the key within a block is stored. */
if ( position != -1L )
position = blockIndexPosition - position;
return position;
}
/**
* Return the block index metadata for a given key.
*/
public SSTable.BlockMetadata getBlockMetadata(String key) throws IOException
{
SSTable.BlockMetadata blockMetadata = SSTable.BlockMetadata.NULL;
/* read the block key. */
String blockIndexKey = file_.readUTF();
if ( !blockIndexKey.equals(SSTable.blockIndexKey_) )
throw new IOException("Unexpected position to be reading the block index from.");
/* read the size of the block index */
int size = file_.readInt();
/* Read the entire block index. */
byte[] bytes = new byte[size];
file_.readFully(bytes);
DataInputBuffer bufIn = new DataInputBuffer();
bufIn.reset(bytes, bytes.length);
/* Number of keys in the block. */
int keys = bufIn.readInt();
for ( int i = 0; i < keys; ++i )
{
String keyInBlock = bufIn.readUTF();
if ( keyInBlock.equals(key) )
{
long position = bufIn.readLong();
long dataSize = bufIn.readLong();
blockMetadata = new SSTable.BlockMetadata(position, dataSize);
break;
}
else
{
/*
* This is not the key we are looking for. So read its position
* and the size of the data associated with it. This was strored
* as the BlockMetadata.
*/
bufIn.readLong();
bufIn.readLong();
}
}
return blockMetadata;
}
/**
* This function seeks to the position where the key data is present in the file
* in order to get the buffer cache populated with the key-data. This is done as
* a hint before the user actually queries the data.
* @param key the key whose data is being touched
* @param fData
*/
public long touch(String key, boolean fData) throws IOException
{
long bytesRead = -1L;
if ( isEOF() )
return bytesRead;
long startPosition = file_.getFilePointer();
String keyInDisk = file_.readUTF();
if ( keyInDisk != null )
{
/*
* If key on disk is greater than requested key
* we can bail out since we exploit the property
* of the SSTable format.
*/
if ( keyInDisk.compareTo(key) > 0 )
return bytesRead;
/*
* If we found the key then we populate the buffer that
* is passed in. If not then we skip over this key and
* position ourselves to read the next one.
*/
int dataSize = file_.readInt();
if ( keyInDisk.equals(key) )
{
/* return 0L to signal the key has been touched. */
bytesRead = 0L;
return bytesRead;
}
else
{
/* skip over data portion */
file_.seek(dataSize + file_.getFilePointer());
}
long endPosition = file_.getFilePointer();
bytesRead = endPosition - startPosition;
}
return bytesRead;
}
/**
* This method seek the disk head to the block index, finds
* the offset of the key within the block and seeks to that
* offset.
* @param key we are interested in.
* @param section indicates the location of the block index.
* @throws IOException
*/
private void seekTo(String key, SSTable.Range section) throws IOException
{
/* Goto the Block Index */
seek(section.end);
long position = getPositionFromBlockIndex(key);
seek(position);
}
/**
* This method dumps the next key/value into the DataOuputStream
* passed in. Always use this method to query for application
* specific data as it will have indexes.
*
* @param key key we are interested in.
* @param dos DataOutputStream that needs to be filled.
* @param cfName The name of the column family only without the ":"
* @param columnNames The list of columns in the cfName column family that we want to return
* @param section region of the file that needs to be read
* @return total number of bytes read/considered
*
*/
public long next(String key, DataOutputBuffer bufOut, String columnFamilyName, List<String> columnNames, SSTable.Range section) throws IOException
{
long bytesRead = -1L;
if ( isEOF() )
return bytesRead;
seekTo(key, section);
/* note the position where the key starts */
long startPosition = file_.getFilePointer();
String keyInDisk = file_.readUTF();
if ( keyInDisk != null )
{
/*
* If key on disk is greater than requested key
* we can bail out since we exploit the property
* of the SSTable format.
*/
if ( keyInDisk.compareTo(key) > 0 )
return bytesRead;
/*
* If we found the key then we populate the buffer that
* is passed in. If not then we skip over this key and
* position ourselves to read the next one.
*/
int dataSize = file_.readInt();
if ( keyInDisk.equals(key) )
{
/* write the key into buffer */
bufOut.writeUTF( keyInDisk );
/* if there is no column indexing enabled on this column then there are no indexes for it */
if(!DatabaseDescriptor.isNameIndexEnabled(columnFamilyName))
{
/* write the data size */
bufOut.writeInt(dataSize);
/* write the data into buffer, except the boolean we have read */
bufOut.write(file_, dataSize);
}
/* if we need to read the all the columns do not read the column indexes */
else if(columnNames == null || columnNames.size() == 0)
{
int bytesSkipped = IndexHelper.skip(file_);
/*
* read the correct number of bytes for the column family and
* write data into buffer
*/
dataSize -= bytesSkipped;
/* write the data size */
bufOut.writeInt(dataSize);
/* write the data into buffer, except the boolean we have read */
bufOut.write(file_, dataSize);
}
else
{
// TODO this is all kinds of screwed up -- we're basically reproducing the ColumnFamily deserialize/serialize code,
// but in a way that it's easy for things to get de-synched and broken.
/* check if we have an index */
boolean hasColumnIndexes = file_.readBoolean();
int totalBytesRead = 1;
List<ColumnPositionInfo> columnIndexList = null;
/* if we do then deserialize the index */
if(hasColumnIndexes)
{
columnIndexList = new ArrayList<IndexHelper.ColumnPositionInfo>();
/* read the index */
totalBytesRead += IndexHelper.deserializeIndex(file_, columnIndexList);
}
dataSize -= totalBytesRead;
/* read the column family name */
String cfName = file_.readUTF();
dataSize -= (utfPrefix_ + cfName.length());
/* read if this cf is marked for delete */
long markedForDeleteAt = file_.readLong();
dataSize -= 8;
/* read the total number of columns */
int totalNumCols = file_.readInt();
dataSize -= 4;
// TODO: this is name sorted - but eventually this should be sorted by the same criteria as the col index
/* sort the required list of columns */
Collections.sort(columnNames);
/* get the various column ranges we have to read */
List<IndexHelper.ColumnPositionInfo> columnRangeList = IndexHelper.getMultiColumnRangesFromIndex(columnNames, columnIndexList, dataSize, totalNumCols);
/* calculate the data size */
int numColsReturned = 0;
int dataSizeReturned = 0;
for(ColumnPositionInfo colRange : columnRangeList)
{
numColsReturned += colRange.numColumns();
dataSizeReturned += colRange.end() - colRange.start();
}
/*
* write the number of columns in the column family we are returning:
* dataSize that we are reading +
* length of column family name +
* one booleanfor deleted or not +
* one int for number of columns
*/
bufOut.writeInt(dataSizeReturned + utfPrefix_+cfName.length() + 4 + 1);
/* write the column family name */
bufOut.writeUTF(cfName);
/* write if this cf is marked for delete */
bufOut.writeLong(markedForDeleteAt);
/* write number of columns */
bufOut.writeInt(numColsReturned);
int prevPosition = 0;
/* now write all the columns we are required to write */
for(ColumnPositionInfo colRange : columnRangeList)
{
/* seek to the correct offset to the data */
file_.skipBytes(colRange.start() - prevPosition);
bufOut.write(file_, colRange.end() - colRange.start());
prevPosition = colRange.end();
}
}
}
else
{
/* skip over data portion */
file_.seek(dataSize + file_.getFilePointer());
}
long endPosition = file_.getFilePointer();
bytesRead = endPosition - startPosition;
}
return bytesRead;
}
/**
* This method dumps the next key/value into the DataOuputStream
* passed in.
*
* @param dos - DataOutputStream that needs to be filled.
* @return total number of bytes read/considered
*/
public long next(DataOutputBuffer bufOut) throws IOException
{
long bytesRead = -1L;
if ( isEOF() )
return bytesRead;
long startPosition = file_.getFilePointer();
String key = file_.readUTF();
if ( key != null )
{
/* write the key into buffer */
bufOut.writeUTF( key );
int dataSize = file_.readInt();
/* write data size into buffer */
bufOut.writeInt(dataSize);
/* write the data into buffer */
bufOut.write(file_, dataSize);
long endPosition = file_.getFilePointer();
bytesRead = endPosition - startPosition;
}
/*
* If we have read the bloom filter in the data
* file we know we are at the end of the file
* and no further key processing is required. So
* we return -1 indicating we are at the end of
* the file.
*/
if ( key.equals(SequenceFile.marker_) )
bytesRead = -1L;
return bytesRead;
}
/**
* This method dumps the next key/value into the DataOuputStream
* passed in.
*
* @param key - key we are interested in.
* @param dos - DataOutputStream that needs to be filled.
* @param section region of the file that needs to be read
* @return total number of bytes read/considered
*/
public long next(String key, DataOutputBuffer bufOut, SSTable.Range section) throws IOException
{
long bytesRead = -1L;
if ( isEOF() )
return bytesRead;
seekTo(key, section);
/* note the position where the key starts */
long startPosition = file_.getFilePointer();
String keyInDisk = file_.readUTF();
if ( keyInDisk != null )
{
/*
* If key on disk is greater than requested key
* we can bail out since we exploit the property
* of the SSTable format.
*/
if ( keyInDisk.compareTo(key) > 0 )
return bytesRead;
/*
* If we found the key then we populate the buffer that
* is passed in. If not then we skip over this key and
* position ourselves to read the next one.
*/
int dataSize = file_.readInt();
if ( keyInDisk.equals(key) )
{
/* write the key into buffer */
bufOut.writeUTF( keyInDisk );
/* write data size into buffer */
bufOut.writeInt(dataSize);
/* write the data into buffer */
bufOut.write(file_, dataSize);
}
else
{
/* skip over data portion */
file_.seek(dataSize + file_.getFilePointer());
}
long endPosition = file_.getFilePointer();
bytesRead = endPosition - startPosition;
}
return bytesRead;
}
}
/**
 * Reader on top of a RandomAccessFile opened in "r" mode.
 */
public static class Reader extends AbstractReader
{
    Reader(String filename) throws FileNotFoundException
    {
        super(filename);
        this.file_ = new RandomAccessFile(filename, "r");
    }

    /**
     * @return the length of the underlying file
     */
    public long getEOF() throws IOException
    {
        return this.file_.length();
    }

    /**
     * @return the current file pointer
     */
    public long getCurrentPosition() throws IOException
    {
        return this.file_.getFilePointer();
    }

    /**
     * @return whether the descriptor of the underlying file reports itself valid
     */
    public boolean isHealthyFileDescriptor() throws IOException
    {
        return this.file_.getFD().valid();
    }

    public void seek(long position) throws IOException
    {
        this.file_.seek(position);
    }

    /**
     * @return true when the current position equals the file length
     */
    public boolean isEOF() throws IOException
    {
        long current = getCurrentPosition();
        long end = getEOF();
        return current == end;
    }

    /**
     * Be extremely careful while using this API. This currently
     * used to read the commit log header from the commit logs.
     * Treat this as an internal API.
     * @param bytes array that is filled completely from the file
     */
    public void readDirect(byte[] bytes) throws IOException
    {
        this.file_.readFully(bytes);
    }

    public void close() throws IOException
    {
        this.file_.close();
    }
}
/**
 * Reader on top of a BufferedRandomAccessFile opened in "r" mode
 * with the requested buffer size.
 */
public static class BufferReader extends AbstractReader
{
    /**
     * @param filename name of the file to read
     * @param size size passed on to BufferedRandomAccessFile
     */
    BufferReader(String filename, int size) throws FileNotFoundException
    {
        super(filename);
        file_ = new BufferedRandomAccessFile(filename, "r", size);
    }

    /**
     * @return the length of the underlying file
     */
    public long getEOF() throws IOException
    {
        return file_.length();
    }

    /**
     * @return the current file pointer
     */
    public long getCurrentPosition() throws IOException
    {
        return file_.getFilePointer();
    }

    /**
     * @return whether the descriptor of the underlying file reports itself valid
     */
    public boolean isHealthyFileDescriptor() throws IOException
    {
        return file_.getFD().valid();
    }

    public void seek(long position) throws IOException
    {
        file_.seek(position);
    }

    /**
     * @return true when the current position equals the file length
     */
    public boolean isEOF() throws IOException
    {
        return ( getCurrentPosition() == getEOF() );
    }

    /**
     * Be extremely careful while using this API. This currently
     * used to read the commit log header from the commit logs.
     * Treat this as an internal API.
     * @param bytes array that is filled completely from the file
     */
    public void readDirect(byte[] bytes) throws IOException
    {
        file_.readFully(bytes);
    }

    public void close() throws IOException
    {
        file_.close();
    }
}
/* logger shared by the nested classes; never reassigned, hence final */
private static final Logger logger_ = Logger.getLogger( SequenceFile.class ) ;
/* number of bytes the length prefix of a UTF string occupies */
public static final short utfPrefix_ = 2;
/* key written in front of the footer by the writers' close(footer, size) */
static final String marker_ = "Bloom-Filter";
/**
 * Factory for {@link Writer}.
 * @param filename name of the file to write to
 * @return a new Writer for the file
 */
public static Writer writer(String filename) throws IOException
{
    Writer plainWriter = new Writer(filename);
    return plainWriter;
}
/**
 * Factory for {@link BufferWriter}.
 * @param filename name of the file to write to
 * @param size size handed to the BufferWriter constructor
 * @return a new BufferWriter for the file
 */
public static BufferWriter bufferedWriter(String filename, int size) throws IOException
{
    BufferWriter buffered = new BufferWriter(filename, size);
    return buffered;
}
/**
 * Factory for {@link ConcurrentWriter}.
 * @param filename name of the file to write to
 * @return a new ConcurrentWriter for the file
 */
public static ConcurrentWriter concurrentWriter(String filename) throws IOException
{
    ConcurrentWriter channelWriter = new ConcurrentWriter(filename);
    return channelWriter;
}
/**
 * Factory for {@link FastConcurrentWriter}.
 * @param filename name of the file to write to
 * @param size size handed to the FastConcurrentWriter constructor
 * @return a new FastConcurrentWriter for the file
 */
public static FastConcurrentWriter fastWriter(String filename, int size) throws IOException
{
    FastConcurrentWriter mappedWriter = new FastConcurrentWriter(filename, size);
    return mappedWriter;
}
/**
 * Factory for {@link Reader}.
 * @param filename name of the file to read
 * @return a new Reader for the file
 */
public static Reader reader(String filename) throws FileNotFoundException
{
    Reader plainReader = new Reader(filename);
    return plainReader;
}
/**
 * Factory for {@link BufferReader}.
 * @param filename name of the file to read
 * @param size size handed to the BufferReader constructor
 * @return a new BufferReader for the file
 */
public static BufferReader bufferedReader(String filename, int size) throws IOException
{
    BufferReader buffered = new BufferReader(filename, size);
    return buffered;
}
/**
 * Consumes one byte from the buffer and interprets it as a boolean.
 * @param buffer buffer to read the byte from
 * @return true only if the byte read equals 1
 */
public static boolean readBoolean(ByteBuffer buffer)
{
    byte flag = buffer.get();
    return flag == 1;
}
/**
 * Writes a string to the buffer as a two byte big-endian length
 * followed by the encoded characters: chars 0x0001-0x007F take one
 * byte, chars above 0x07FF take three bytes and everything else
 * (including 0x0000) takes two bytes.
 * @param buffer buffer to write the serialized version into
 * @param str string to serialize
 * @throws IllegalArgumentException if the encoded form is longer than
 *         65535 bytes and so cannot be described by the two byte length
 */
protected static void writeUTF(ByteBuffer buffer, String str)
{
    int strlen = str.length();
    int utflen = 0;
    /* use charAt instead of copying String to char array */
    for ( int i = 0; i < strlen; i++ )
    {
        int c = str.charAt(i);
        if ( (c >= 0x0001) && (c <= 0x007F) )
        {
            utflen += 1;
        }
        else if ( c > 0x07FF )
        {
            utflen += 3;
        }
        else
        {
            utflen += 2;
        }
    }
    /*
     * Only the low 16 bits of the length are stored. Writing a longer
     * string would emit all of its bytes behind a truncated length and
     * corrupt everything that follows, so refuse it instead.
     */
    if ( utflen > 65535 )
        throw new IllegalArgumentException("Encoded string too long: " + utflen + " bytes");

    byte[] bytearr = new byte[utflen + 2];
    int count = 0;
    bytearr[count++] = (byte) ((utflen >>> 8) & 0xFF);
    bytearr[count++] = (byte) (utflen & 0xFF);
    for ( int i = 0; i < strlen; i++ )
    {
        int c = str.charAt(i);
        if ( (c >= 0x0001) && (c <= 0x007F) )
        {
            bytearr[count++] = (byte) c;
        }
        else if ( c > 0x07FF )
        {
            bytearr[count++] = (byte) (0xE0 | ((c >> 12) & 0x0F));
            bytearr[count++] = (byte) (0x80 | ((c >> 6) & 0x3F));
            bytearr[count++] = (byte) (0x80 | (c & 0x3F));
        }
        else
        {
            bytearr[count++] = (byte) (0xC0 | ((c >> 6) & 0x1F));
            bytearr[count++] = (byte) (0x80 | (c & 0x3F));
        }
    }
    buffer.put(bytearr, 0, utflen + 2);
}
/**
 * Read a UTF8 string from a serialized buffer: a two byte length
 * followed by that many encoded bytes.
 * @param in buffer from which a UTF8 string is read
 * @return a Java String
 * @throws UTFDataFormatException if the encoded bytes are malformed
 */
protected static String readUTF(ByteBuffer in) throws IOException
{
    /*
     * The length prefix is an unsigned 16 bit value. Without the mask
     * any length above 32767 comes back negative and the array
     * allocation below fails.
     */
    int utflen = in.getShort() & 0xFFFF;
    byte[] bytearr = new byte[utflen];
    char[] chararr = new char[utflen];
    int count = 0;
    int chararr_count = 0;
    in.get(bytearr, 0, utflen);

    while ( count < utflen )
    {
        int c = bytearr[count] & 0xff;
        switch ( c >> 4 )
        {
            case 0:
            case 1:
            case 2:
            case 3:
            case 4:
            case 5:
            case 6:
            case 7:
            {
                /* 0xxxxxxx */
                count++;
                chararr[chararr_count++] = (char) c;
                break;
            }
            case 12:
            case 13:
            {
                /* 110x xxxx 10xx xxxx */
                count += 2;
                if ( count > utflen )
                    throw new UTFDataFormatException(
                            "malformed input: partial character at end");
                int char2 = bytearr[count - 1];
                if ( (char2 & 0xC0) != 0x80 )
                    throw new UTFDataFormatException(
                            "malformed input around byte " + count);
                chararr[chararr_count++] = (char) (((c & 0x1F) << 6) | (char2 & 0x3F));
                break;
            }
            case 14:
            {
                /* 1110 xxxx 10xx xxxx 10xx xxxx */
                count += 3;
                if ( count > utflen )
                    throw new UTFDataFormatException(
                            "malformed input: partial character at end");
                int char2 = bytearr[count - 2];
                int char3 = bytearr[count - 1];
                if ( ((char2 & 0xC0) != 0x80) || ((char3 & 0xC0) != 0x80) )
                    throw new UTFDataFormatException(
                            "malformed input around byte " + (count - 1));
                chararr[chararr_count++] = (char) (((c & 0x0F) << 12)
                        | ((char2 & 0x3F) << 6) | (char3 & 0x3F));
                break;
            }
            default:
                /* 10xx xxxx, 1111 xxxx */
                throw new UTFDataFormatException("malformed input around byte "
                        + count);
        }
    }
    // The number of chars produced may be less than utflen
    return new String(chararr, 0, chararr_count);
}
/**
 * Splits the given name on "-" and parses the third piece as a short.
 * @param file name to take the id from
 * @return the numeric value of the third dash-separated piece
 */
public static short getFileId(String file)
{
    String[] parts = file.split("-");
    String idPart = parts[2];
    return Short.parseShort(idPart);
}
}