/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.facebook.infrastructure.io; import java.io.*; import java.lang.reflect.Method; import java.nio.ByteBuffer; import java.nio.MappedByteBuffer; import java.nio.channels.FileChannel; import java.security.AccessController; import java.security.PrivilegedAction; import java.util.ArrayList; import java.util.Collections; import java.util.List; import org.apache.log4j.Logger; import com.facebook.infrastructure.config.DatabaseDescriptor; import com.facebook.infrastructure.db.RowMutation; import com.facebook.infrastructure.io.IndexHelper.ColumnPositionInfo; import com.facebook.infrastructure.utils.LogUtil; /** * This class writes key/value pairs seqeuntially to disk. It is * also used to read sequentially from disk. However one could * jump to random positions to read data from the file. This class * also has many implementations of the IFileWriter and IFileReader * interfaces which are exposed through factory methods. * * Author : Avinash Lakshman ( alakshman@facebook.com) & Prashant Malik ( pmalik@facebook.com ) & Karthik Ranganathan ( kranganathan@facebook.com ) */ public class SequenceFile { public static abstract class AbstractWriter implements IFileWriter { protected String filename_; AbstractWriter(String filename) { filename_ = filename; } public String getFileName() { return filename_; } public long lastModified() { File file = new File(filename_); return file.lastModified(); } } public static class Writer extends AbstractWriter { private RandomAccessFile file_; Writer(String filename) throws IOException { super(filename); File file = new File(filename); boolean isNewFile = false; if ( !file.exists() ) { file.createNewFile(); } file_ = new RandomAccessFile(file, "rw"); } public long getCurrentPosition() throws IOException { return file_.getFilePointer(); } public void seek(long position) throws IOException { file_.seek(position); } public void append(DataOutputBuffer keyBuffer, DataOutputBuffer buffer) throws IOException { int keyBufLength = keyBuffer.getLength(); if ( keyBuffer == null || keyBufLength == 0 ) throw new IllegalArgumentException("Key cannot be NULL or of zero length."); file_.seek(file_.getFilePointer()); file_.writeInt(keyBufLength); file_.write(keyBuffer.getData(), 0, keyBufLength); int length = buffer.getLength(); file_.writeInt(length); file_.write(buffer.getData(), 0, length); } public void append(String key, DataOutputBuffer buffer) throws IOException { if ( key == null ) throw new IllegalArgumentException("Key cannot be NULL."); file_.seek(file_.getFilePointer()); file_.writeUTF(key); int length = buffer.getLength(); file_.writeInt(length); file_.write(buffer.getData(), 0, length); } public void append(String key, byte[] value) throws IOException { if ( key == null ) throw new IllegalArgumentException("Key cannot be NULL."); file_.seek(file_.getFilePointer()); file_.writeUTF(key); file_.writeInt(value.length); file_.write(value); } public void append(String key, long value) throws IOException { if ( key == null ) throw new IllegalArgumentException("Key cannot be NULL."); file_.seek(file_.getFilePointer()); file_.writeUTF(key); file_.writeLong(value); } /** * Be extremely careful while using this API. This currently * used to write the commit log header in the commit logs. * If not used carefully it could completely screw up reads * of other key/value pairs that are written. * @param bytes the bytes to write */ public long writeDirect(byte[] bytes) throws IOException { file_.write(bytes); return file_.getFilePointer(); } public void close() throws IOException { file_.close(); } public void close(byte[] footer, int size) throws IOException { file_.writeUTF(SequenceFile.marker_); file_.writeInt(size); file_.write(footer); } public String getFileName() { return filename_; } public long getFileSize() throws IOException { return file_.length(); } } public static class BufferWriter extends AbstractWriter { private BufferedRandomAccessFile file_; private long position_ = 0L; BufferWriter(String filename, int size) throws IOException { super(filename); File file = new File(filename); file_ = new BufferedRandomAccessFile(file, "rw", size); if ( !file.exists() ) { file.createNewFile(); } } public long getCurrentPosition() throws IOException { return file_.getFilePointer(); } public void seek(long position) throws IOException { file_.seek(position); } public void append(DataOutputBuffer keyBuffer, DataOutputBuffer buffer) throws IOException { int keyBufLength = keyBuffer.getLength(); if ( keyBuffer == null || keyBufLength == 0 ) throw new IllegalArgumentException("Key cannot be NULL or of zero length."); file_.seek(file_.getFilePointer()); file_.writeInt(keyBufLength); file_.write(keyBuffer.getData(), 0, keyBufLength); int length = buffer.getLength(); file_.writeInt(length); file_.write(buffer.getData(), 0, length); } public void append(String key, DataOutputBuffer buffer) throws IOException { if ( key == null ) throw new IllegalArgumentException("Key cannot be NULL."); file_.seek(file_.getFilePointer()); file_.writeUTF(key); int length = buffer.getLength(); file_.writeInt(length); file_.write(buffer.getData(), 0, length); } public void append(String key, byte[] value) throws IOException { if ( key == null ) throw new IllegalArgumentException("Key cannot be NULL."); file_.seek(file_.getFilePointer()); file_.writeUTF(key); file_.writeInt(value.length); file_.write(value); } public void append(String key, long value) throws IOException { if ( key == null ) throw new IllegalArgumentException("Key cannot be NULL."); file_.seek(file_.getFilePointer()); file_.writeUTF(key); file_.writeLong(value); } /** * Be extremely careful while using this API. This currently * used to write the commit log header in the commit logs. * If not used carefully it could completely screw up reads * of other key/value pairs that are written. * @param bytes the bytes to write */ public long writeDirect(byte[] bytes) throws IOException { file_.write(bytes); return file_.getFilePointer(); } public void close() throws IOException { file_.close(); } public void close(byte[] footer, int size) throws IOException { file_.writeUTF(SequenceFile.marker_); file_.writeInt(size); file_.write(footer); } public String getFileName() { return filename_; } public long getFileSize() throws IOException { return file_.length(); } } public static class ConcurrentWriter extends AbstractWriter { private FileChannel fc_; public ConcurrentWriter(String filename) throws IOException { super(filename); RandomAccessFile raf = new RandomAccessFile(filename, "rw"); fc_ = raf.getChannel(); } public long getCurrentPosition() throws IOException { return fc_.position(); } public void seek(long position) throws IOException { fc_.position(position); } public void append(DataOutputBuffer keyBuffer, DataOutputBuffer buffer) throws IOException { int keyBufLength = keyBuffer.getLength(); if ( keyBuffer == null || keyBufLength == 0 ) throw new IllegalArgumentException("Key cannot be NULL or of zero length."); /* Size allocated "int" for key length + key + "int" for data length + data */ int length = buffer.getLength(); ByteBuffer byteBuffer = ByteBuffer.allocateDirect( 4 + keyBufLength + 4 + length ); byteBuffer.putInt(keyBufLength); byteBuffer.put(keyBuffer.getData(), 0, keyBufLength); byteBuffer.putInt(length); byteBuffer.put(buffer.getData(), 0, length); byteBuffer.flip(); fc_.write(byteBuffer); } public void append(String key, DataOutputBuffer buffer) throws IOException { if ( key == null ) throw new IllegalArgumentException("Key cannot be NULL."); int length = buffer.getLength(); /* Size allocated : utfPrefix_ + key length + "int" for data size + data */ ByteBuffer byteBuffer = ByteBuffer.allocateDirect( SequenceFile.utfPrefix_ + key.length() + 4 + length); SequenceFile.writeUTF(byteBuffer, key); byteBuffer.putInt(length); byteBuffer.put(buffer.getData(), 0, length); byteBuffer.flip(); fc_.write(byteBuffer); } public void append(String key, byte[] value) throws IOException { if ( key == null ) throw new IllegalArgumentException("Key cannot be NULL."); /* Size allocated key length + "int" for data size + data */ ByteBuffer byteBuffer = ByteBuffer.allocateDirect(utfPrefix_ + key.length() + 4 + value.length); SequenceFile.writeUTF(byteBuffer, key); byteBuffer.putInt(value.length); byteBuffer.put(value); byteBuffer.flip(); fc_.write(byteBuffer); } public void append(String key, long value) throws IOException { if ( key == null ) throw new IllegalArgumentException("Key cannot be NULL."); /* Size allocated key length + a long */ ByteBuffer byteBuffer = ByteBuffer.allocateDirect(SequenceFile.utfPrefix_ + key.length() + 8); SequenceFile.writeUTF(byteBuffer, key); byteBuffer.putLong(value); byteBuffer.flip(); fc_.write(byteBuffer); } /* * Be extremely careful while using this API. This currently * used to write the commit log header in the commit logs. * If not used carefully it could completely screw up reads * of other key/value pairs that are written. */ public long writeDirect(byte[] bytes) throws IOException { ByteBuffer byteBuffer = ByteBuffer.allocateDirect(bytes.length); byteBuffer.put(bytes); byteBuffer.flip(); fc_.write(byteBuffer); return fc_.position(); } public void close() throws IOException { fc_.close(); } public void close(byte[] footer, int size) throws IOException { /* Size is marker length + "int" for size + footer data */ ByteBuffer byteBuffer = ByteBuffer.allocateDirect( utfPrefix_ + SequenceFile.marker_.length() + 4 + footer.length); SequenceFile.writeUTF(byteBuffer, SequenceFile.marker_); byteBuffer.putInt(size); byteBuffer.put(footer); byteBuffer.flip(); fc_.write(byteBuffer); } public String getFileName() { return filename_; } public long getFileSize() throws IOException { return fc_.size(); } } public static class FastConcurrentWriter extends AbstractWriter { private FileChannel fc_; private MappedByteBuffer buffer_; public FastConcurrentWriter(String filename, int size) throws IOException { super(filename); fc_ = new RandomAccessFile(filename, "rw").getChannel(); buffer_ = fc_.map( FileChannel.MapMode.READ_WRITE, 0, size ); buffer_.load(); } void unmap(final Object buffer) { AccessController.doPrivileged( new PrivilegedAction<MappedByteBuffer>() { public MappedByteBuffer run() { try { Method getCleanerMethod = buffer.getClass().getMethod("cleaner", new Class[0]); getCleanerMethod.setAccessible(true); sun.misc.Cleaner cleaner = (sun.misc.Cleaner)getCleanerMethod.invoke(buffer,new Object[0]); cleaner.clean(); } catch(Throwable e) { logger_.warn( LogUtil.throwableToString(e) ); } return null; } }); } public long getCurrentPosition() throws IOException { return buffer_.position(); } public void seek(long position) throws IOException { buffer_.position((int)position); } public void append(DataOutputBuffer keyBuffer, DataOutputBuffer buffer) throws IOException { int keyBufLength = keyBuffer.getLength(); if ( keyBuffer == null || keyBufLength == 0 ) throw new IllegalArgumentException("Key cannot be NULL or of zero length."); int length = buffer.getLength(); buffer_.putInt(keyBufLength); buffer_.put(keyBuffer.getData(), 0, keyBufLength); buffer_.putInt(length); buffer_.put(buffer.getData(), 0, length); } public void append(String key, DataOutputBuffer buffer) throws IOException { if ( key == null ) throw new IllegalArgumentException("Key cannot be NULL."); int length = buffer.getLength(); SequenceFile.writeUTF(buffer_, key); buffer_.putInt(length); buffer_.put(buffer.getData(), 0, length); } public void append(String key, byte[] value) throws IOException { if ( key == null ) throw new IllegalArgumentException("Key cannot be NULL."); SequenceFile.writeUTF(buffer_, key); buffer_.putInt(value.length); buffer_.put(value); } public void append(String key, long value) throws IOException { if ( key == null ) throw new IllegalArgumentException("Key cannot be NULL."); SequenceFile.writeUTF(buffer_, key); buffer_.putLong(value); } /* * Be extremely careful while using this API. This currently * used to write the commit log header in the commit logs. * If not used carefully it could completely screw up reads * of other key/value pairs that are written. */ public long writeDirect(byte[] bytes) throws IOException { buffer_.put(bytes); return buffer_.position(); } public void close() throws IOException { buffer_.flip(); buffer_.force(); unmap(buffer_); fc_.truncate(buffer_.limit()); } public void close(byte[] footer, int size) throws IOException { SequenceFile.writeUTF(buffer_, SequenceFile.marker_); buffer_.putInt(size); buffer_.put(footer); close(); } public String getFileName() { return filename_; } public long getFileSize() throws IOException { return buffer_.position(); } } public static abstract class AbstractReader implements IFileReader { private static final short utfPrefix_ = 2; protected RandomAccessFile file_; protected String filename_; AbstractReader(String filename) { filename_ = filename; } public String getFileName() { return filename_; } /** * Return the position of the given key from the block index. * @param key the key whose offset is to be extracted from the current block index */ public long getPositionFromBlockIndex(String key) throws IOException { long position = -1L; /* note the beginning of the block index */ long blockIndexPosition = file_.getFilePointer(); /* read the block key. */ String blockIndexKey = file_.readUTF(); if ( !blockIndexKey.equals(SSTable.blockIndexKey_) ) throw new IOException("Unexpected position to be reading the block index from."); /* read the size of the block index */ int size = file_.readInt(); /* Read the entire block index. */ byte[] bytes = new byte[size]; file_.readFully(bytes); DataInputBuffer bufIn = new DataInputBuffer(); bufIn.reset(bytes, bytes.length); /* Number of keys in the block. */ int keys = bufIn.readInt(); for ( int i = 0; i < keys; ++i ) { String keyInBlock = bufIn.readUTF(); if ( keyInBlock.equals(key) ) { position = bufIn.readLong(); break; } else { /* * This is not the key we are looking for. So read its position * and the size of the data associated with it. This was strored * as the BlockMetadata. */ bufIn.readLong(); bufIn.readLong(); } } /* we do this because relative position of the key within a block is stored. */ if ( position != -1L ) position = blockIndexPosition - position; return position; } /** * Return the block index metadata for a given key. */ public SSTable.BlockMetadata getBlockMetadata(String key) throws IOException { SSTable.BlockMetadata blockMetadata = SSTable.BlockMetadata.NULL; /* read the block key. */ String blockIndexKey = file_.readUTF(); if ( !blockIndexKey.equals(SSTable.blockIndexKey_) ) throw new IOException("Unexpected position to be reading the block index from."); /* read the size of the block index */ int size = file_.readInt(); /* Read the entire block index. */ byte[] bytes = new byte[size]; file_.readFully(bytes); DataInputBuffer bufIn = new DataInputBuffer(); bufIn.reset(bytes, bytes.length); /* Number of keys in the block. */ int keys = bufIn.readInt(); for ( int i = 0; i < keys; ++i ) { String keyInBlock = bufIn.readUTF(); if ( keyInBlock.equals(key) ) { long position = bufIn.readLong(); long dataSize = bufIn.readLong(); blockMetadata = new SSTable.BlockMetadata(position, dataSize); break; } else { /* * This is not the key we are looking for. So read its position * and the size of the data associated with it. This was strored * as the BlockMetadata. */ bufIn.readLong(); bufIn.readLong(); } } return blockMetadata; } /** * This function seeks to the position where the key data is present in the file * in order to get the buffer cache populated with the key-data. This is done as * a hint before the user actually queries the data. * @param key the key whose data is being touched * @param fData */ public long touch(String key, boolean fData) throws IOException { long bytesRead = -1L; if ( isEOF() ) return bytesRead; long startPosition = file_.getFilePointer(); String keyInDisk = file_.readUTF(); if ( keyInDisk != null ) { /* * If key on disk is greater than requested key * we can bail out since we exploit the property * of the SSTable format. */ if ( keyInDisk.compareTo(key) > 0 ) return bytesRead; /* * If we found the key then we populate the buffer that * is passed in. If not then we skip over this key and * position ourselves to read the next one. */ int dataSize = file_.readInt(); if ( keyInDisk.equals(key) ) { /* return 0L to signal the key has been touched. */ bytesRead = 0L; return bytesRead; } else { /* skip over data portion */ file_.seek(dataSize + file_.getFilePointer()); } long endPosition = file_.getFilePointer(); bytesRead = endPosition - startPosition; } return bytesRead; } /** * This method seek the disk head to the block index, finds * the offset of the key within the block and seeks to that * offset. * @param key we are interested in. * @param section indicates the location of the block index. * @throws IOException */ private void seekTo(String key, SSTable.Range section) throws IOException { /* Goto the Block Index */ seek(section.end); long position = getPositionFromBlockIndex(key); seek(position); } /** * This method dumps the next key/value into the DataOuputStream * passed in. Always use this method to query for application * specific data as it will have indexes. * * @param key key we are interested in. * @param dos DataOutputStream that needs to be filled. * @param cfName The name of the column family only without the ":" * @param columnNames The list of columns in the cfName column family that we want to return * @param section region of the file that needs to be read * @return total number of bytes read/considered * */ public long next(String key, DataOutputBuffer bufOut, String columnFamilyName, List<String> columnNames, SSTable.Range section) throws IOException { long bytesRead = -1L; if ( isEOF() ) return bytesRead; seekTo(key, section); /* note the position where the key starts */ long startPosition = file_.getFilePointer(); String keyInDisk = file_.readUTF(); if ( keyInDisk != null ) { /* * If key on disk is greater than requested key * we can bail out since we exploit the property * of the SSTable format. */ if ( keyInDisk.compareTo(key) > 0 ) return bytesRead; /* * If we found the key then we populate the buffer that * is passed in. If not then we skip over this key and * position ourselves to read the next one. */ int dataSize = file_.readInt(); if ( keyInDisk.equals(key) ) { /* write the key into buffer */ bufOut.writeUTF( keyInDisk ); /* if there is no column indexing enabled on this column then there are no indexes for it */ if(!DatabaseDescriptor.isNameIndexEnabled(columnFamilyName)) { /* write the data size */ bufOut.writeInt(dataSize); /* write the data into buffer, except the boolean we have read */ bufOut.write(file_, dataSize); } /* if we need to read the all the columns do not read the column indexes */ else if(columnNames == null || columnNames.size() == 0) { int bytesSkipped = IndexHelper.skip(file_); /* * read the correct number of bytes for the column family and * write data into buffer */ dataSize -= bytesSkipped; /* write the data size */ bufOut.writeInt(dataSize); /* write the data into buffer, except the boolean we have read */ bufOut.write(file_, dataSize); } else { // TODO this is all kinds of screwed up -- we're basically reproducing the ColumnFamily deserialize/serialize code, // but in a way that it's easy for things to get de-synched and broken. /* check if we have an index */ boolean hasColumnIndexes = file_.readBoolean(); int totalBytesRead = 1; List<ColumnPositionInfo> columnIndexList = null; /* if we do then deserialize the index */ if(hasColumnIndexes) { columnIndexList = new ArrayList<IndexHelper.ColumnPositionInfo>(); /* read the index */ totalBytesRead += IndexHelper.deserializeIndex(file_, columnIndexList); } dataSize -= totalBytesRead; /* read the column family name */ String cfName = file_.readUTF(); dataSize -= (utfPrefix_ + cfName.length()); /* read if this cf is marked for delete */ long markedForDeleteAt = file_.readLong(); dataSize -= 8; /* read the total number of columns */ int totalNumCols = file_.readInt(); dataSize -= 4; // TODO: this is name sorted - but eventually this should be sorted by the same criteria as the col index /* sort the required list of columns */ Collections.sort(columnNames); /* get the various column ranges we have to read */ List<IndexHelper.ColumnPositionInfo> columnRangeList = IndexHelper.getMultiColumnRangesFromIndex(columnNames, columnIndexList, dataSize, totalNumCols); /* calculate the data size */ int numColsReturned = 0; int dataSizeReturned = 0; for(ColumnPositionInfo colRange : columnRangeList) { numColsReturned += colRange.numColumns(); dataSizeReturned += colRange.end() - colRange.start(); } /* * write the number of columns in the column family we are returning: * dataSize that we are reading + * length of column family name + * one booleanfor deleted or not + * one int for number of columns */ bufOut.writeInt(dataSizeReturned + utfPrefix_+cfName.length() + 4 + 1); /* write the column family name */ bufOut.writeUTF(cfName); /* write if this cf is marked for delete */ bufOut.writeLong(markedForDeleteAt); /* write number of columns */ bufOut.writeInt(numColsReturned); int prevPosition = 0; /* now write all the columns we are required to write */ for(ColumnPositionInfo colRange : columnRangeList) { /* seek to the correct offset to the data */ file_.skipBytes(colRange.start() - prevPosition); bufOut.write(file_, colRange.end() - colRange.start()); prevPosition = colRange.end(); } } } else { /* skip over data portion */ file_.seek(dataSize + file_.getFilePointer()); } long endPosition = file_.getFilePointer(); bytesRead = endPosition - startPosition; } return bytesRead; } /** * This method dumps the next key/value into the DataOuputStream * passed in. * * @param dos - DataOutputStream that needs to be filled. * @return total number of bytes read/considered */ public long next(DataOutputBuffer bufOut) throws IOException { long bytesRead = -1L; if ( isEOF() ) return bytesRead; long startPosition = file_.getFilePointer(); String key = file_.readUTF(); if ( key != null ) { /* write the key into buffer */ bufOut.writeUTF( key ); int dataSize = file_.readInt(); /* write data size into buffer */ bufOut.writeInt(dataSize); /* write the data into buffer */ bufOut.write(file_, dataSize); long endPosition = file_.getFilePointer(); bytesRead = endPosition - startPosition; } /* * If we have read the bloom filter in the data * file we know we are at the end of the file * and no further key processing is required. So * we return -1 indicating we are at the end of * the file. */ if ( key.equals(SequenceFile.marker_) ) bytesRead = -1L; return bytesRead; } /** * This method dumps the next key/value into the DataOuputStream * passed in. * * @param key - key we are interested in. * @param dos - DataOutputStream that needs to be filled. * @param section region of the file that needs to be read * @return total number of bytes read/considered */ public long next(String key, DataOutputBuffer bufOut, SSTable.Range section) throws IOException { long bytesRead = -1L; if ( isEOF() ) return bytesRead; seekTo(key, section); /* note the position where the key starts */ long startPosition = file_.getFilePointer(); String keyInDisk = file_.readUTF(); if ( keyInDisk != null ) { /* * If key on disk is greater than requested key * we can bail out since we exploit the property * of the SSTable format. */ if ( keyInDisk.compareTo(key) > 0 ) return bytesRead; /* * If we found the key then we populate the buffer that * is passed in. If not then we skip over this key and * position ourselves to read the next one. */ int dataSize = file_.readInt(); if ( keyInDisk.equals(key) ) { /* write the key into buffer */ bufOut.writeUTF( keyInDisk ); /* write data size into buffer */ bufOut.writeInt(dataSize); /* write the data into buffer */ bufOut.write(file_, dataSize); } else { /* skip over data portion */ file_.seek(dataSize + file_.getFilePointer()); } long endPosition = file_.getFilePointer(); bytesRead = endPosition - startPosition; } return bytesRead; } } public static class Reader extends AbstractReader { Reader(String filename) throws FileNotFoundException { super(filename); file_ = new RandomAccessFile(filename, "r"); } public long getEOF() throws IOException { return file_.length(); } public long getCurrentPosition() throws IOException { return file_.getFilePointer(); } public boolean isHealthyFileDescriptor() throws IOException { return file_.getFD().valid(); } public void seek(long position) throws IOException { file_.seek(position); } public boolean isEOF() throws IOException { return ( getCurrentPosition() == getEOF() ); } /** * Be extremely careful while using this API. This currently * used to read the commit log header from the commit logs. * Treat this as an internal API. * @param bytes read from the buffer into the this array */ public void readDirect(byte[] bytes) throws IOException { file_.readFully(bytes); } public void close() throws IOException { file_.close(); } } public static class BufferReader extends AbstractReader { private long position_ = 0L; BufferReader(String filename, int size) throws FileNotFoundException { super(filename); file_ = new BufferedRandomAccessFile(filename, "r", size); } public long getEOF() throws IOException { return file_.length(); } public long getCurrentPosition() throws IOException { return file_.getFilePointer(); } public boolean isHealthyFileDescriptor() throws IOException { return file_.getFD().valid(); } public void seek(long position) throws IOException { file_.seek(position); } public boolean isEOF() throws IOException { return ( getCurrentPosition() == getEOF() ); } /** * Be extremely careful while using this API. This currently * used to read the commit log header from the commit logs. * Treat this as an internal API. * @param bytes read from the buffer into the this array */ public void readDirect(byte[] bytes) throws IOException { file_.readFully(bytes); } public void close() throws IOException { file_.close(); } } private static Logger logger_ = Logger.getLogger( SequenceFile.class ) ; public static final short utfPrefix_ = 2; static final String marker_ = "Bloom-Filter"; public static Writer writer(String filename) throws IOException { return new Writer(filename); } public static BufferWriter bufferedWriter(String filename, int size) throws IOException { return new BufferWriter(filename, size); } public static ConcurrentWriter concurrentWriter(String filename) throws IOException { return new ConcurrentWriter(filename); } public static FastConcurrentWriter fastWriter(String filename, int size) throws IOException { return new FastConcurrentWriter(filename, size); } public static Reader reader(String filename) throws FileNotFoundException { return new Reader(filename); } public static BufferReader bufferedReader(String filename, int size) throws IOException { return new BufferReader(filename, size); } public static boolean readBoolean(ByteBuffer buffer) { return ( buffer.get() == 1 ? true : false ); } /** * Efficiently writes a UTF8 string to the buffer. * Assuming all Strings that are passed in have length * that can be represented as a short i.e length of the * string is <= 65535 * @param buffer buffer to write the serialize version into * @param str string to serialize */ protected static void writeUTF(ByteBuffer buffer, String str) { int strlen = str.length(); int utflen = 0; int c, count = 0; /* use charAt instead of copying String to char array */ for (int i = 0; i < strlen; i++) { c = str.charAt(i); if ((c >= 0x0001) && (c <= 0x007F)) { utflen++; } else if (c > 0x07FF) { utflen += 3; } else { utflen += 2; } } byte[] bytearr = new byte[utflen + 2]; bytearr[count++] = (byte) ((utflen >>> 8) & 0xFF); bytearr[count++] = (byte) ((utflen >>> 0) & 0xFF); int i = 0; for (i = 0; i < strlen; i++) { c = str.charAt(i); if (!((c >= 0x0001) && (c <= 0x007F))) break; bytearr[count++] = (byte) c; } for (; i < strlen; i++) { c = str.charAt(i); if ((c >= 0x0001) && (c <= 0x007F)) { bytearr[count++] = (byte) c; } else if (c > 0x07FF) { bytearr[count++] = (byte) (0xE0 | ((c >> 12) & 0x0F)); bytearr[count++] = (byte) (0x80 | ((c >> 6) & 0x3F)); bytearr[count++] = (byte) (0x80 | ((c >> 0) & 0x3F)); } else { bytearr[count++] = (byte) (0xC0 | ((c >> 6) & 0x1F)); bytearr[count++] = (byte) (0x80 | ((c >> 0) & 0x3F)); } } buffer.put(bytearr, 0, utflen + 2); } /** * Read a UTF8 string from a serialized buffer. * @param buffer buffer from which a UTF8 string is read * @return a Java String */ protected static String readUTF(ByteBuffer in) throws IOException { int utflen = in.getShort(); byte[] bytearr = new byte[utflen]; char[] chararr = new char[utflen]; int c, char2, char3; int count = 0; int chararr_count = 0; in.get(bytearr, 0, utflen); while (count < utflen) { c = (int) bytearr[count] & 0xff; if (c > 127) break; count++; chararr[chararr_count++] = (char) c; } while (count < utflen) { c = (int) bytearr[count] & 0xff; switch (c >> 4) { case 0: case 1: case 2: case 3: case 4: case 5: case 6: case 7: /* 0xxxxxxx */ count++; chararr[chararr_count++] = (char) c; break; case 12: case 13: /* 110x xxxx 10xx xxxx */ count += 2; if (count > utflen) throw new UTFDataFormatException( "malformed input: partial character at end"); char2 = (int) bytearr[count - 1]; if ((char2 & 0xC0) != 0x80) throw new UTFDataFormatException( "malformed input around byte " + count); chararr[chararr_count++] = (char) (((c & 0x1F) << 6) | (char2 & 0x3F)); break; case 14: /* 1110 xxxx 10xx xxxx 10xx xxxx */ count += 3; if (count > utflen) throw new UTFDataFormatException( "malformed input: partial character at end"); char2 = (int) bytearr[count - 2]; char3 = (int) bytearr[count - 1]; if (((char2 & 0xC0) != 0x80) || ((char3 & 0xC0) != 0x80)) throw new UTFDataFormatException( "malformed input around byte " + (count - 1)); chararr[chararr_count++] = (char) (((c & 0x0F) << 12) | ((char2 & 0x3F) << 6) | ((char3 & 0x3F) << 0)); break; default: /* 10xx xxxx, 1111 xxxx */ throw new UTFDataFormatException("malformed input around byte " + count); } } // The number of chars produced may be less than utflen return new String(chararr, 0, chararr_count); } public static short getFileId(String file) { String[] peices = file.split("-"); return Short.parseShort( peices[2] ); } }