/*- * Copyright (C) 2007 Erik Larsson * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. */ package org.catacombae.hfs.io; import java.util.HashMap; import org.catacombae.io.ReadableFilterStream; import org.catacombae.io.ReadableRandomAccessStream; import org.catacombae.io.RuntimeIOException; /** * @author <a href="http://www.catacombae.org/" target="_top">Erik Larsson</a> */ public class ReadableBlockCachingStream extends ReadableFilterStream { /* * Keep track of the access count for every (?) block. The <itemCount> blocks with the highest * access count are kept in the cache. * This means that when we determine if a block should go in the cache or not, we need to find * the cache block with the lowest access count. * * Blocks in the cache are kept on basis of their access count, except for one block which is * the history block. In the future the implementation should allow for tuning how many of the * cache blocks are history blocks. */ /** The maximum age of an entry in the cache. When an entry's last accessed time has passed this age, it is thrown out, regardless of its access count. Unit: milliseconds. */ private static final long TIME_TO_KEEP_IN_CACHE = 5000; // Tune this later /** Block size. */ private final int blockSize; /** The logical file pointer. */ private long virtualFP; /** Length of the file. If the length of the underlying file should change, this one doesn't. */ private final long virtualLength; /** Hashtable mapping block numbers to BlockStore objects. Every block that has ever been accessed will get an entry here, which leads to uncontrollable memory allocation up to a maximum of length()/blockSize entries. TODO: Think out a smarter solution with a space limited data structure. */ private final HashMap<Long, BlockStore> blockMap = new HashMap<Long, BlockStore>(); /** Holds the cache items. One entry in the array is reserved for the previously read block, regardless of its access count, so that subsequent sequential reads won't suffer if accessCount isn't high enough. */ private final BlockStore[] cache; /** Set when the close method is called. Prohibits further access. */ private boolean closed = false; private static class BlockStore/* implements Comparable<LongContainer>*/ { public long accessCount = 0; public long lastAccessTime = Long.MAX_VALUE; public final long blockNumber; /** Might be null at any time when the data is thrown out of the cache. */ public byte[] data = null; public BlockStore(long blockNumber) { this.blockNumber = blockNumber; } } public ReadableBlockCachingStream(ReadableRandomAccessStream backing, int blockSize, int maxItemCount) { super(backing); //System.err.println("ReadableBlockCachingStream(" + backing + ", " + blockSize + ", " + maxItemCount + ");"); if(backing == null) throw new IllegalArgumentException("backing can not be null"); if(blockSize <= 0) throw new IllegalArgumentException("blockSize must be positive and non-zero"); if(maxItemCount < 1) throw new IllegalArgumentException("maxItemCount must be at least 1"); this.blockSize = blockSize; long length; try { length = backing.length(); } catch(Exception e) { length = -1; } if(length > 0) this.virtualLength = length; // Immutable else this.virtualLength = -1; int actualItemCount = maxItemCount; //System.err.println("ReadableBlockCachingStream created. virtualLength: " + virtualLength + " maxItemCount*blockSize: " + (maxItemCount*blockSize)); if(virtualLength > 0 && actualItemCount*blockSize > virtualLength) { actualItemCount = (int)( virtualLength/blockSize + ((virtualLength%blockSize != 0) ? 1 : 0) ); //System.err.println("Adjusted actualItemCount to " + actualItemCount); } this.cache = new BlockStore[actualItemCount]; } @Override public void seek(long pos) { if(closed) throw new RuntimeException("File is closed."); if((virtualLength == -1 || pos <= virtualLength) && pos >= 0) virtualFP = pos; else throw new IllegalArgumentException("pos out of range (pos=" + pos + ",virtualLength=" + virtualLength + ")"); } @Override public int read() { // Generic read() method byte[] b = new byte[1]; int res = read(b, 0, 1); if(res == 1) return b[0] & 0xFF; else return -1; } @Override public int read(byte[] data) { // Generic read(byte[]) method return read(data, 0, data.length); } @Override public int read(final byte[] data, final int pos, final int len) { if(closed) throw new RuntimeException("File is closed."); //System.out.println("ReadableBlockCachingStream.read(data, " + pos + ", " + len + ");"); //long fp = getFilePointer(); int bytesProcessed = 0; while(bytesProcessed < len) { byte[] blockData = getCachedBlock(virtualFP); int posInBlock = (int)(virtualFP - (virtualFP/blockSize)*blockSize); // Will deviate from fp with at most blockSize bytes, so int int bytesLeftInBlock = blockData.length-posInBlock; int bytesLeftInTransfer = len-bytesProcessed; int bytesToCopy = (bytesLeftInTransfer < bytesLeftInBlock ? bytesLeftInTransfer : bytesLeftInBlock); if(bytesLeftInBlock == 0) { /* If bytesLeftInBlock is 0 here, we have visisted this block * before but didn't manage to get the amount of bytes that we * requested. Since the block still has the same size, we are * requesting data that is beyond the end of the file. */ throw new RuntimeIOException("Attempted to read after the " + "end of the file."); } System.arraycopy(blockData, posInBlock, data, pos+bytesProcessed, bytesToCopy); bytesProcessed += bytesToCopy; virtualFP += bytesToCopy; } return bytesProcessed; } @Override public void readFully(byte[] data) { // Generic readFully(byte[]) method readFully(data, 0, data.length); } @Override public void readFully(byte[] data, int offset, int length) { // Generic readFully(byte[], int, int) method int bytesRead = 0; while(bytesRead < length) { int curBytesRead = read(data, offset+bytesRead, length-bytesRead); if(curBytesRead > 0) bytesRead += curBytesRead; else throw new RuntimeException("Couldn't read the entire length."); } } @Override public long length() { if(closed) throw new RuntimeException("File is closed."); return virtualLength; } @Override public long getFilePointer() { if(closed) throw new RuntimeException("File is closed."); return virtualFP; } @Override public void close() { closed = true; backingStore.close(); } /** * If the block is present in the cache, return it immediately. Otherwise * read the block from the backing store, put it the backing store */ private byte[] getCachedBlock(long filePointer) { final long blockNumber = filePointer / blockSize; // 1. Increment access count and access time BlockStore cur = blockMap.get(blockNumber); if(cur == null) { cur = new BlockStore(blockNumber); blockMap.put(blockNumber, cur); } ++cur.accessCount; cur.lastAccessTime = System.currentTimeMillis(); // 2. Get the data if(cur.data != null) { //System.out.println(" HIT at block number " + blockNumber + "!"); // 2.1 Just return the data that's in the cache return cur.data; } else { // If we get here, cur is not present in the cache. (It only has data if it's present in the cache) //System.out.println(" MISS at block number " + blockNumber + "!"); // 2.2 Fetch data from backing store and put in cache IF it has a high enough access count. // (We should maintain a "last accessed" block as well) // Throw out the last entry (if any) from the cache and fetch its data array. // Also remove its data array. If it is a standard sized array, we can reuse it and save the garbage // collector and heap allocator some work. BlockStore lastCacheEntry = cache[cache.length-1]; cache[cache.length-1] = null; byte[] recoveredData = null; if(lastCacheEntry != null) { recoveredData = lastCacheEntry.data; lastCacheEntry.data = null; // Stole your array. if(recoveredData == null) throw new RuntimeException("Entry in cache had a null array, which should never happen!"); } // Read data from backing store long blockPos = blockNumber*blockSize; long remainingSize = length()-blockPos; long dataSize = remainingSize < blockSize ? remainingSize : blockSize; byte[] data; if(recoveredData != null && dataSize == recoveredData.length) data = recoveredData; else { // Will only happen if (1) cache isn't full or (2) if we are dealing with the last block int size = (int)(dataSize <= 0 ? blockSize : dataSize); // TODO: Investigate the effect of this approach (setting the array size to blockSize for all block in -1 virtualLength streams). data = new byte[size]; } //System.err.println(" Seeking to " + blockPos + " (block number: " + blockNumber + ", blockSize: " + blockSize + ", data.length: " + data.length + ")"); backingStore.seek(blockPos); backingStore.read(data, 0, data.length); // Place cur in the cache and make sure it goes to the right position. Time is O(cache.length) cur.data = data; cache[cache.length-1] = cur; bubbleIntoPosition(cache, cache.length-1); return cur.data; } } private static void bubbleIntoPosition(BlockStore[] array, int startIndex) { long timestamp = System.currentTimeMillis(); for(int i = startIndex; i >= 1; --i) { BlockStore low = array[i]; BlockStore high = array[i-1]; if(high == null || // Array has not been filled low.accessCount > high.accessCount || // The access count of the new item is greater than the old one's (timestamp - high.lastAccessTime) >= TIME_TO_KEEP_IN_CACHE) { // The old one is too old to be kept in cache /* //DEBUG if(!(high == null || low.accessCount > high.accessCount)) System.out.println("Moving down a block in cache because of age! Age=" + (timestamp - high.lastAccessTime)); */ // Switch places array[i] = high; array[i-1] = low; } } } /** Loads as much data as possible into memory starting at position 0. */ public void preloadBlocks() { preloadBlocks(0, cache.length); } /** Not exposed as public interface because the outside might not know how many blocks there are. */ private void preloadBlocks(int startBlock, int blockCount) { for(int i = 0; i < blockCount; ++i) { System.err.println("Preloading block " + (startBlock+i) + "..."); getCachedBlock((startBlock+i)*blockSize); } } }