package org.infinispan.persistence.sifs;

import java.io.File;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReadWriteLock;
import java.util.concurrent.locks.ReentrantReadWriteLock;

import org.infinispan.commons.equivalence.Equivalence;
import org.infinispan.util.TimeService;
import org.infinispan.util.logging.Log;
import org.infinispan.util.logging.LogFactory;

/**
 * Keeps the entry positions persisted in a file. The index consists of several segments, each covering one
 * modulo-range of the keys' hash codes (according to the DataContainer's key equivalence configuration).
 * Writes to each index segment are performed by a single thread; having multiple segments spreads the load
 * between them.
 *
 * @author Radim Vansa <rvansa@redhat.com>
 */
class Index {
   private static final Log log = LogFactory.getLog(Index.class);
   private static final boolean trace = log.isTraceEnabled();

   private static final int GRACEFULLY = 0x512ACEF0;
   private static final int DIRTY = 0xD112770C;
   protected static final int INDEX_FILE_HEADER_SIZE = 30;

   private final String indexDir;
   private final FileProvider fileProvider;
   private final Compactor compactor;
   private final int minNodeSize;
   private final int maxNodeSize;
   private final ReadWriteLock lock = new ReentrantReadWriteLock();
   private final Segment[] segments;
   private final TimeService timeService;
   private final Equivalence<Object> keyEquivalence;

   public Index(FileProvider fileProvider, String indexDir, int segments, int minNodeSize, int maxNodeSize,
                IndexQueue indexQueue, TemporaryTable temporaryTable, Compactor compactor,
                TimeService timeService, Equivalence<Object> keyEquivalence) throws IOException {
      this.fileProvider = fileProvider;
      this.compactor = compactor;
      this.timeService = timeService;
      this.keyEquivalence = keyEquivalence;
      this.indexDir = indexDir;
      this.minNodeSize = minNodeSize;
      this.maxNodeSize = maxNodeSize;
      new File(indexDir).mkdirs();
      this.segments = new Segment[segments];
      for (int i = 0; i < segments; ++i) {
         this.segments[i] = new Segment(i, indexQueue.subQueue(i), temporaryTable);
      }
   }

   /**
    * @return True if the index was loaded from a well-persisted state
    */
   public boolean isLoaded() {
      for (int i = 0; i < segments.length; ++i) {
         if (!segments[i].loaded) return false;
      }
      return true;
   }

   public void start() {
      for (int i = 0; i < segments.length; ++i) {
         segments[i].start();
      }
   }
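
   // The three lookup methods below route a key to its segment as
   //    int segment = Math.abs(keyEquivalence.hashCode(key)) % segments.length;
   // and then delegate to IndexNode.applyOnLeaf() while holding the index-wide read lock (taken so that
   // lookups do not race with clear()) plus the segment's root read lock. Note that Math.abs(Integer.MIN_VALUE)
   // is still negative, so a key hashing to exactly Integer.MIN_VALUE would yield a negative segment index.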

   /**
    * Get record or null if expired
    *
    * @param key the key object
    * @param serializedKey the serialized form of the key
    * @return the record, or null if expired
    * @throws IOException
    */
   public EntryRecord getRecord(Object key, byte[] serializedKey) throws IOException {
      int segment = Math.abs(keyEquivalence.hashCode(key)) % segments.length;
      lock.readLock().lock();
      try {
         return IndexNode.applyOnLeaf(segments[segment], serializedKey, segments[segment].rootReadLock(),
               IndexNode.ReadOperation.GET_RECORD);
      } finally {
         lock.readLock().unlock();
      }
   }

   /**
    * Get position or null if expired
    *
    * @param key the key object
    * @param serializedKey the serialized form of the key
    * @return the position, or null if expired
    * @throws IOException
    */
   public EntryPosition getPosition(Object key, byte[] serializedKey) throws IOException {
      int segment = Math.abs(keyEquivalence.hashCode(key)) % segments.length;
      lock.readLock().lock();
      try {
         return IndexNode.applyOnLeaf(segments[segment], serializedKey, segments[segment].rootReadLock(),
               IndexNode.ReadOperation.GET_POSITION);
      } finally {
         lock.readLock().unlock();
      }
   }

   /**
    * Get position plus the number of records, without expiration check
    *
    * @param key the key object
    * @param serializedKey the serialized form of the key
    * @return the entry info (position and number of records)
    * @throws IOException
    */
   public EntryInfo getInfo(Object key, byte[] serializedKey) throws IOException {
      int segment = Math.abs(keyEquivalence.hashCode(key)) % segments.length;
      lock.readLock().lock();
      try {
         return IndexNode.applyOnLeaf(segments[segment], serializedKey, segments[segment].rootReadLock(),
               IndexNode.ReadOperation.GET_INFO);
      } finally {
         lock.readLock().unlock();
      }
   }

   public void clear() throws IOException {
      lock.writeLock().lock();
      try {
         ArrayList<CountDownLatch> pauses = new ArrayList<CountDownLatch>();
         for (Segment seg : segments) {
            pauses.add(seg.pauseAndClear());
         }
         for (CountDownLatch pause : pauses) {
            pause.countDown();
         }
      } catch (InterruptedException e) {
         throw new RuntimeException(e);
      } finally {
         lock.writeLock().unlock();
      }
   }

   public void stopOperations() throws InterruptedException {
      for (Segment seg : segments) {
         seg.stopOperations();
      }
   }

   public long size() throws InterruptedException {
      long size = 0;
      for (Segment seg : segments) {
         size += seg.size();
      }
      return size;
   }

   class Segment extends Thread {
      private final BlockingQueue<IndexRequest> indexQueue;
      private final TemporaryTable temporaryTable;
      private final TreeMap<Integer, List<IndexSpace>> freeBlocks = new TreeMap<Integer, List<IndexSpace>>();
      private final ReadWriteLock rootLock = new ReentrantReadWriteLock();
      private final File indexFileFile;
      private final boolean loaded;
      private FileChannel indexFile;
      private long indexFileSize = 0;
      private AtomicLong size = new AtomicLong();
      private volatile IndexNode root;
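
      // On-disk layout of an index segment file ("index.<id>"), as read by the constructor below and written
      // by shutdown(); all values use ByteBuffer's default big-endian order:
      //     0  int    magic - GRACEFULLY after a clean shutdown, DIRTY while the segment is open
      //     4  long   offset of the root index node
      //    12  short  size occupied by the root index node
      //    14  long   offset where the serialized free-block lists start (the index file size at shutdown)
      //    22  long   number of entries in this segment
      // The header is INDEX_FILE_HEADER_SIZE = 30 bytes; a file that is missing it or is not marked
      // GRACEFULLY is truncated and the segment starts with an empty root (isLoaded() then reports false).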

      private Segment(int id, BlockingQueue<IndexRequest> indexQueue, TemporaryTable temporaryTable) throws IOException {
         super("BCS-IndexUpdater-" + id);
         this.setDaemon(true);
         this.indexQueue = indexQueue;
         this.temporaryTable = temporaryTable;
         this.indexFileFile = new File(indexDir, "index." + id);
         this.indexFile = new RandomAccessFile(indexFileFile, "rw").getChannel();
         indexFile.position(0);
         ByteBuffer buffer = ByteBuffer.allocate(INDEX_FILE_HEADER_SIZE);
         if (indexFile.size() >= INDEX_FILE_HEADER_SIZE && read(indexFile, buffer) && buffer.getInt(0) == GRACEFULLY) {
            long rootOffset = buffer.getLong(4);
            short rootOccupied = buffer.getShort(12);
            long freeBlocksOffset = buffer.getLong(14);
            size.set(buffer.getLong(22));
            root = new IndexNode(this, rootOffset, rootOccupied);
            loadFreeBlocks(freeBlocksOffset);
            indexFileSize = freeBlocksOffset;
            loaded = true;
         } else {
            this.indexFile.truncate(0);
            root = IndexNode.emptyWithLeaves(this);
            loaded = false;
            // reserve space for shutdown
            indexFileSize = INDEX_FILE_HEADER_SIZE;
         }
         buffer.putInt(0, DIRTY);
         buffer.position(0);
         buffer.limit(4);
         indexFile.position(0);
         write(indexFile, buffer);
      }

      private void write(FileChannel indexFile, ByteBuffer buffer) throws IOException {
         do {
            int written = indexFile.write(buffer);
            if (written < 0) {
               throw new IllegalStateException("Cannot write to index file!");
            }
         } while (buffer.position() < buffer.limit());
      }

      private boolean read(FileChannel indexFile, ByteBuffer buffer) throws IOException {
         do {
            int read = indexFile.read(buffer);
            if (read < 0) {
               return false;
            }
         } while (buffer.position() < buffer.limit());
         return true;
      }
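
      // The updater thread serially applies IndexRequests taken from its queue (summarizing the switch in
      // run() below):
      //    UPDATE      - an entry (or tombstone) was written to the data log; adjusts the segment size when
      //                  an entry appears or disappears and reports whether something was overwritten
      //    MOVED       - an entry was copied to another file (typically by the compactor); applied only if
      //                  the index still points at the expected previous file/offset
      //    DROPPED     - an entry was dropped; decreases the size if the index still pointed at it
      //    FOUND_OLD   - an older version of an entry was found; recorded via RecordChange.INCREASE_FOR_OLD
      //                  with no overwrite handling
      //    CLEAR       - drains the queue and parks the thread until Index.clear() releases the latch
      //    DELETE_FILE - the last segment processing the request deletes the data file
      //    GET_SIZE    - reports the current number of entries in this segment
      //    STOP        - persists the index via shutdown() and terminates the thread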

      @Override
      public void run() {
         try {
            int counter = 0;
            while (true) {
               if (++counter % 30000 == 0) {
                  log.debug("Queue size is " + indexQueue.size());
               }
               final IndexRequest request = indexQueue.take();
               if (trace) log.trace("Indexing " + request);
               IndexNode.OverwriteHook overwriteHook;
               IndexNode.RecordChange recordChange;
               switch (request.getType()) {
                  case CLEAR:
                     IndexRequest cleared;
                     while ((cleared = indexQueue.poll()) != null) {
                        cleared.setResult(false);
                     }
                     CountDownLatch pause = new CountDownLatch(1);
                     request.setResult(pause);
                     log.debug("Waiting for cleared");
                     pause.await();
                     continue;
                  case DELETE_FILE:
                     // the last segment that processes the delete request actually deletes the file
                     if (request.countDown()) {
                        fileProvider.deleteFile(request.getFile());
                        compactor.releaseStats(request.getFile());
                     }
                     continue;
                  case STOP:
                     assert indexQueue.poll() == null;
                     shutdown();
                     return;
                  case GET_SIZE:
                     request.setResult(size.get());
                     continue;
                  case MOVED:
                     recordChange = IndexNode.RecordChange.MOVE;
                     overwriteHook = new IndexNode.OverwriteHook() {
                        @Override
                        public boolean check(int oldFile, int oldOffset) {
                           return oldFile == request.getPrevFile() && oldOffset == request.getPrevOffset();
                        }

                        @Override
                        public void setOverwritten(boolean overwritten, int prevFile, int prevOffset) {
                           if (overwritten && request.getOffset() < 0 && request.getPrevOffset() >= 0) {
                              size.decrementAndGet();
                           }
                        }
                     };
                     break;
                  case UPDATE:
                     recordChange = IndexNode.RecordChange.INCREASE;
                     overwriteHook = new IndexNode.OverwriteHook() {
                        @Override
                        public void setOverwritten(boolean overwritten, int prevFile, int prevOffset) {
                           request.setResult(overwritten);
                           if (request.getOffset() >= 0 && prevOffset < 0) {
                              size.incrementAndGet();
                           } else if (request.getOffset() < 0 && prevOffset >= 0) {
                              size.decrementAndGet();
                           }
                        }
                     };
                     break;
                  case DROPPED:
                     recordChange = IndexNode.RecordChange.DECREASE;
                     overwriteHook = new IndexNode.OverwriteHook() {
                        @Override
                        public void setOverwritten(boolean overwritten, int prevFile, int prevOffset) {
                           if (request.getPrevFile() == prevFile && request.getPrevOffset() == prevOffset) {
                              size.decrementAndGet();
                           }
                        }
                     };
                     break;
                  case FOUND_OLD:
                     recordChange = IndexNode.RecordChange.INCREASE_FOR_OLD;
                     overwriteHook = IndexNode.OverwriteHook.NOOP;
                     break;
                  default:
                     throw new IllegalArgumentException(request.toString());
               }
               try {
                  IndexNode.setPosition(root, request.getSerializedKey(), request.getFile(), request.getOffset(),
                        request.getSize(), overwriteHook, recordChange);
               } catch (IllegalStateException e) {
                  throw new IllegalStateException(request.toString(), e);
               }
               temporaryTable.removeConditionally(request.getKey(), request.getFile(), request.getOffset());
            }
         } catch (IOException e) {
            throw new RuntimeException(e);
         } catch (InterruptedException e) {
            throw new RuntimeException(e);
         } catch (Throwable e) {
            log.error("Error in indexer thread", e);
         } finally {
            try {
               indexFile.close();
            } catch (IOException e) {
               log.error("Failed to close/delete the index", e);
            }
         }
      }

      private void shutdown() throws IOException {
         IndexSpace rootSpace = allocateIndexSpace(root.length());
         root.store(rootSpace);
         indexFile.position(indexFileSize);
         ByteBuffer buffer = ByteBuffer.allocate(4);
         buffer.putInt(0, freeBlocks.size());
         write(indexFile, buffer);
         for (Map.Entry<Integer, List<IndexSpace>> entry : freeBlocks.entrySet()) {
            List<IndexSpace> list = entry.getValue();
            int requiredSize = 8 + list.size() * 10;
            buffer = buffer.capacity() < requiredSize ? ByteBuffer.allocate(requiredSize) : buffer;
            buffer.position(0);
            buffer.limit(requiredSize);
            buffer.putInt(entry.getKey());
            buffer.putInt(list.size());
            for (IndexSpace space : list) {
               buffer.putLong(space.offset);
               buffer.putShort((short) space.length);
            }
            buffer.flip();
            write(indexFile, buffer);
         }
         int headerWithoutMagic = INDEX_FILE_HEADER_SIZE - 4;
         buffer = buffer.capacity() < headerWithoutMagic ? ByteBuffer.allocate(headerWithoutMagic) : buffer;
         buffer.putLong(0, rootSpace.offset);
         buffer.putShort(8, (short) rootSpace.length);
         buffer.putLong(10, indexFileSize);
         buffer.putLong(18, size.get());
         buffer.position(0);
         buffer.limit(headerWithoutMagic);
         indexFile.position(4);
         write(indexFile, buffer);
         buffer.putInt(0, GRACEFULLY);
         buffer.position(0);
         buffer.limit(4);
         indexFile.position(0);
         write(indexFile, buffer);
      }
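
      // Free-block lists are appended after the last index node when the segment shuts down and are read back
      // by loadFreeBlocks(); the serialized form (matching shutdown() above) is:
      //    int numLists
      //    for each list: int blockLength, int listSize, then listSize x { long offset, short length }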

      private void loadFreeBlocks(long freeBlocksOffset) throws IOException {
         indexFile.position(freeBlocksOffset);
         ByteBuffer buffer = ByteBuffer.allocate(8);
         buffer.limit(4);
         if (!read(indexFile, buffer)) {
            throw new IOException("Cannot read free blocks lists!");
         }
         int numLists = buffer.getInt(0);
         for (int i = 0; i < numLists; ++i) {
            buffer.position(0);
            buffer.limit(8);
            if (!read(indexFile, buffer)) {
               throw new IOException("Cannot read free blocks lists!");
            }
            int blockLength = buffer.getInt(0);
            int listSize = buffer.getInt(4);
            int requiredSize = 10 * listSize;
            buffer = buffer.capacity() < requiredSize ? ByteBuffer.allocate(requiredSize) : buffer;
            buffer.position(0);
            buffer.limit(requiredSize);
            if (!read(indexFile, buffer)) {
               throw new IOException("Cannot read free blocks lists!");
            }
            buffer.flip();
            ArrayList<IndexSpace> list = new ArrayList<>(listSize);
            for (int j = 0; j < listSize; ++j) {
               list.add(new IndexSpace(buffer.getLong(), buffer.getShort()));
            }
            freeBlocks.put(blockLength, list);
         }
      }

      public CountDownLatch pauseAndClear() throws InterruptedException, IOException {
         IndexRequest clear = IndexRequest.clearRequest();
         indexQueue.put(clear);
         CountDownLatch pause = (CountDownLatch) clear.getResult();
         root = IndexNode.emptyWithLeaves(this);
         indexFile.truncate(0);
         indexFileSize = INDEX_FILE_HEADER_SIZE;
         freeBlocks.clear();
         size.set(0);
         return pause;
      }

      public long size() throws InterruptedException {
         IndexRequest sizeRequest = IndexRequest.sizeRequest();
         indexQueue.put(sizeRequest);
         return (Long) sizeRequest.getResult();
      }

      public FileChannel getIndexFile() {
         return indexFile;
      }

      public FileProvider getFileProvider() {
         return fileProvider;
      }

      public Compactor getCompactor() {
         return compactor;
      }

      public IndexNode getRoot() {
         // this has to be called with rootLock locked!
         return root;
      }

      public void setRoot(IndexNode root) {
         rootLock.writeLock().lock();
         this.root = root;
         rootLock.writeLock().unlock();
      }

      public int getMaxNodeSize() {
         return maxNodeSize;
      }

      public int getMinNodeSize() {
         return minNodeSize;
      }

      // this should be accessed only from the updater thread
      IndexSpace allocateIndexSpace(int length) {
         // reuse the smallest free block that can hold `length` bytes, otherwise grow the file
         Map.Entry<Integer, List<IndexSpace>> entry = freeBlocks.ceilingEntry(length);
         if (entry == null || entry.getValue().isEmpty()) {
            long oldSize = indexFileSize;
            indexFileSize += length;
            return new IndexSpace(oldSize, length);
         } else {
            return entry.getValue().remove(entry.getValue().size() - 1);
         }
      }

      // this should be accessed only from the updater thread
      void freeIndexSpace(long offset, int length) {
         if (length <= 0) throw new IllegalArgumentException("Offset=" + offset + ", length=" + length);
         // TODO: fragmentation!
         // TODO: memory bounds!
         if (offset + length < indexFileSize) {
            List<IndexSpace> list = freeBlocks.get(length);
            if (list == null) {
               freeBlocks.put(length, list = new ArrayList<IndexSpace>());
            }
            list.add(new IndexSpace(offset, length));
         } else {
            // the freed block sits at the end of the file, so just shrink the file instead
            indexFileSize -= length;
            try {
               indexFile.truncate(indexFileSize);
            } catch (IOException e) {
               log.warn("Cannot truncate index", e);
            }
         }
      }

      public Lock rootReadLock() {
         return rootLock.readLock();
      }

      public void stopOperations() throws InterruptedException {
         indexQueue.put(IndexRequest.stopRequest());
         this.join();
      }

      public TimeService getTimeService() {
         return timeService;
      }
   }

   /**
    * Offset-length pair describing a slice of the index file.
    */
   static class IndexSpace {
      protected long offset;
      protected int length;

      public IndexSpace(long offset, int length) {
         this.offset = offset;
         this.length = length;
      }

      @Override
      public boolean equals(Object o) {
         if (this == o) return true;
         if (!(o instanceof IndexSpace)) return false;
         IndexSpace that = (IndexSpace) o;
         return length == that.length && offset == that.offset;
      }

      @Override
      public int hashCode() {
         int result = (int) (offset ^ (offset >>> 32));
         result = 31 * result + length;
         return result;
      }

      @Override
      public String toString() {
         return String.format("[%d-%d(%d)]", offset, offset + length, length);
      }
   }
}