/* Copyright (c) 2006, Matthew Estes All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of Metanotion Software nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ package net.metanotion.io.block.index; import java.io.IOException; import net.metanotion.io.Serializer; import net.metanotion.io.block.BlockFile; import net.metanotion.util.skiplist.SkipList; import net.metanotion.util.skiplist.SkipSpan; import net.i2p.util.Log; /** * On-disk format: * *<pre> * First Page: * Magic number (int) * overflow page (unsigned int) * previous page (unsigned int) * next page (unsigned int) * max keys (unsigned short) * number of keys (unsigned short) * for each key: * key length (unsigned short) * value length (unsigned short) * key data * value data * * Overflow pages: * Magic number (int) * next overflow page (unsigned int) *</pre> */ public class BSkipSpan<K extends Comparable<? super K>, V> extends SkipSpan<K, V> { protected static final int MAGIC = 0x5370616e; // "Span" protected static final int HEADER_LEN = 20; public static final int CONT_HEADER_LEN = 8; protected final BlockFile bf; private final BSkipList<K, V> bsl; protected int page; protected int overflowPage; protected int prevPage; protected int nextPage = 0; protected Serializer<K> keySer; protected Serializer<V> valSer; // I2P protected int spanSize; protected boolean isKilled; public static void init(BlockFile bf, int page, int spanSize) throws IOException { BlockFile.pageSeek(bf.file, page); bf.file.writeInt(MAGIC); bf.file.writeInt(0); bf.file.writeInt(0); bf.file.writeInt(0); bf.file.writeShort((short) spanSize); bf.file.writeShort(0); } @Override public SkipSpan<K, V> newInstance(SkipList<K, V> sl) { try { int newPage = bf.allocPage(); init(bf, newPage, bf.spanSize); return new BSkipSpan<K, V>(bf, (BSkipList<K, V>) sl, newPage, keySer, valSer); } catch (IOException ioe) { throw new RuntimeException("Error creating database page", ioe); } } @Override public void killInstance() { if (isKilled) { bf.log.error("Already killed!! " + this, new Exception()); return; } if (bf.log.shouldLog(Log.DEBUG)) bf.log.debug("Killing " + this); isKilled = true; try { int curPage = overflowPage; bf.freePage(page); freeContinuationPages(curPage); } catch (IOException ioe) { bf.log.error("Error freeing " + this, ioe); } bsl.spanHash.remove(Integer.valueOf(this.page)); } /** * Free a chain of continuation pages * @param curPage the first page to be freed, if 0 this does nothing. * @return number freed */ private int freeContinuationPages(int curPage) throws IOException { int rv = 0; while(curPage > 0) { BlockFile.pageSeek(bf.file, curPage); int magic = bf.file.readInt(); if (magic != BlockFile.MAGIC_CONT) throw new IOException("Bad SkipSpan magic number 0x" + Integer.toHexString(magic) + " on page " + curPage); int next = bf.file.readUnsignedInt(); bf.freePage(curPage); curPage = next; rv++; } return rv; } @Override public void flush() { fflush(); } /** * I2P - avoid super.flush() */ private void fflush() { if (isKilled) { bf.log.error("Already killed!! " + this, new Exception()); return; } try { BlockFile.pageSeek(bf.file, page); bf.file.writeInt(MAGIC); bf.file.writeInt(overflowPage); prevPage = (prev != null) ? ((BSkipSpan) prev).page : 0; nextPage = (next != null) ? ((BSkipSpan) next).page : 0; bf.file.writeInt(prevPage); bf.file.writeInt(nextPage); // if keys is null, we are (hopefully) just updating the prev/next pages on an unloaded span if (keys == null) return; bf.file.writeShort((short) keys.length); bf.file.writeShort((short) nKeys); if (nKeys <= 0 && prev != null) bf.log.error("Flushing with no entries?" + this, new Exception()); int curPage = this.page; int[] curNextPage = new int[1]; curNextPage[0] = this.overflowPage; int[] pageCounter = new int[1]; pageCounter[0] = HEADER_LEN; byte[] keyData; byte[] valData; for(int i=0;i<nKeys;i++) { if((pageCounter[0] + 4) > BlockFile.PAGESIZE) { if(curNextPage[0] == 0) { curNextPage[0] = bf.allocPage(); BlockFile.pageSeek(bf.file, curNextPage[0]); bf.file.writeInt(BlockFile.MAGIC_CONT); bf.file.writeInt(0); BlockFile.pageSeek(bf.file, curPage); bf.file.skipBytes(4); // skip magic bf.file.writeInt(curNextPage[0]); } BlockFile.pageSeek(bf.file, curNextPage[0]); curPage = curNextPage[0]; bf.file.skipBytes(4); // skip magic curNextPage[0] = bf.file.readUnsignedInt(); pageCounter[0] = CONT_HEADER_LEN; } // Drop bad entry without throwing exception if (keys[i] == null || vals[i] == null) { bf.log.error("Dropping null data in entry " + i + " page " + curPage + " key=" + this.keys[i] + " val=" + this.vals[i]); nKeys--; i--; continue; } keyData = this.keySer.getBytes(keys[i]); valData = this.valSer.getBytes(vals[i]); // Drop bad entry without throwing exception if (keyData.length > 65535 || valData.length > 65535) { bf.log.error("Dropping huge data in entry " + i + " page " + curPage + " keylen=" + keyData.length + " vallen=" + valData.length); nKeys--; i--; continue; } pageCounter[0] += 4; bf.file.writeShort(keyData.length); bf.file.writeShort(valData.length); curPage = bf.writeMultiPageData(keyData, curPage, pageCounter, curNextPage); curPage = bf.writeMultiPageData(valData, curPage, pageCounter, curNextPage); } BlockFile.pageSeek(bf.file, this.page); bf.file.skipBytes(4); // skip magic this.overflowPage = bf.file.readUnsignedInt(); if (curNextPage[0] != 0) { // free extra continuation pages BlockFile.pageSeek(bf.file, curPage); bf.file.skipBytes(4); // skip magic bf.file.writeInt(0); if (curPage == this.page) this.overflowPage = 0; try { int freed = freeContinuationPages(curNextPage[0]); if (bf.log.shouldLog(Log.DEBUG)) bf.log.debug("Freed " + freed + " continuation pages"); } catch (IOException ioe) { bf.log.error("Error freeing " + this, ioe); } } } catch (IOException ioe) { throw new RuntimeException("Error writing to database", ioe); } // FIXME can't get there from here //bsl.size -= fail; //bsl.flush(); } private static <X extends Comparable<? super X>, Y> void load(BSkipSpan<X, Y> bss, BlockFile bf, BSkipList<X, Y> bsl, int spanPage, Serializer<X> key, Serializer<Y> val) throws IOException { loadInit(bss, bf, bsl, spanPage, key, val); bss.loadData(); } /** * I2P - first half of load() * Only read the span headers */ protected static <X extends Comparable<? super X>, Y> void loadInit(BSkipSpan<X, Y> bss, BlockFile bf, BSkipList<X, Y> bsl, int spanPage, Serializer<X> key, Serializer<Y> val) throws IOException { if (bss.isKilled) throw new IOException("Already killed!! " + bss); bss.page = spanPage; bss.keySer = key; bss.valSer = val; bsl.spanHash.put(Integer.valueOf(spanPage), bss); BlockFile.pageSeek(bf.file, spanPage); int magic = bf.file.readInt(); if (magic != MAGIC) throw new IOException("Bad SkipSpan magic number 0x" + Integer.toHexString(magic) + " on page " + spanPage); bss.overflowPage = bf.file.readUnsignedInt(); bss.prevPage = bf.file.readUnsignedInt(); bss.nextPage = bf.file.readUnsignedInt(); bss.spanSize = bf.file.readUnsignedShort(); bss.nKeys = bf.file.readUnsignedShort(); if(bss.spanSize < 1 || bss.spanSize > SkipSpan.MAX_SIZE || bss.nKeys > bss.spanSize) { bf.log.error("Invalid span size " + bss.nKeys + " / "+ bss.spanSize); bss.nKeys = 0; bss.spanSize = bf.spanSize; } } /** * I2P - second half of load() * Load the whole span's keys and values into memory */ protected void loadData() throws IOException { loadData(true); } /** * I2P - second half of load() * Load the whole span's keys and values into memory * @param flushOnError set to false if you are going to flush anyway */ @SuppressWarnings("unchecked") protected void loadData(boolean flushOnError) throws IOException { if (isKilled) throw new IOException("Already killed!! " + this); this.keys = (K[]) new Comparable[this.spanSize]; this.vals = (V[]) new Object[this.spanSize]; int ksz, vsz; int curPage = this.page; int[] curNextPage = new int[1]; curNextPage[0] = this.overflowPage; int[] pageCounter = new int[1]; pageCounter[0] = HEADER_LEN; // System.out.println("Span Load " + sz + " nKeys " + nKeys + " page " + curPage); int fail = 0; for(int i=0;i<this.nKeys;i++) { if((pageCounter[0] + 4) > BlockFile.PAGESIZE) { BlockFile.pageSeek(this.bf.file, curNextPage[0]); int magic = bf.file.readInt(); if (magic != BlockFile.MAGIC_CONT) { bf.log.error("Lost " + (this.nKeys - i) + " entries - Bad SkipSpan magic number 0x" + Integer.toHexString(magic) + " on page " + curNextPage[0]); lostEntries(i, curPage); break; } curPage = curNextPage[0]; curNextPage[0] = this.bf.file.readUnsignedInt(); pageCounter[0] = CONT_HEADER_LEN; } ksz = this.bf.file.readUnsignedShort(); vsz = this.bf.file.readUnsignedShort(); pageCounter[0] +=4; byte[] k = new byte[ksz]; byte[] v = new byte[vsz]; int lastGood = curPage; try { curPage = this.bf.readMultiPageData(k, curPage, pageCounter, curNextPage); curPage = this.bf.readMultiPageData(v, curPage, pageCounter, curNextPage); } catch (IOException ioe) { bf.log.error("Lost " + (this.nKeys - i) + " entries - Error loading " + this + " on page " + curPage, ioe); lostEntries(i, lastGood); break; } // System.out.println("i=" + i + ", Page " + curPage + ", offset " + pageCounter[0] + " ksz " + ksz + " vsz " + vsz); this.keys[i] = this.keySer.construct(k); this.vals[i] = this.valSer.construct(v); // Drop bad entry without throwing exception if (this.keys[i] == null || this.vals[i] == null) { bf.log.error("Null deserialized data in entry " + i + " page " + curPage + " key=" + this.keys[i] + " val=" + this.vals[i]); fail++; nKeys--; i--; continue; } } // free any excess overflow pages? if (fail > 0) { bf.log.error("Repairing corruption of " + fail + " entries"); if (flushOnError) fflush(); // FIXME can't get there from here //bsl.size -= fail; //bsl.flush(); } } /** * Attempt to recover from corrupt data in this span. * All entries starting with firstBadEntry are lost. * Zero out the overflow page on lastGoodPage, * and corect the number of entries in the first page. * We don't attempt to free the lost continuation pages. */ protected void lostEntries(int firstBadEntry, int lastGoodPage) { try { this.nKeys = firstBadEntry; // zero overflow page pointer BlockFile.pageSeek(this.bf.file, lastGoodPage); bf.file.skipBytes(4); // skip magic bf.file.writeInt(0); // write new number of keys if (lastGoodPage != this.page) { BlockFile.pageSeek(this.bf.file, this.page); bf.file.skipBytes(18); } else { bf.file.skipBytes(10); } bf.file.writeShort(this.nKeys); } catch (IOException ioe) { bf.log.error("Error while recovering from corruption of " + this, ioe); } } protected BSkipSpan(BlockFile bf, BSkipList<K, V> bsl) { this.bf = bf; this.bsl = bsl; } public BSkipSpan(BlockFile bf, BSkipList<K, V> bsl, int spanPage, Serializer<K> key, Serializer<V> val) throws IOException { this.bf = bf; this.bsl = bsl; BSkipSpan.load(this, bf, bsl, spanPage, key, val); this.next = null; this.prev = null; BSkipSpan<K, V> bss = this; // findbugs ok (set in load() above) int np = nextPage; while(np != 0) { BSkipSpan<K, V> temp = bsl.spanHash.get(Integer.valueOf(np)); if(temp != null) { bss.next = temp; break; } bss.next = new BSkipSpan<K, V>(bf, bsl); bss.next.next = null; bss.next.prev = bss; bss = (BSkipSpan<K, V>) bss.next; BSkipSpan.load(bss, bf, bsl, np, key, val); np = bss.nextPage; } // Go backwards to fill in the rest. This never happens. bss = this; np = prevPage; while(np != 0) { BSkipSpan<K, V> temp = bsl.spanHash.get(Integer.valueOf(np)); if(temp != null) { bss.prev = temp; break; } bss.prev = new BSkipSpan<K, V>(bf, bsl); bss.prev.next = bss; bss.prev.prev = null; bss = (BSkipSpan<K, V>) bss.prev; BSkipSpan.load(bss, bf, bsl, np, key, val); np = bss.prevPage; } } @Override public String toString() { String rv = "BSS page: " + page + " key: \"" + firstKey() + '"'; if (isKilled) rv += " KILLED"; return rv; } }