/** * Copyright 2012 Akiban Technologies, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.persistit; import static org.junit.Assert.assertEquals; import java.io.PrintWriter; import java.util.Properties; import java.util.concurrent.atomic.AtomicInteger; import org.junit.Test; import com.persistit.unit.UnitTestProperties; /** * https://bugs.launchpad.net/akiban-persistit/+bug/1017957 * * During the past week the 8-hour stress test suite has generated several * CorruptVolumeExceptions and other related phenomena. 
Examples: * * Stress6 [main] FAILED: com.persistit.exception.CorruptVolumeException: Volume * persistit(/tmp/persistit_tests/persistit) level=0 page=15684 * initialPage=57164 * key=<{"stress6",98,5,"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"}> walked * right more than 50 pages last page visited=81324 at * com.persistit.Exchange.corrupt(Exchange.java:3884) at * com.persistit.Exchange.searchLevel(Exchange.java:1250) at * com.persistit.Exchange.searchTree(Exchange.java:1125) at * com.persistit.Exchange.storeInternal(Exchange.java:1443) at * com.persistit.Exchange.store(Exchange.java:1294) at * com.persistit.Exchange.store(Exchange.java:2534) at * com.persistit.stress.unit.Stress6.executeTest(Stress6.java:98) at * com.persistit.stress.AbstractStressTest.run(AbstractStressTest.java:93) at * java.lang.Thread.run(Thread.java:662) * * Stress2txn [main] FAILED: com.persistit.exception.RebalanceException at * com.persistit.Buffer.join(Buffer.java:2523) at * com.persistit.Exchange.raw_removeKeyRangeInternal(Exchange.java:3367) at * com.persistit.Exchange.removeKeyRangeInternal(Exchange.java:3070) at * com.persistit.Exchange.removeInternal(Exchange.java:2999) at * com.persistit.Exchange.remove(Exchange.java:2927) at * com.persistit.stress.unit.Stress2txn.executeTest(Stress2txn.java:231) at * com.persistit.stress.AbstractStressTest.run(AbstractStressTest.java:93) at * java.lang.Thread.run(Thread.java:662) * * Stress2txn [main] FAILED: com.persistit.exception.CorruptVolumeException: * LONG_RECORD chain is invalid at page 111919 - invalid page type: Page 111,919 * in volume persistit(/tmp/persistit_tests/persistit) at index 1,559 * timestamp=909,787,072 status=vr1 type=Data at * com.persistit.LongRecordHelper.corrupt(LongRecordHelper.java:243) at * com.persistit.LongRecordHelper.fetchLongRecord(LongRecordHelper.java:103) at * com.persistit.Exchange.fetchFixupForLongRecords(Exchange.java:2841) at * com.persistit.Exchange.fetchFromValueInternal(Exchange.java:2778) at * 
com.persistit.Exchange.fetchFromBufferInternal(Exchange.java:2747) at * com.persistit.Exchange.traverse(Exchange.java:2157) at * com.persistit.Exchange.traverse(Exchange.java:1960) at * com.persistit.Exchange.traverse(Exchange.java:1897) at * com.persistit.Exchange.next(Exchange.java:2330) at * com.persistit.stress.unit.Stress2txn.executeTest(Stress2txn.java:188) at * com.persistit.stress.AbstractStressTest.run(AbstractStressTest.java:93) at * java.lang.Thread.run(Thread.java:662) * * Bug mechanism #1: * * An obscure path through Exchange#raw_removeKeyRangeInternal inserts a * key-pointer pair into an index page. It does so after removing all claims on * pages and the tree itself. After removing claims, before inserting the * key-pointer pair we believe the page itself gets put onto a garbage chain. So * after the re-insertion, the index page now has a pointer to a page that will * be reused and will contain unrelated data. * * * Bug mechanism #2: An obscure path through Exchange#raw_removeKeyRangeInternal * performs a structure delete (i.e., joins one or more pairs of pages) but fails * to bump the Tree generation. This allows use of a stale LevelCache array. * * This test procedure exhibited both bug mechanisms reliably within 10 seconds * prior to fixing the code. We also implemented a test method based on the * ThreadSequencer to precisely elaborate the sequence of interactions between * two threads that cause the failure. However, the bug fix eliminates the code * path that allows the sequencer to work, so the test was removed. 
*
 * @author peter
 *
 */
public class Bug1017957Test extends PersistitUnitTestCase {

    /** Volume used by both worker threads and by the final integrity check. */
    private static final String VOLUME_NAME = "persistit";

    /** Tree on which the two worker threads contend. */
    private static final String TREE_NAME = "Bug1017957Test";

    /** Duration of the stress phase: 10 seconds, expressed in nanoseconds. */
    private static final long STRESS_NANOS = 10L * 1000000000L;

    /** A progress line is printed each time a worker completes this many iterations. */
    private static final int PROGRESS_INTERVAL = 5000;

    /**
     * Per-worker bound on printed stack traces: a trace is printed only while
     * the worker's running error count is strictly below this value. Every
     * error is still counted regardless of whether it is printed.
     */
    private static final int STACK_TRACE_LIMIT = 10;

    @Override
    protected Properties getProperties(final boolean cleanup) {
        return UnitTestProperties.getBiggerProperties(cleanup);
    }

    /**
     * Runs two threads against the same tree for {@link #STRESS_NANOS}: one
     * repeatedly builds the structure and removes the "interesting" key, the
     * other repeatedly removes a covering key range and inserts other keys.
     * Afterwards the volume is checked with {@link IntegrityCheck}. The test
     * fails if the check reports any fault or if either thread recorded an
     * exception, including a failure to obtain its Exchange.
     *
     * @throws Exception
     *             if joining the threads or checking the volume fails
     */
    @Test
    public void induceCorruptionByStress() throws Exception {
        final long expiresAt = System.nanoTime() + STRESS_NANOS;
        final AtomicInteger totalErrors = new AtomicInteger();

        final Thread t1 = new Thread(new StressWorker("T1", expiresAt, totalErrors) {
            @Override
            void iterate(final Exchange ex) throws Exception {
                final Key key = createUnsafeStructure(ex);
                removeInterestingKey(ex, key);
            }
        });

        final Thread t2 = new Thread(new StressWorker("T2", expiresAt, totalErrors) {
            @Override
            void iterate(final Exchange ex) throws Exception {
                removeCoveringRange(ex);
                insertOtherStuff(ex);
            }
        });

        t1.start();
        t2.start();
        t1.join();
        t2.join();

        // This PrintWriter constructor does not enable automatic flushing, so
        // keep a reference and flush once the check is done; otherwise buffered
        // messages might never reach the console.
        final PrintWriter messageWriter = new PrintWriter(System.out);
        final IntegrityCheck icheck = new IntegrityCheck(_persistit);
        icheck.setMessageLogVerbosity(Task.LOG_VERBOSE);
        icheck.setMessageWriter(messageWriter);
        icheck.checkVolume(_persistit.getVolume(VOLUME_NAME));
        messageWriter.flush();

        System.out.printf("\nTotal errors %d", totalErrors.get());
        assertEquals("Corrupt volume", 0, icheck.getFaults().length);
        assertEquals("Exception occurred", 0, totalErrors.get());
    }

    /**
     * Shared driver for both stress threads. It obtains an Exchange on the test
     * tree and calls {@link #iterate(Exchange)} until the deadline passes. Each
     * exception is added to the shared error counter, so that a worker which
     * fails - even one that never manages to obtain its Exchange - causes the
     * test to fail rather than pass silently.
     */
    private abstract class StressWorker implements Runnable {

        private final String label;
        private final long expiresAt;
        private final AtomicInteger totalErrors;

        /**
         * @param label
         *            prefix used in progress output, e.g. "T1"
         * @param expiresAt
         *            {@link System#nanoTime()} value at which to stop iterating
         * @param totalErrors
         *            counter shared by all workers; incremented once per
         *            exception
         */
        StressWorker(final String label, final long expiresAt, final AtomicInteger totalErrors) {
            this.label = label;
            this.expiresAt = expiresAt;
            this.totalErrors = totalErrors;
        }

        /**
         * Performs one unit of stress work.
         *
         * @param ex
         *            the Exchange this worker obtained for the test tree
         * @throws Exception
         *             on any failure; the caller counts it and keeps going
         */
        abstract void iterate(Exchange ex) throws Exception;

        @Override
        public void run() {
            int count = 0;
            int errors = 0;
            try {
                final Exchange ex = _persistit.getExchange(VOLUME_NAME, TREE_NAME, true);
                while (System.nanoTime() < expiresAt) {
                    try {
                        iterate(ex);
                        if (++count % PROGRESS_INTERVAL == 0) {
                            System.out.printf("%s iterations %,d\n", label, count);
                        }
                    } catch (final Exception e) {
                        // Limit console noise, but count every failure.
                        if (++errors < STACK_TRACE_LIMIT) {
                            e.printStackTrace();
                        }
                        totalErrors.incrementAndGet();
                    }
                }
            } catch (final Exception e) {
                // Reached when the Exchange cannot be obtained. Record it in
                // the shared counter; an exception merely thrown from this
                // thread would not be seen by the test method.
                e.printStackTrace();
                totalErrors.incrementAndGet();
            }
        }
    }

    /**
     * Create a B-Tree with a structure that will induce a deferred index
     * insertion on removal of key. We need an index page that's pretty full
     * such that removing a key and inserting a different one will result in
     * splitting the index page.
     *
     * @param ex
     *            Exchange through which the records are stored
     * @return a copy of the short "interesting" key stored just before the
     *         full-length key for 1009
     * @throws Exception
     *             if a store operation fails
     */
    private Key createUnsafeStructure(final Exchange ex) throws Exception {
        Key result = null;
        final String v = createString(5500); // less than long record
        final String k = createString(1040);
        for (int i = 1000; i < 1019; i++) {
            if (i == 1009) {
                // Same leading segment as the big key below, but with only the
                // first 20 characters of k as the second segment.
                ex.clear().append(i).append(k.substring(0, 20));
                ex.getValue().put("interesting");
                ex.store();
                result = new Key(ex.getKey());
            }
            ex.clear().append(i).append(k);
            ex.getValue().put(v);
            ex.store();
        }
        return result;
    }

    /**
     * Removes the record stored under the given key.
     *
     * @param ex
     *            Exchange used for the removal; its key is overwritten
     * @param interestingKey
     *            key to remove, as returned by
     *            {@link #createUnsafeStructure(Exchange)}
     * @throws Exception
     *             if the remove operation fails
     */
    private void removeInterestingKey(final Exchange ex, final Key interestingKey) throws Exception {
        interestingKey.copyTo(ex.getKey());
        ex.remove();
    }

    /**
     * Removes the key range delimited by {1005} and {1015}, which covers the
     * "interesting" key stored under 1009.
     *
     * @param ex
     *            Exchange used for the range removal
     * @throws Exception
     *             if the remove operation fails
     */
    private void removeCoveringRange(final Exchange ex) throws Exception {
        final Key key1 = new Key(_persistit).append(1005);
        final Key key2 = new Key(_persistit).append(1015);
        ex.removeKeyRange(key1, key2);
    }

    /**
     * Stores 100 records with keys {1009, k, RED_FOX} for k in 0..99, each with
     * RED_FOX as its value.
     *
     * @param ex
     *            Exchange through which the records are stored
     * @throws Exception
     *             if a store operation fails
     */
    private void insertOtherStuff(final Exchange ex) throws Exception {
        for (int k = 0; k < 100; k++) {
            ex.clear().append(1009).append(k).append(RED_FOX);
            ex.getValue().put(RED_FOX);
            ex.store();
        }
    }
}