/** * Copyright 2011-2012 Akiban Technologies, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.persistit; import static org.junit.Assert.assertTrue; import org.junit.Test; /** * Failure detected during TPCC testing. Upon restarting server, the following * error was emitted: * * [main] ERROR Recovery failed due to * com.persistit.exception.CorruptJournalException: Missing journal file * /home/akiban * /dev/persistit/fix-pruning-deadlock2/bench/run/../data/BenchmarkSQL_journal * .000000000011 - the journal needs to be repaired or discarded * * This is a false positive. The data is not actually corrupt. RecoveryManager * thinks it needs journal file 11 because the transaction map at the front of * file 12 lists transactions that started in file 11. However, all of these * aborted and were subsequently pruned, and the lastValidCheckpoint in file 12 * has a base address in file 12. * * Upon further examination, the circumstances are a little different than my * initial view. Journal file 12 had several checkpoint records. Each checkpoint * contains the base address current at the time that checkpoint was written. * The base value actually _decreased_ between the first and second CP records * and then increased again. This indicates an unknown failure mechanism in the * JournalManager. * * Note that this failure occurred while the wwLock bug 923761 was occurring. * One side-effect of that bug would be for a thread to have a start timestamp * and then be delayed by up to a minute before being aborted. */ public class Bug927701Test extends PersistitUnitTestCase { @Test public void testBug927701() throws Exception { final JournalManager jman = _persistit.getJournalManager(); disableBackgroundCleanup(); jman.setCopierInterval(1000); final long blockSize = jman.getBlockSize(); /* * 1. Add at least 4MB + of stuff to the journal */ { final Transaction txn = _persistit.getTransaction(); txn.begin(); final Exchange exchange = _persistit.getExchange("persistit", "Bug927701Test", true); exchange.getValue().put(RED_FOX); int index = 0; while ((jman.getCurrentAddress() % blockSize) < JournalManager.ROLLOVER_THRESHOLD) { exchange.to(index++).store(); } txn.commit(); txn.end(); } /* * 2. Write part of a transaction, then abort. */ final long journalAddress = jman.getCurrentAddress(); final Transaction abortingTxn = _persistit.getTransaction(); abortingTxn.begin(); final Exchange exchange = _persistit.getExchange("persistit", "Bug927701Test", false); // write enough stuff to overflow and flush the transaction buffer exchange.getValue().put(RED_FOX.toUpperCase()); final int count = 65536 / (RED_FOX.length()); for (int index = 0; index < count; index++) { exchange.to(index).store(); } abortingTxn.rollback(); /* * Checkpoint to advance base address */ _persistit.checkpoint(); /* * Wait for CleanupManager call to pruneObsoleteTransactions */ Thread.sleep(2000); /* * Copy-back to discharge any remaining pages in the journal. Note that * this method also calls checkpoint beforehand. */ _persistit.copyBackPages(); /* * Another checkpoint to move the base address again, this time to the * very end of the journal. */ _persistit.checkpoint(); /* * Wait for a rollover triggered by journal copier */ for (int wait = 5; --wait >= 0;) { if (jman.getCurrentAddress() / blockSize != journalAddress / blockSize) { break; } assertTrue(wait > 0); System.out.printf("Cur=%,d base=%,d lvc=%,d\n", jman.getCurrentAddress(), jman.getBaseAddress(), jman.getLastValidCheckpointBaseAddress()); Thread.sleep(1000); } abortingTxn.end(); final long baseAddress1 = jman.getBaseAddress(); /* * Force checkpoint; should now flush the aborted transaction buffers. */ _persistit.checkpoint(); final long baseAddress2 = jman.getBaseAddress(); assertTrue(baseAddress2 >= baseAddress1); } }