/**
* Copyright 2011-2012 Akiban Technologies, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.persistit;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import java.io.File;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.channels.FileChannel;
import java.util.Properties;
import java.util.Timer;
import java.util.TimerTask;
import org.junit.Test;
import com.persistit.Transaction.CommitPolicy;
import com.persistit.exception.CorruptJournalException;
import com.persistit.exception.CorruptVolumeException;
import com.persistit.exception.PersistitException;
import com.persistit.exception.PersistitIOException;
import com.persistit.unit.UnitTestProperties;
public class IOFailureTest extends PersistitUnitTestCase {
/*
 * Journal size used by these tests. It is supplied to Persistit as the
 * "journalsize" property and is also used as the journal address handed to
 * getFileChannel() by the tests that target the second journal file.
 */
final static int BLOCKSIZE = 10000000;
/*
 * This class needs to be in com.persistit rather than com.persistit.unit
 * because it uses some package-private methods in Persistit.
 */
/* Name of the volume that every test and helper in this class operates on. */
private final String _volumeName = "persistit";
/**
 * Supplies the configuration for these tests: the standard unit-test
 * properties with the "journalsize" property overridden to
 * {@link #BLOCKSIZE}.
 *
 * @param cleanup
 *            passed through unchanged to
 *            {@link UnitTestProperties#getProperties(boolean)}
 * @return the properties to configure Persistit with
 */
@Override
protected Properties getProperties(final boolean cleanup) {
    final Properties properties = UnitTestProperties.getProperties(cleanup);
    final String journalSize = String.valueOf(BLOCKSIZE);
    properties.setProperty("journalsize", journalSize);
    return properties;
}
/**
 * Creates a new {@link ErrorInjectingFileChannel} and installs it into the
 * supplied channel so that a test can inject I/O failures through it.
 *
 * @param channel
 *            the channel to instrument; it is cast to
 *            {@link MediatedFileChannel}
 * @return the error-injecting channel that was installed
 */
private ErrorInjectingFileChannel errorInjectingChannel(final FileChannel channel) {
    final ErrorInjectingFileChannel injected = new ErrorInjectingFileChannel();
    final MediatedFileChannel mediated = (MediatedFileChannel) channel;
    mediated.injectChannelForTests(injected);
    return injected;
}
/**
 * Simulates a disk-full failure while appending to the journal (bug
 * #878346). A disk-full limit is injected on the channel obtained for
 * journal address {@link #BLOCKSIZE}; transactions are then committed one
 * after another until one fails with a "Disk Full" error. The limit is
 * lifted, the failed batch is stored again in a new transaction, and the
 * tree contents are verified.
 *
 * @throws Exception
 */
@Test
public void testJournalUnwritable() throws Exception {
    final Transaction txn = _persistit.getTransaction();
    final FileChannel secondJournalFile = _persistit.getJournalManager().getFileChannel(BLOCKSIZE);
    final ErrorInjectingFileChannel eifc = errorInjectingChannel(secondJournalFile);
    /*
     * Will cause any attempt to write into the second journal file to fail.
     */
    eifc.injectDiskFullLimit(100000);
    int at = 0;
    while (true) {
        try {
            storeAndCommitHard(txn, at);
        } catch (final PersistitIOException e) {
            if (!e.getMessage().contains("Disk Full")) {
                throw e;
            }
            // The expected disk-full failure: stop filling the journal.
            break;
        }
        at++;
    }
    Thread.sleep(1000);
    /*
     * Now remove the disk full condition. Transaction should now succeed.
     */
    eifc.injectDiskFullLimit(Long.MAX_VALUE);
    storeAndCommitHard(txn, at);
    /*
     * Batches 0..at must each hold 5000 keys; the next few batch numbers
     * were never stored and must be empty.
     */
    final Exchange exchange = _persistit.getExchange(_volumeName, "IOFailureTest", false);
    final int limit = at + 5;
    for (int batch = 0; batch < limit; batch++) {
        exchange.clear().append(batch).append(Key.BEFORE);
        int found = 0;
        while (exchange.next()) {
            found++;
        }
        final int expected;
        if (batch <= at) {
            expected = 5000;
        } else {
            expected = 0;
        }
        assertEquals("Incorrect number of keys in tree", expected, found);
    }
}

/**
 * Runs {@link #store1(int)} inside the supplied transaction and commits it
 * with {@link CommitPolicy#HARD}; the transaction is always ended.
 */
private void storeAndCommitHard(final Transaction txn, final int at) throws Exception {
    txn.begin();
    try {
        store1(at);
        txn.commit(CommitPolicy.HARD);
    } finally {
        txn.end();
    }
}
/**
 * Simulates an IOException on an attempt to read from the journal during
 * normal operation. The test sets up the journal as the sole source from
 * which an attempt to read data can pull pages. Then it simulates a disk
 * read failure, proves that the client receives an appropriate Exception,
 * clears the failure condition, and finally proves that the client
 * succeeds. A second phase injects the same read failure, clears it from a
 * timer after two seconds, and verifies that copy-back completes only once
 * the failure is gone.
 *
 * @throws Exception
 */
@Test
public void testJournalUnreadable() throws Exception {
    final String reason = "Read Failure";
    store1(0);
    final Volume volume = _persistit.getVolume(_volumeName);
    /*
     * Remove all pages from the pool
     */
    volume.getPool().flush(Long.MAX_VALUE);
    volume.getPool().invalidate(volume);
    /*
     * Make sure the pages can't be read back from the journal's write
     * buffer
     */
    _persistit.getJournalManager().force();
    final Exchange ex = _persistit.getExchange(_volumeName, "IOFailureTest", false);
    final ErrorInjectingFileChannel eifc = errorInjectingChannel(_persistit.getJournalManager().getFileChannel(0));
    eifc.injectTestIOException(new IOException(reason), "r");
    try {
        ex.clear().append(0).next();
        fail("Should have gotten an IOException");
    } catch (final PersistitIOException ioe) {
        assertEquals("Incorrect Exception thrown: " + ioe, reason, ioe.getCause().getMessage());
    }
    eifc.injectTestIOException(null, "");
    assertEquals("Expected key not found", true, ex.clear().append(Key.BEFORE).next());
    store1(1);
    /*
     * Flush the pool again and make sure the pages can't be read back from
     * the journal's write buffer
     */
    volume.getPool().flush(Long.MAX_VALUE);
    _persistit.getJournalManager().force();
    /*
     * Inject IOException on journal reads. This should stall the copier
     * process until the error condition is cleared.
     */
    final ErrorInjectingFileChannel mfcj = errorInjectingChannel(_persistit.getJournalManager().getFileChannel(0));
    mfcj.injectTestIOException(new IOException(reason), "r");
    final long start = System.currentTimeMillis();
    final Timer timer = new Timer();
    try {
        timer.schedule(new TimerTask() {
            @Override
            public void run() {
                mfcj.injectTestIOException(null, "");
            }
        }, 2000);
        copyBackEventuallySucceeds(start, reason);
    } finally {
        /*
         * Stop the timer thread so it does not outlive the test, and make
         * sure the injected failure is gone even when the check above
         * failed before the timer fired.
         */
        timer.cancel();
        mfcj.injectTestIOException(null, "");
    }
}
/**
 * Simulates an IOException while reading from a Volume. Data is inserted
 * and copied back to the volume file; the buffer pool is then invalidated
 * and the data is read again, first with a read failure injected on the
 * journal (which must not matter) and then with one injected on the volume
 * (which must surface as a {@link PersistitIOException}).
 *
 * @throws Exception
 */
@Test
public void testVolumeUnreadable() throws Exception {
    final String reason = "Read Failure";
    store1(0);
    final Exchange exchange = _persistit.getExchange(_volumeName, "IOFailureTest", false);
    final Volume volume = _persistit.getVolume(_volumeName);
    /*
     * Remove all pages from the pool
     */
    volume.getPool().flush(Long.MAX_VALUE);
    /*
     * Make sure the pages can't be read back from the journal's write
     * buffer
     */
    _persistit.getJournalManager().force();
    final FileChannel journalFile = _persistit.getJournalManager().getFileChannel(0);
    final ErrorInjectingFileChannel journalChannel = errorInjectingChannel(journalFile);
    /*
     * Push all pages back to the Volume file.
     */
    _persistit.copyBackPages();
    volume.getPool().invalidate(volume);
    exchange.initCache();
    /*
     * Inject IOException on journal reads. This prevents pages from being
     * read back from the journal. However, because copyBackPages has
     * written all page images back to the Volume, no reads against the
     * journal should occur.
     */
    journalChannel.injectTestIOException(new IOException(reason), "r");
    /*
     * This should succeed because the journal has been fully copied,
     * therefore reads are coming from the Volume file itself.
     */
    final boolean readFromVolume = exchange.clear().append(Key.BEFORE).next();
    assertEquals(true, readFromVolume);
    /*
     * Clear out the buffer pool again.
     */
    volume.getPool().invalidate(volume);
    exchange.initCache();
    /*
     * Now make the volume file itself unreadable; the read must fail with
     * the injected reason as its cause.
     */
    final ErrorInjectingFileChannel volumeChannel = errorInjectingChannel(volume.getStorage().getChannel());
    volumeChannel.injectTestIOException(new IOException(reason), "r");
    try {
        exchange.clear().append(0).next();
        fail("Should have gotten an IOException");
    } catch (final PersistitIOException ioe) {
        final String causeMessage = ioe.getCause().getMessage();
        assertEquals("Incorrect Exception thrown: " + ioe, reason, causeMessage);
    }
    /*
     * With the failure cleared the same data must be readable again.
     */
    volumeChannel.injectTestIOException(null, "");
    final boolean readAfterClear = exchange.clear().append(Key.BEFORE).next();
    assertEquals("Expected key not found", true, readAfterClear);
}
/**
 * Simulates an IOException while writing to a Volume. A write failure is
 * injected on the volume channel, data is stored (which is expected to
 * succeed because volume writes are delayed), and copy-back is then
 * attempted repeatedly. A timer clears the injected failure after two
 * seconds; copy-back must complete only after that point.
 *
 * @throws Exception
 */
@Test
public void testVolumeUnwritable() throws Exception {
    final String reason = "Write Failure";
    final Volume volume = _persistit.getVolume(_volumeName);
    final ErrorInjectingFileChannel mfcv = errorInjectingChannel(volume.getStorage().getChannel());
    mfcv.injectTestIOException(new IOException(reason), "w");
    /*
     * Should succeed since writes to volume are delayed
     */
    store1(0);
    /*
     * Remove all pages from the pool
     */
    volume.getPool().flush(Long.MAX_VALUE);
    _persistit.getJournalManager().force();
    /*
     * Copy-back should stall until we clear the injected IOException
     */
    final long start = System.currentTimeMillis();
    final Timer timer = new Timer();
    try {
        timer.schedule(new TimerTask() {
            @Override
            public void run() {
                mfcv.injectTestIOException(null, "");
            }
        }, 2000);
        copyBackEventuallySucceeds(start, reason);
    } finally {
        /*
         * Stop the timer thread so it does not outlive the test, and make
         * sure the injected failure is gone even when the check above
         * failed before the timer fired.
         */
        timer.cancel();
        mfcv.injectTestIOException(null, "");
    }
    volume.getPool().invalidate(volume);
}
/**
 * Verifies that recovery fails cleanly when journal files have been
 * truncated. Records are stored (with a checkpoint taken along the way)
 * until the journal address passes 1.25 times the block size, Persistit is
 * closed, and the two journal files addressed by the final journal address
 * and the address one block earlier are truncated to 100 bytes each.
 * Re-initialization is then expected to throw
 * {@link CorruptJournalException}; after the later file is deleted it is
 * expected to throw {@link CorruptVolumeException}.
 *
 * @throws Exception
 */
@Test
public void testJournalEOFonRecovery() throws Exception {
    final JournalManager jman = _persistit.getJournalManager();
    final Exchange exchange = _persistit.getExchange(_volumeName, "RecoveryTest", true);
    exchange.getValue().put(RED_FOX);
    int count = 0;
    long checkpointAddr = 0;
    while (jman.getCurrentAddress() < jman.getBlockSize() * 1.25) {
        /*
         * Take a checkpoint whenever the journal has advanced by more than
         * 80% of a block since the previous one.
         */
        if (jman.getCurrentAddress() - checkpointAddr > jman.getBlockSize() * 0.8) {
            _persistit.checkpoint();
            checkpointAddr = jman.getCurrentAddress();
        }
        exchange.to(count).store();
        count++;
    }
    for (int i = 0; i < count + 100; i++) {
        assertEquals(i < count, exchange.to(i).isValueDefined());
    }
    final long currentAddress = jman.getCurrentAddress();
    _persistit.close();
    final File file0 = jman.addressToFile(currentAddress - jman.getBlockSize());
    truncateJournalFile(file0, 100);
    final File file1 = jman.addressToFile(currentAddress);
    truncateJournalFile(file1, 100);
    _persistit = new Persistit();
    _persistit.setConfiguration(_config);
    try {
        _persistit.initialize();
        fail("Expected CorruptJournalException");
    } catch (final CorruptJournalException cje) {
        // expected
    }
    /*
     * The second phase is only meaningful if the file is really gone, so
     * fail loudly rather than silently re-testing the first scenario.
     */
    assertTrue("Could not delete journal file " + file1, file1.delete());
    _persistit = new Persistit();
    _persistit.setConfiguration(_config);
    try {
        _persistit.initialize();
        fail("Expected CorruptVolumeException");
    } catch (final CorruptVolumeException cve) {
        // expected
    }
}

/**
 * Truncates the given file to the given size, always closing the file
 * handle even if the truncation fails.
 */
private void truncateJournalFile(final File file, final long size) throws IOException {
    final RandomAccessFile raf = new RandomAccessFile(file, "rw");
    try {
        raf.getChannel().truncate(size);
    } finally {
        // Closing the RandomAccessFile also closes its channel.
        raf.close();
    }
}
/**
 * Verifies that the message of a {@link PersistitIOException} ends with the
 * message of the IOException that caused it. A write failure carrying
 * RED_FOX as its message is injected on the journal channel, and the
 * journal is then asked to write its page map and flush. The injected
 * failure is always cleared afterwards.
 *
 * @throws Exception
 */
@Test
public void testPersistitIOExceptionReportsCauseMessage() throws Exception {
    final ErrorInjectingFileChannel eifc = errorInjectingChannel(_persistit.getJournalManager().getFileChannel(0));
    eifc.injectTestIOException(new IOException(RED_FOX), "w");
    try {
        _persistit.getJournalManager().writePageMap();
        _persistit.getJournalManager().flush();
        /*
         * Without this the test would pass without checking anything
         * whenever the injected failure does not surface.
         */
        fail("Should have gotten a PersistitIOException");
    } catch (final PersistitIOException ioe) {
        final String detail = ioe.getMessage();
        assertTrue("Message does not include cause's message", detail.endsWith(RED_FOX));
    } finally {
        eifc.injectTestIOException(null, "");
    }
}
/**
 * Simulates a disk-full failure on journal appends made by the PAGE_WRITER
 * (bug #916071). A disk-full limit is injected on the channel obtained for
 * journal address {@link #BLOCKSIZE}, data is stored until a "Disk Full"
 * error is reported, the limit is lifted, and more data is stored and
 * verified. Differs from {@link #testJournalUnwritable()} in that
 * transactions are not used; it is the PAGE_WRITER thread that gets the
 * disk full errors.
 *
 * @throws Exception
 */
@Test
public void testDiskFullForPageWriter() throws Exception {
    final JournalManager jman = _persistit.getJournalManager();
    final FileChannel secondJournalFile = _persistit.getJournalManager().getFileChannel(BLOCKSIZE);
    final ErrorInjectingFileChannel eifc = errorInjectingChannel(secondJournalFile);
    /*
     * Will cause any attempt to write into the second journal file to fail.
     */
    eifc.injectDiskFullLimit(100000);
    final int at = storeUntilDiskFull();
    final boolean constrained = jman.getWriteBufferAddress() <= BLOCKSIZE + 100000;
    assertTrue("Journal size should be contrained", constrained);
    Thread.sleep(5000);
    /*
     * Now remove the disk full condition. Stores should now succeed.
     */
    eifc.injectDiskFullLimit(Long.MAX_VALUE);
    storeContinueAndCheck(at);
    _persistit.checkAllVolumes();
    _persistit.flush();
    final boolean grown = jman.getWriteBufferAddress() > BLOCKSIZE + 100000;
    assertTrue("Journal should grow once more disk space is available", grown);
}
/**
 * Simulates a disk-full failure on the volume file. A disk-full limit is
 * injected on the volume channel, data is stored until a "Disk Full" error
 * is reported, the limit is lifted, and more data is stored and verified.
 *
 * @throws Exception
 */
@Test
public void diskFullForExtendVolume() throws Exception {
    final Volume volume = _persistit.getVolume(_volumeName);
    final FileChannel volumeFile = volume.getStorage().getChannel();
    final ErrorInjectingFileChannel eifc = errorInjectingChannel(volumeFile);
    /*
     * Will cause any attempt to extend the volume to fail.
     */
    eifc.injectDiskFullLimit(100000);
    final int reached = storeUntilDiskFull();
    Thread.sleep(5000);
    /*
     * Now remove the disk full condition. Stores should now succeed.
     */
    eifc.injectDiskFullLimit(Long.MAX_VALUE);
    storeContinueAndCheck(reached);
    _persistit.checkAllVolumes();
    _persistit.flush();
}
/**
 * Stores RED_FOX under consecutive integer keys, starting at 0, in the
 * "IOFailureTest" tree until a store fails with a
 * {@link PersistitIOException} whose message contains "Disk Full". Any
 * other PersistitIOException is rethrown.
 *
 * @return the key at which the disk-full failure occurred
 * @throws Exception
 */
private int storeUntilDiskFull() throws Exception {
    final Exchange exchange = _persistit.getExchange(_volumeName, "IOFailureTest", true);
    exchange.getValue().put(RED_FOX);
    int at = 0;
    while (true) {
        try {
            exchange.to(at).store();
        } catch (final PersistitIOException e) {
            if (!e.getMessage().contains("Disk Full")) {
                throw e;
            }
            // The expected failure: stop here and report where it happened.
            break;
        }
        at++;
    }
    _persistit.releaseExchange(exchange);
    return at;
}
/**
 * Stores RED_FOX under the 10000 consecutive integer keys starting at
 * {@code from} in the "IOFailureTest" tree, then verifies that every key
 * from 0 up to the last one stored has a defined value and that the ten
 * keys after it do not. The exchange is released when done, matching the
 * other store helpers in this class.
 *
 * @param from
 *            first key to store
 * @throws Exception
 */
private void storeContinueAndCheck(final int from) throws Exception {
    final Exchange exchange = _persistit.getExchange(_volumeName, "IOFailureTest", true);
    try {
        exchange.getValue().put(RED_FOX);
        final int end = from + 10000;
        for (int at = from; at < end; at++) {
            exchange.to(at).store();
        }
        for (int i = 0; i < end + 10; i++) {
            exchange.to(i).fetch();
            assertEquals("Values should be completely updated", i < end, exchange.getValue().isDefined());
        }
    } finally {
        _persistit.releaseExchange(exchange);
    }
}
/**
 * Stores one batch of 5000 records in the "IOFailureTest" tree. Each key
 * consists of {@code at} followed by a two-character segment derived from
 * the record number; each value is the string
 * {@code "Record #" + at + "_" + recordNumber}.
 *
 * @param at
 *            batch number used as the first key segment
 * @throws PersistitException
 */
private void store1(final int at) throws PersistitException {
    final Exchange exchange = _persistit.getExchange(_volumeName, "IOFailureTest", true);
    final StringBuilder segment = new StringBuilder();
    int record = 1;
    while (record <= 5000) {
        final char first = (char) (record / 20 + 64);
        final char second = (char) (record % 20 + 64);
        segment.setLength(0);
        segment.append(first);
        segment.append(second);
        exchange.clear().append(at).append(segment);
        exchange.getValue().put("Record #" + at + "_" + record);
        exchange.store();
        record++;
    }
    _persistit.releaseExchange(exchange);
}
/**
 * Retries copy-back for up to ten seconds until it succeeds, then verifies
 * that it could not have succeeded before the injected failure was
 * cleared. Callers clear the failure from a timer two seconds after
 * {@code start}, so a success sooner than that means the failure did not
 * stall copy-back.
 *
 * @param start
 *            time in milliseconds at which the caller started the clock
 * @param reason
 *            message expected on the cause of every
 *            {@link PersistitIOException} thrown while copy-back is failing
 * @throws Exception
 */
private void copyBackEventuallySucceeds(final long start, final String reason) throws Exception {
    final long expires = System.currentTimeMillis() + 10000;
    boolean done = false;
    while (!done && System.currentTimeMillis() < expires) {
        try {
            /*
             * Needed to avoid leaving a dirty page during checkpoint
             */
            _persistit.flushStatistics();
            _persistit.copyBackPages();
            done = true;
        } catch (final PersistitIOException ioe) {
            assertEquals("Incorrect Exception thrown: " + ioe, reason, ioe.getCause().getMessage());
        }
    }
    final long elapsed = System.currentTimeMillis() - start;
    assertTrue("Copyback did not complete", done);
    /*
     * Finishing in under two seconds means copy-back succeeded while the
     * failure was still injected, i.e. too early rather than too late.
     */
    assertTrue("Copyback completed too soon (" + elapsed + "ms); the injected error should have stalled it",
            elapsed >= 2000);
    assertEquals("Copyback did not move base address to end of journal", _persistit.getJournalManager()
            .getCurrentAddress(), _persistit.getJournalManager().getBaseAddress());
}
}