/* Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016. All rights reserved. Contact: SYSTAP, LLC DBA Blazegraph 2501 Calvert ST NW #106 Washington, DC 20008 licenses@blazegraph.com This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; version 2 of the License. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ /* * Created on Nov 19, 2007 */ package com.bigdata.journal; import java.io.File; import java.io.IOException; import java.io.RandomAccessFile; import java.nio.channels.FileChannel; import java.util.Random; import org.apache.log4j.Logger; import junit.framework.AssertionFailedError; import junit.framework.TestCase; import com.bigdata.util.Bytes; /** * Test suite for conformance with synchronous IO write requests made on a * {@link RandomAccessFile} opened using <code>rss</code> or <code>rdd</code> * mode. A conforming platform will NOT reorder writes and will a request to * {@link FileChannel#force(boolean)} will flush data through to stable media * before the write operation returns. A write cache in the operating system, * device driver, controller, or disk drive can defeat synchronous IO with the * results that: (a) the sequence in which writes are issued by the application * may not be the sequence in which the data are laid down on the disk; and (b) * the write operation may return before the data are stable on disk. * <p> * Both of these results can break the semantics of the atomic commit protocol * in at least the following ways: * <ul> * <li> If the root blocks are updated before the application data are on stable * media then a power failure will cause the application to read from the new * root block but the application data will not have been written.</li> * <li> If the write operation returns before the data are on stable media then * the application may conclude that the commit was successful when in fact the * data are not yet on disk. In fact, since the error is not reported * synchronously the application may never learn that the write has failed * unless it continues to write on the disk and a subsequent write turns up the * error.</li> * </ol> * * @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan Thompson</a> * @version $Id$ */ public class TestRandomAccessFileSynchronousWrites extends TestCase { protected static final Logger log = Logger .getLogger(TestRandomAccessFileSynchronousWrites.class); public TestRandomAccessFileSynchronousWrites() { } public TestRandomAccessFileSynchronousWrites(String name) { super(name); } /** * Test verifies whether or not the platform appears to perform synchronous * IOs when creating a {@link RandomAccessFile} with mode <code>rws</code>. */ public void test_syncWrites_rds() throws IOException { // Performance comparison when NOT requesting synchronous IO. final Stats rw = doSyncWriteTest("rw"); final Stats rws = doSyncWriteTest("rws"); assertWriteCacheDisabled(rw, rws); } /** * Test verifies whether or not the platform appears to perform synchronous * IOs when creating a {@link RandomAccessFile} with mode <code>rws</code>. */ public void test_syncWrites_rdd() throws IOException { // Performance comparison when NOT requesting synchronous IO. final Stats rw = doSyncWriteTest("rw"); final Stats rwd = doSyncWriteTest("rwd"); assertWriteCacheDisabled(rw, rwd); } /** * Verify that the request to sync to disk with each IO (<code>rws</code> or * <code>rwd</code>) was honored by the underlying platform. * * @param baseline * The <code>rw</code> performance {@link Stats}. * @param syncio * The performance for either the <code>rws</code> or * <code>rwd</code> modes, which require synchronization to the * disk after each write. * * @throws AssertionFailedError * unless the write IOPs are significantly lower for the * <i>syncio</i> condition. */ protected void assertWriteCacheDisabled(final Stats baseline, final Stats syncio) { final double ratio = Math .round(100. * (syncio.writesPerSec / (double) baseline.writesPerSec)) / 100.; final String msg = "ratio=" + ratio + ", " + baseline + ", " + syncio; if (ratio > .5) { /* * We are seeing more write operations per second (or more bytes * written per second) than can reasonably be expected synchronous * writes. */ fail("Write cache in effect: " + msg); } System.out.println(msg); } /** * Test helper attempts to detect when a request for synchronous writes is * being ignored by the platform. * <p> * Note: The file is created using the temp file mechanisms so this is only * really testing the behavior of the disk on which the temp files are * stored. * <p> * Note: The more write operations that you request and the larger the file * on which those writes are randomly distributed the more you are likely to * defeat any cache mechanisms. * * <pre> * Results on a laptop class platform (Windows XP): * * write cache disabled in platform: * * elapsed=5063ms, mode=rwd, writesPerSec=988, bytesPerSec=1011258 * elapsed=5031ms, mode=rws, writesPerSec=994, bytesPerSec=1017690 * elapsed=109ms, mode=rw, writesPerSec=45872, bytesPerSec=46972477 * * write cache enabled in platform: * * elapsed=1797ms, mode=rwd, writesPerSec=2782, bytesPerSec=2849193 * elapsed=1969ms, mode=rws, writesPerSec=2539, bytesPerSec=2600305 * elapsed=62ms, mode=rw, writesPerSec=80645, bytesPerSec=82580645 * * </pre> * * Based on the data above, you can see that merely requesting synchronous * IO in Java clearly does not disable all layers of the write cache. * * @param mode * The file mode to be used. * * @return The {@link Stats} for that mode. */ protected Stats doSyncWriteTest(final String mode) throws IOException { final Random r = new Random(); // #of records to write. final int LIMIT = 5000; // maximum size of the file on which the records will be written. final int MAXSIZE = 100 * Bytes.kilobyte32; // size of each record. final int RECSIZE = 1 * Bytes.kilobyte32; // create a record with random data. final byte[] record = new byte[RECSIZE]; r.nextBytes(record); // create a temp file. final File file = File.createTempFile(getName(), ".tmp"); try { // Note: can also test with rwd (synchronous metadata updates also). RandomAccessFile f = new RandomAccessFile(file, mode); try { final long begin = System.currentTimeMillis(); for (int i = 0; i < LIMIT; i++) { f.seek(r.nextInt(MAXSIZE)); f.write(record); final long elapsed = System.currentTimeMillis() - begin; if(elapsed>5000) { System.err.println("Test is taking too long - IO must be synchronous :-)"); break; } } f.getChannel().force(true/*metaData*/); final long elapsed = System.currentTimeMillis() - begin; final long writesPerSec = (long)((LIMIT * 1000. / elapsed)+0.5); final long bytesPerSec = (long)((LIMIT*RECSIZE*1000./elapsed)+.5); final Stats stats = new Stats(mode,elapsed,writesPerSec,bytesPerSec); return stats; } finally { f.close(); } } finally { if (!file.delete()) log.warn("Could not delete: file=" + file); } } private static class Stats { final String mode; final long elapsed, writesPerSec, bytesPerSec; public Stats(final String mode, final long elapsed, final long writesPerSec, final long bytesPerSec) { this.mode = mode; this.elapsed = elapsed; this.writesPerSec = writesPerSec; this.bytesPerSec = bytesPerSec; } public String toString() { return "elapsed=" + elapsed + "ms, mode=" + mode + ", writesPerSec=" + writesPerSec + ", bytesPerSec=" + bytesPerSec; } } }