/** Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016. All rights reserved. Contact: SYSTAP, LLC DBA Blazegraph 2501 Calvert ST NW #106 Washington, DC 20008 licenses@blazegraph.com This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; version 2 of the License. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ /* * Created on Aug 22, 2006 */ package com.bigdata.io; import java.io.File; import java.io.IOException; import java.io.RandomAccessFile; import java.nio.ByteBuffer; import java.nio.channels.FileChannel; import java.text.NumberFormat; import java.util.Random; import java.util.concurrent.TimeUnit; import junit.framework.TestCase2; /** * Low level IO performance tests in support of bigdata design options. * * @todo Develop test to compare efficiency of an ordered write to random writes * of the same pages. Use a random sequence of pages. Selection without * replacement is an option, but not required. The store size should be * larger than the #of pages to be written, and the #of pages to be * written should be at least 5x the disk cache (8-32M). * * @todo Installation reads are when you need to read a page from the database * so that you can update some rows on that page from a journal. Write * tests to determine if installation reads might be optimized by ordered * reads from a <em>region</em> of the database to get the pages into * memory followed by updating those pages from the journal and then an * ordered write to install the dirty pages back onto the database. * Unfortunately you can not use nio to directly communicate an ordered * write, e.g., with an array of buffers together with their target file * offsets. That is a situation then where not writing through the disk * cache and letting the drive optimize a series of queued write * operations would be advantageous. * <p> * Explore the possibility of asynchronous IO vs synchronous IO for the * database. I think that you would have to use the FileChannel directly * from multiple threads in order to make asynchronous requests to * multiple offsets and get some queue depth for the read/write operations * (FileChannel does not support asynchronous operations from a single * thread). Otherwise the ordered reads (and ordered writes) will involve * yielding to permit each IO to be synchronous. That could still be * efficient if you could interleave a bunch of IOs onto the same track of * the disk, reading pages, updating them from the most recent committed * state of the objects in the journal for that page, and then writing * pages once they had been updated. That would minimize head movement * since you are just waiting for the right part of the disk to come * around again. If I can identify AIO support for FileChannel then * refactor appropriate tests into a TestAIO class. * * @see Detailed information about storage hardware is available under windows * using * <code>Programs > Accessories > System Tools > System Information</code>. * See the <code> Components > Storage > Disks </code> view. * * @see http://www.jroller.com/page/cpurdy/20040907 * @see http://alphaworks.ibm.com/tech/aio4j * @see http://coconut.codehaus.org/ * * @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan Thompson</a> * @version $Id$ */ public class TestIO extends TestCase2 { /** * */ public TestIO() { this(null); } /** * @param arg0 */ public TestIO(String arg0) { super(arg0); /* * Set up formatting for integers. */ nf = NumberFormat.getIntegerInstance(); // grouping is disabled since we write comma-separated-value files. nf.setGroupingUsed(true); fpf = NumberFormat.getNumberInstance(); fpf.setGroupingUsed(false); fpf.setMaximumFractionDigits(2); } /** * Formatting useful for integers and floating point values that need to be * rounded to integers. If the value is in milliseconds, and you want to * write it in seconds then first divide by 1000. If the value is units per * millisecond and you want to write units per second, then compute and * format <code>units/milliseconds*1000</code>. */ final NumberFormat nf; /** * Formatting useful for floating point values with at most two digits after * the decimal. */ final NumberFormat fpf; /** * Computes units/second given units and nanoseconds. * * @param units * The units, e.g., the #of triples loaded. * @param nanos * The nanoseconds. * * @return Units/seconds, e.g., the #of pages written per second. If * <i>nanos</i> is zero(0) then this method returns zero. */ private double getUnitsPerSecond(long units,long nanos) { if( nanos == 0 ) return 0d; return (((double)units)/nanos)*1000000000L; } public void test_formats() { System.err.println("\nnf: "); System.err.println("12.1 : " + nf.format(12.1f)); System.err.println("12.5 : " + nf.format(12.5f)); System.err.println("12.6 : " + nf.format(12.6f)); System.err.println("112.5 : " + nf.format(112.5f)); System.err.println("1125.5 : " + nf.format(1125.5f)); /* * Example of formatting for a units/sec value. Note the cast to * floating point before dividing the units by the milliseconds and * _then_ multiple through by 1000 to get units/sec. */ System.err.println("400/855*1000 : " + nf.format(400. / 855 * 1000)); System.err.println("\nfpf: "); System.err.println("12.1 : " + fpf.format(12.1f)); System.err.println("12.5 : " + fpf.format(12.5f)); System.err.println("12.6 : " + fpf.format(12.6f)); System.err.println("112.5 : " + fpf.format(112.5f)); /* * Example of formatting for a units/sec value. Note the cast to * floating point before dividing the units by the milliseconds and * _then_ multiple through by 1000 to get units/sec. */ System.err.println("400/855*1000 : " + fpf.format(400. / 855 * 1000)); } public void test_units() { System.err.println("One second is " + nf.format(TimeUnit.SECONDS.toNanos(1)) + " nanoseconds"); System.err.println("One second is " + nf.format(TimeUnit.SECONDS.toMicros(1)) + " microseconds"); System.err.println("One second is " + nf.format(TimeUnit.SECONDS.toMillis(1)) + " milliseconds"); System.err.println("One kilobyte is "+nf.format(KiloByte)); System.err.println("One megabyte is "+nf.format(MegaByte)); System.err.println("One gigabyte is "+nf.format(GigaByte)); System.err.println("One terabyte is "+nf.format(TeraByte)); System.err.println("One petabyte is "+nf.format(PetaByte)); System.err.println("One exabyte is "+nf.format(ExaByte)); // System.exit(1); } final private static int KiloByte = 1024; final private static int MegaByte = 1024*1024; final private static int GigaByte = 1024*1024*1024; final private static int TeraByte = 1024*1024*1024*1024; final private static int PetaByte = 1024*1024*1024*1024*1024; final private static int ExaByte = 1024*1024*1024*1024*1024*1024; /** * Test of raw IO performance for random writes. * * @throws IOException */ public void test_001() throws IOException { // page size. final int pageSize = 8 * KiloByte; System.err.println("pageSize=" + nf.format(pageSize) + " bytes"); // assumption about the disk cache size. final int diskCacheSize = 8 * MegaByte; System.err.println("diskCacheSize=" + nf.format(diskCacheSize) + " bytes"); final int pagesInDiskCache = diskCacheSize / pageSize; System.err.println("pagesInDiskCache=" + nf.format(pagesInDiskCache)); /* * #of pages to write. This value effects the utility of the disk cache. * If all pages fit in the cache, then the writes can be absorbed * directly by the cache without pausing for disk IO. As the #of pages * written begins to exceed the cache, the cache becomes less effective. * Since this benchmark is meant to indicate sustained load, this value * should be at least 5x. */ final int pagesToWrite = 10 * pagesInDiskCache; System.err.println("pagesToWrite=" + nf.format(pagesToWrite)); /* * maximum file length (in pages). This value effects the amount that * the head must move when seeking within the file. In order to be * effective, this value should be choosen based on knowledge of the * format of the disk, including the sector size and the #of sectors per * track. When IOs are located within the same track, the head stays * still and waits for the right sector to come around. When IOs cover * multiple tracks, the head must seek among those tracks. The maximum * value is limited by the free space on the drive. The drive should be * defragmented before running this test so that the allocated file will * be a contiguous extent to the greatest extent possible. */ final int maxPages = pagesToWrite * 10; System.err.println("maxPages=" + nf.format(maxPages) + ", maxLength=" + nf.format(((long) maxPages * pageSize))); // when true, the data will write through to disk with each IO. final boolean writeThrough = true; // when true, the data will be forced to disk after it is all written. final boolean synchAfterTest = true; final Random r = new Random(); /* * Create a temporary file for the test. You can specify the directory * using an optional argument as a means of choosing which disk drive or * partition to use for the test. */ final File file = File.createTempFile("test", ".dbCache" // , new File("D:/") ); System.err.println("file=" + file); final RandomAccessFile raf = new RandomAccessFile(file, (writeThrough ? "rwd" : "rw")); try { final FileChannel fileChannel = raf.getChannel(); /* * Allocate direct buffer. */ final ByteBuffer buf = ByteBuffer.allocateDirect(pageSize); /* * Extend the file to its maximum size. We set the limit to one * before extending the file so that we only write the very last * byte of the extent. We then restore the limit to the capacity of * the buffer, since that is its initial condition and the * assumption through the rest of this code. */ assert buf.limit() == buf.capacity(); long maxOffset = (long) maxPages * pageSize - 1; buf.limit(1); fileChannel.write( buf, maxOffset ); buf.limit(buf.capacity()); long startNanos = System.nanoTime(); for (int i = 0; i < pagesToWrite; i++) { // offset of page. final long pos = r.nextInt(maxPages) * (long) pageSize; assert pos <= maxOffset; /* * Reset position so that the next write will transfer the * entire buffer contents. If you don't do this then it will * only write the buffer the first time through since the * position defaults to zero and the limit defaults to the * capacity. */ // buf.limit(pageSize); buf.position(0); final int nwritten; try { nwritten = fileChannel.write(buf, pos); } catch (IllegalArgumentException ex) { System.err.println("pos=" + pos); throw ex; } assertEquals("iteration=" + i + ", nwritten", pageSize, nwritten); } if (synchAfterTest) { // force data, but do not force metadata. fileChannel.force(false); } final long endNanos = System.nanoTime(); final long elapsedNanos = endNanos - startNanos; // System.err.println("startNanos="+nf.format(startNanos)); // System.err.println("endNanos="+nf.format(endNanos)); System.err.println("Wrote " + nf.format(pagesToWrite) + " pages of " + pageSize + " bytes in " + TimeUnit.NANOSECONDS.toSeconds(elapsedNanos) + " secs" ); System.err.println(""+nf.format(getUnitsPerSecond(pagesToWrite, elapsedNanos)) + " pages per second"); final long megabytesWritten = pagesToWrite * pageSize / MegaByte; System.err.println(""+fpf.format(megabytesWritten)+" megabytes written"); System.err.println("" + fpf.format(getUnitsPerSecond(megabytesWritten, elapsedNanos)) + " megabytes per second"); System.err.println("" + fpf.format(getUnitsPerSecond(megabytesWritten*8, elapsedNanos)) + " megabits per second"); System.err.println("writeThroughIOs="+writeThrough); System.err.println("synchAfterTest="+synchAfterTest); System.err.println("bytes on disk: " + nf.format(raf.length())); } finally { raf.close(); if (!file.delete()) { throw new RuntimeException("Could not delete file: " + file); } if (log.isInfoEnabled()) log.info("deleted: " + file); } } /** * Low level test of the ability to modify a file a sync it to the * underlying storage media. This test is interested in the absolute maximum * syncs per second that the hardware can support. * * @throws IOException */ public void test_commitsPerSec() throws IOException { // page size. final int pageSize = 512;// * KiloByte; if (log.isInfoEnabled()) log.info("pageSize=" + nf.format(pageSize) + " bytes"); /* * The #of commits. We update the same page for each commit and then * sync the FileChannel to the disk. */ final int numCommits = 10000; if (log.isInfoEnabled()) log.info("numCommits=" + nf.format(numCommits) + " commits"); /* * Create a temporary file for the test. You can specify the directory * using an optional argument as a means of choosing which disk drive or * partition to use for the test. */ final File file = File.createTempFile("test", ".dbCache"); final RandomAccessFile raf = new RandomAccessFile(file, "rw"); try { final FileChannel fileChannel = raf.getChannel(); /* * Allocate direct buffer. */ final ByteBuffer buf = ByteBuffer.allocateDirect(pageSize); /* * Extend the file to its maximum size. We set the limit to one * before extending the file so that we only write the very last * byte of the extent. We then restore the limit to the capacity of * the buffer, since that is its initial condition and the * assumption through the rest of this code. */ assert buf.limit() == buf.capacity(); FileChannelUtility.writeAll(fileChannel, buf, 0L/* pos */); long startNanos = System.nanoTime(); // Always update the first byte. final long pos = 0L; for (int i = 0; i < numCommits; i++) { buf.position(0); buf.limit(pageSize); // overwrite the buffer. final byte v = (byte) (i % 256); for (int off = 0; off < pageSize; off++) { buf.put(off/* index */, v/* newValue */); } // write on page. FileChannelUtility.writeAll(fileChannel, buf, pos); // Sync the channel to the disk. fileChannel.force(false/* metadata */); } final long endNanos = System.nanoTime(); final long elapsedNanos = endNanos - startNanos; System.err.println("Did " + nf.format(numCommits) + " commits in " + TimeUnit.NANOSECONDS.toSeconds(elapsedNanos) + " secs"); System.err.println("" + nf.format(getUnitsPerSecond(numCommits, elapsedNanos)) + " pages per second"); } finally { raf.close(); if (!file.delete()) { throw new RuntimeException("Could not delete file: " + file); } } } }