/**
Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016.  All rights reserved.

Contact:
     SYSTAP, LLC DBA Blazegraph
     2501 Calvert ST NW #106
     Washington, DC 20008
     licenses@blazegraph.com

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/
/*
 * Created on Jul 25, 2007
 */

package com.bigdata.journal;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.PrintWriter;
import java.nio.ByteBuffer;
import java.util.Date;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
import java.util.TreeMap;

import org.apache.log4j.Logger;

import com.bigdata.btree.AbstractBTree;
import com.bigdata.btree.BaseIndexStats;
import com.bigdata.btree.DumpIndex;
import com.bigdata.btree.ICheckpointProtocol;
import com.bigdata.btree.ITupleIterator;
import com.bigdata.btree.IndexTypeEnum;
import com.bigdata.htree.AbstractHTree;
import com.bigdata.io.ChecksumUtility;
import com.bigdata.io.SerializerUtil;
import com.bigdata.relation.RelationSchema;
import com.bigdata.rwstore.IRWStrategy;
import com.bigdata.rwstore.IStore;
import com.bigdata.rwstore.RWStore;
import com.bigdata.rwstore.RWStore.DeleteBlockStats;
import com.bigdata.sparse.GlobalRowStoreSchema;
import com.bigdata.sparse.ITPS;
import com.bigdata.sparse.SparseRowStore;
import com.bigdata.stream.Stream;
import com.bigdata.util.Bytes;
import com.bigdata.util.BytesUtil;
import com.bigdata.util.InnerCause;

/**
 * A utility class that opens the journal in a read-only mode and dumps the
 * root blocks and metadata about the indices on a journal file.
 * 
 * TODO add an option to dump only as of a specified commitTime?
 * 
 * TODO GIST : Support all types of indices.
 * 
 * @see <a href="https://sourceforge.net/apps/trac/bigdata/ticket/585">GIST</a>
 * 
 * @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan Thompson</a>
 */
public class DumpJournal {

    private static final Logger log = Logger.getLogger(DumpJournal.class);

    /**
     * Dump out the Global Row Store.
     * 
     * TODO Raise as parameter, put on main(), and clean up the code.
     */
    private static final boolean dumpGRS = false;

    /**
     * Validate the delete blocks (RWStore only). If there are double-deletes
     * in the delete blocks, then log out more information about those
     * addresses.
     * 
     * TODO Raise as parameter, put on main(), and clean up the code.
     */
    private static final boolean validateDeleteBlocks = false;
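
    /*
     * Example invocation (a sketch: the jar and journal file names below are
     * placeholders; the options are those documented on main()):
     * 
     *   java -cp bigdata.jar com.bigdata.journal.DumpJournal -pages /data/bigdata.jnl
     */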

    /**
     * Dump one or more journal files:
     * 
     * <pre>
     * usage: (option*) filename+
     * </pre>
     * 
     * where <i>option</i> is any of:
     * <dl>
     * <dt>-namespace</dt>
     * <dd>Dump only those indices having the specified namespace prefix.</dd>
     * <dt>-history</dt>
     * <dd>Dump metadata for indices in all commit records (default only dumps
     * the metadata for the indices as of the most current committed state).</dd>
     * <dt>-indices</dt>
     * <dd>Dump the indices (does not show the tuples by default).</dd>
     * <dt>-pages</dt>
     * <dd>Dump the pages of the indices and report some information on the
     * page size.</dd>
     * <dt>-tuples</dt>
     * <dd>Dump the records in the indices.</dd>
     * <dt>-alternateRootBlock</dt>
     * <dd>Use the alternate root block when opening the journal.</dd>
     * </dl>
     * 
     * where <i>filename</i> is one or more journal file names.
     */
//    FIXME feature is not finished. Must differentiate different address types.
//    * <dt>-addr ADDR</dt>
//    * <dd>Dump the record at that address on the store.</dd>
    public static void main(final String[] args) {

        if (args.length == 0) {

            System.err.println("usage: (-namespace <namespace>|-history|-indices|-pages|-tuples|-alternateRootBlock|-addr <address>)* <filename>+");

            System.exit(1);

        }

        int i = 0;

        // Zero or more namespaces to be dumped. All are dumped if none are
        // specified.
        final List<String> namespaces = new LinkedList<String>();

        boolean dumpHistory = false;

        boolean dumpIndices = false;

        boolean dumpPages = false;

        boolean showTuples = false;

        boolean alternateRootBlock = false;

        final List<Long> addrs = new LinkedList<Long>();

        for (; i < args.length; i++) {

            final String arg = args[i];

            if (!arg.startsWith("-")) {

                // End of options.
                break;

            }

            if (arg.equals("-history")) {

                dumpHistory = true;

            } else if (arg.equals("-namespace")) {

                namespaces.add(args[i + 1]);

                i++;

            } else if (arg.equals("-indices")) {

                dumpIndices = true;

            } else if (arg.equals("-pages")) {

                dumpPages = true;

            } else if (arg.equals("-tuples")) {

                showTuples = true;

            } else if (arg.equals("-alternateRootBlock")) {

                alternateRootBlock = true;

            } else if (arg.equals("-addr")) {

                addrs.add(Long.valueOf(args[i + 1]));

                i++;

            } else
                throw new RuntimeException("Unknown argument: " + arg);

        }

        for (; i < args.length; i++) {

            final File file = new File(args[i]);

            try {

                /*
                 * Stat the file and report on its size, etc.
                 */
                {

                    System.out.println("File: " + file);

                    if (!file.exists()) {

                        System.err.println("No such file");

                        System.exit(1);

                    }

                    if (!file.isFile()) {

                        System.err.println("Not a regular file");

                        System.exit(1);

                    }

                    System.out.println("Length: " + file.length());

                    System.out.println("Last Modified: "
                            + new Date(file.lastModified()));

                }

                final Properties properties = new Properties();
                {

                    properties.setProperty(Options.FILE, file.toString());

                    properties.setProperty(Options.READ_ONLY, "" + true);

                    if (alternateRootBlock)
                        properties.setProperty(Options.ALTERNATE_ROOT_BLOCK,
                                "" + true);

                    properties.setProperty(Options.BUFFER_MODE,
                            BufferMode.Disk.toString());

                }

                System.out.println("Opening (read-only): " + file);

                final Journal journal = new Journal(properties);

                try {

                    final DumpJournal dumpJournal = new DumpJournal(journal);

                    final PrintWriter out = new PrintWriter(System.out,
                            true/* autoFlush */);

                    try {

                        dumpJournal.dumpJournal(out, namespaces, dumpHistory,
                                dumpPages, dumpIndices, showTuples);

                        for (Long addr : addrs) {

                            out.println("addr=" + addr + ", offset="
                                    + journal.getOffset(addr) + ", length="
                                    + journal.getByteCount(addr));
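
                            /*
                             * Note: A journal address encodes both the byte
                             * offset and the record length; getOffset() and
                             * getByteCount() above unpack those components.
                             */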
                            // Best effort attempt to dump the record.
                            out.println(dumpJournal.dumpRawRecord(addr));

                        }

                        out.flush();

                    } finally {

                        out.close();

                    }

                } finally {

                    journal.close();

                }

            } catch (Throwable t) {

                t.printStackTrace();

                System.err.println("Error: " + t + " on file: " + file);

                // Abnormal completion.
                System.exit(1);

            }

            System.out.println("==================================");

        }

        System.out.println("Normal completion");

    }

    /**
     * Dump the journal onto stdout.
     * 
     * @param dumpHistory
     *            Dump metadata for indices in all commit records (default only
     *            dumps the metadata for the indices as of the most current
     *            committed state).
     * @param dumpPages
     *            Dump the pages of the indices and report some information on
     *            the page size.
     * @param dumpIndices
     *            Dump the indices (does not show the tuples by default).
     * @param showTuples
     *            Dump the records in the indices.
     */
    public void dumpJournal(final boolean dumpHistory,
            final boolean dumpPages, final boolean dumpIndices,
            final boolean showTuples) {

        final PrintWriter w = new PrintWriter(System.out, true/* autoFlush */);

        try {

            dumpJournal(w, null/* namespaces */, dumpHistory, dumpPages,
                    dumpIndices, showTuples);

            w.flush();

        } finally {

            // Note: DO NOT close stdout!
            // w.close();

        }

    }
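
    /*
     * Example (a sketch that mirrors the setup performed by main(); the
     * journal file name is a placeholder): dump the current commit point of
     * an existing journal, including the page statistics.
     * 
     *   final Properties p = new Properties();
     *   p.setProperty(Options.FILE, "/data/bigdata.jnl");
     *   p.setProperty(Options.READ_ONLY, "" + true);
     *   p.setProperty(Options.BUFFER_MODE, BufferMode.Disk.toString());
     *   final Journal jnl = new Journal(p);
     *   try {
     *       new DumpJournal(jnl).dumpJournal(false/* dumpHistory *&#47;,
     *               true/* dumpPages *&#47;, false/* dumpIndices *&#47;,
     *               false/* showTuples *&#47;);
     *   } finally {
     *       jnl.close();
     *   }
     */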

    /**
     * @param out
     *            Where to write the output.
     * @param namespaces
     *            When non-empty and non-<code>null</code>, dump only those
     *            indices having any of the specified namespaces.
     * @param dumpHistory
     *            Dump metadata for indices in all commit records (default only
     *            dumps the metadata for the indices as of the most current
     *            committed state).
     * @param dumpPages
     *            Dump the pages of the indices and report some information on
     *            the page size.
     * @param dumpIndices
     *            Dump the indices (does not show the tuples by default).
     * @param showTuples
     *            Dump the records in the indices.
     */
    public void dumpJournal(final PrintWriter out,
            final List<String> namespaces, final boolean dumpHistory,
            final boolean dumpPages, final boolean dumpIndices,
            final boolean showTuples) {

        // Note: This does not fix the issue.
//        /**
//         * Start a transaction. This will bracket all index access and protect
//         * the data on the journal from concurrent recycling.
//         * 
//         * @see <a href="https://sourceforge.net/apps/trac/bigdata/ticket/762">
//         *      DumpJournal does not protect against concurrent updates (NSS)
//         *      </a>
//         */
//        final long tx = journal.newTx(ITx.READ_COMMITTED);
//        try {

        final FileMetadata fmd = journal.getFileMetadata();

        if (fmd != null) {

            /*
             * Note: The FileMetadata is only available on a re-open of an
             * existing Journal.
             */

            // dump the MAGIC and VERSION.
            out.println("magic=" + Integer.toHexString(fmd.magic));

            out.println("version=" + Integer.toHexString(fmd.version));

            /*
             * Report on:
             * 
             * - the length of the journal.
             * - the #of bytes available for user data in the journal.
             * - the offset at which the next record would be written.
             * - the #of bytes remaining in the user extent.
             */

            final long bytesAvailable = (fmd.userExtent - fmd.nextOffset);

            out.println("extent=" + fmd.extent + "("
                    + fmd.extent / Bytes.megabyte + "M)" + ", userExtent="
                    + fmd.userExtent + "(" + fmd.userExtent / Bytes.megabyte
                    + "M)" + ", bytesAvailable=" + bytesAvailable + "("
                    + bytesAvailable / Bytes.megabyte + "M)" + ", nextOffset="
                    + fmd.nextOffset);

        }

        {

            /*
             * Dump the root blocks.
             * 
             * Note: This uses the IBufferStrategy to access the root blocks.
             * The code used to use the FileMetadata, but that was only
             * available for a re-opened journal. This approach works for a
             * new Journal as well.
             */
            {

                final ByteBuffer rootBlock0 = journal.getBufferStrategy()
                        .readRootBlock(true/* rootBlock0 */);

                if (rootBlock0 != null) {

                    out.println(new RootBlockView(true/* rootBlock0 */,
                            rootBlock0, new ChecksumUtility()).toString());

                }

            }

            {

                final ByteBuffer rootBlock1 = journal.getBufferStrategy()
                        .readRootBlock(false/* rootBlock0 */);

                if (rootBlock1 != null) {

                    out.println(new RootBlockView(false/* rootBlock0 */,
                            rootBlock1, new ChecksumUtility()).toString());

                }

            }

//            out.println(fmd.rootBlock0.toString());
//            out.println(fmd.rootBlock1.toString());

            // report on which root block is the current root block.
            out.println("The current root block is #"
                    + (journal.getRootBlockView().isRootBlock0() ? 0 : 1));

        }

        final IBufferStrategy strategy = journal.getBufferStrategy();

        if (strategy instanceof RWStrategy) {

            final RWStore store = ((RWStrategy) strategy).getStore();

            {

                final StringBuilder sb = new StringBuilder();

                store.showAllocators(sb);

                out.println(sb);

            }

            // Validate the logged delete blocks.
            if (validateDeleteBlocks) {

                final DeleteBlockStats stats = store.checkDeleteBlocks(journal);

                out.println(stats.toString(store));

                final Set<Integer> duplicateAddrs = stats
                        .getDuplicateAddresses();

                if (!duplicateAddrs.isEmpty()) {

                    for (int latchedAddr : duplicateAddrs) {

                        final byte[] b;
                        try {

                            b = store.readFromLatchedAddress(latchedAddr);

                        } catch (IOException ex) {

                            log.error("Could not read: latchedAddr="
                                    + latchedAddr, ex);

                            continue;

                        }

                        final ByteBuffer buf = ByteBuffer.wrap(b);

                        final Object obj = decodeData(buf);

                        if (obj == null) {

                            System.err
                                    .println("Could not decode: latchedAddr="
                                            + latchedAddr);

                            final StringBuilder sb = new StringBuilder();

                            BytesUtil.printHexString(sb,
                                    BytesUtil.toHexString(b, b.length));

                            System.err.println("Undecoded record:"
                                    + sb.toString());

                        } else {

                            System.err.println("Decoded record: latchedAddr="
                                    + latchedAddr + " :: class="
                                    + obj.getClass() + ", object="
                                    + obj.toString());

                        }

                    }

                }

            }

        }

        /*
         * Note: A read-only view is used since the Journal could be exposed
         * to concurrent operations through the NSS.
         */
        final CommitRecordIndex commitRecordIndex = journal
                .getReadOnlyCommitRecordIndex();

        out.println("There are " + commitRecordIndex.getEntryCount()
                + " commit points.");

        if (dumpGRS) {

            dumpGlobalRowStore(out);

        }

        if (dumpHistory) {

            out.println("Historical commit points follow in temporal sequence (first to last):");

//            final IKeyBuilder keyBuilder = KeyBuilder.newInstance(Bytes.SIZEOF_LONG);
//
//            final long targetTime = 1303505388420L;
//            int indexOf = commitRecordIndex.indexOf(keyBuilder.reset()
//                    .append(targetTime).getKey());
//            if (indexOf < 0)
//                indexOf = (-(indexOf) - 1);
//
//            // @todo handle leading/trailing edge cases.
//            final long fromTime = KeyBuilder.decodeLong(
//                    commitRecordIndex.keyAt(indexOf - 3), 0/* off */);
//            final long toTime = KeyBuilder.decodeLong(
//                    commitRecordIndex.keyAt(indexOf + 3), 0/* off */);
//
//            final ITupleIterator<CommitRecordIndex.Entry> itr = commitRecordIndex
//                    .rangeIterator(
//                            keyBuilder.reset().append(fromTime + 1).getKey(),
//                            keyBuilder.reset().append(toTime + 1).getKey());
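
            /*
             * Visit every commit record in commit-time order (first to last).
             * The commented-out code above sketches how this scan could be
             * bounded to a time window instead.
             */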

            @SuppressWarnings("unchecked")
            final ITupleIterator<CommitRecordIndex.Entry> itr = commitRecordIndex
                    .rangeIterator();

            while (itr.hasNext()) {

                out.println("----");

                final CommitRecordIndex.Entry entry = itr.next().getObject();

                out.print("Commit Record: " + entry.commitTime + ", addr="
                        + journal.toString(entry.addr) + ", ");

                final ICommitRecord commitRecord = journal
                        .getCommitRecord(entry.commitTime);

                out.println(commitRecord.toString());

                dumpNamedIndicesMetadata(out, namespaces, commitRecord,
                        dumpPages, dumpIndices, showTuples);

            }

        } else {

            /*
             * Dump the current commit record.
             */

            final ICommitRecord commitRecord = journal.getCommitRecord();

            out.println(commitRecord.toString());

            dumpNamedIndicesMetadata(out, namespaces, commitRecord,
                    dumpPages, dumpIndices, showTuples);

        }

//        } finally {
//
//            journal.abort(tx);
//
//        }

    }

    private final Journal journal;

    public DumpJournal(final Journal journal) {

        if (journal == null)
            throw new IllegalArgumentException();

        this.journal = journal;

    }

    private void dumpGlobalRowStore(final PrintWriter out) {

        final SparseRowStore grs = journal.getGlobalRowStore(journal
                .getLastCommitTime());

        {

            final Iterator<? extends ITPS> itr = grs
                    .rangeIterator(GlobalRowStoreSchema.INSTANCE);

            while (itr.hasNext()) {

                final ITPS tps = itr.next();

                out.println(tps.toString());

            }

        }

        // The schema for "relations".
        {

            final Iterator<? extends ITPS> itr = grs
                    .rangeIterator(RelationSchema.INSTANCE);

            while (itr.hasNext()) {

                final ITPS tps = itr.next();

                out.println(tps.toString());

            }

        }

    }

    /**
     * Dump metadata about each named index as of the specified commit record.
     * 
     * @param dumpPages
     *            When <code>true</code>, the index pages will be recursively
     *            scanned to collect statistics about the index.
     */
    private void dumpNamedIndicesMetadata(final PrintWriter out,
            final List<String> namespaces, final ICommitRecord commitRecord,
            final boolean dumpPages, final boolean dumpIndices,
            final boolean showTuples) {

        final Iterator<String> nitr = journal.indexNameScan(null/* prefix */,
                commitRecord.getTimestamp());

        final Map<String, BaseIndexStats> pageStats = new TreeMap<String, BaseIndexStats>();

        while (nitr.hasNext()) {

            // a registered index.
            final String name = nitr.next();

            if (namespaces != null && !namespaces.isEmpty()) {

                boolean found = false;

                for (String namespace : namespaces) {

                    if (name.startsWith(namespace)) {

                        found = true;

                        break;

                    }

                }

                if (!found) {

                    // Skip this index. Not a desired namespace.
                    continue;

                }

            }

            out.println("name=" + name);

            // load index from its checkpoint record.
            final ICheckpointProtocol ndx;
            try {

                ndx = journal.getIndexWithCommitRecord(name, commitRecord);

            } catch (Throwable t) {

                if (InnerCause.isInnerCause(t, ClassNotFoundException.class)) {

                    /*
                     * This is typically a tuple serializer that has a
                     * dependency on an application class that is not present
                     * in the CLASSPATH. Add the necessary dependency(s) and
                     * you should no longer see this message.
                     */
                    log.warn("Could not load index: "
                            + InnerCause.getInnerCause(t,
                                    ClassNotFoundException.class));

                    continue;

                } else
                    throw new RuntimeException(t);

            }

            // show checkpoint record.
            out.println("\t" + ndx.getCheckpoint());

            // show metadata record.
            out.println("\t" + ndx.getIndexMetadata());
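
            /*
             * Note: The IndexMetadata record includes (among other things)
             * the configured tuple serializer, which is why a missing
             * application class typically surfaces as the
             * ClassNotFoundException handled above.
             */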
out.println("\t" + ndx.getIndexMetadata()); /* * Collect statistics on the page usage for the index. * * TODO If we kept the BTree counters for the #of bytes written per * node and per leaf up to date when nodes and leaves were recycled * then we could generate (parts of) this table very quickly. As it * stands, we have to actually scan the pages in the index. */ { final BaseIndexStats stats = ndx.dumpPages( dumpPages/* recursive */, dumpPages/* visitLeaves */); out.println("\t" + stats); pageStats.put(name, stats); if (dumpIndices) { if (ndx instanceof AbstractBTree) { /* * TODO GIST : dumpTuples for HTree. */ DumpIndex.dumpIndex((AbstractBTree) ndx, showTuples); } } } } // while(itr) (next index) // Write out the statistics table. BaseIndexStats.writeOn(out, pageStats); // { // // /* // * Write out the header. // */ // boolean first = true; // // for (Map.Entry<String, BaseIndexStats> e : pageStats.entrySet()) { // // final String name = e.getKey(); // // final BaseIndexStats stats = e.getValue(); // // if (stats == null) { // // /* // * Something for which we did not extract the PageStats. // */ // // final ICheckpointProtocol tmp = journal // .getIndexWithCommitRecord(name, commitRecord); // // out.println("name: " + name + ", class=" // + tmp.getClass() + ", checkpoint=" // + tmp.getCheckpoint()); // // continue; // // } // // if (first) { // // out.println(stats.getHeaderRow()); // // first = false; // // } // // /* // * Write out the stats for this index. // */ // // out.println(stats.getDataRow()); // // } // // } } // dumpNamedIndicesMetadata /** * Utility method dumps the data associated with an address on the backing * store. A variety of methods are attempted. * * @param addr * The address. * * @return */ private String dumpRawRecord(final long addr) { if (journal.getBufferStrategy() instanceof IRWStrategy) { /** * TODO When we address this issue, do this test for all stores. * * @see <a * href="https://sourceforge.net/apps/trac/bigdata/ticket/555"> * Support PSOutputStream/InputStream at IRawStore </a> */ final IStore store = ((IRWStrategy) journal.getBufferStrategy()) .getStore(); try { final InputStream is = store.getInputStream(addr); try { // TODO Could dump the stream. } finally { try { is.close(); } catch (IOException e) { // Ignore. } } return "Address is stream: addr=" + addr; } catch (RuntimeException ex) { // ignore. } } final ByteBuffer buf; try { buf = journal.read(addr); } catch (Throwable t) { final String msg = "Could not read: addr=" + addr + ", ex=" + t; log.error(msg, t); return msg; } if (buf == null) throw new IllegalArgumentException("Nothing at that address"); final Object obj = decodeData(buf); if (obj == null) { return "Could not decode: addr=" + addr; } else { return obj.toString(); } } /** * Attempt to decode data read from some address using a variety of * mechanisms. * * @param b * The data. * * @return The decoded object -or- <code>null</code> if the object could not * be decoded. */ private Object decodeData(final ByteBuffer buf) { if(buf == null) throw new IllegalArgumentException(); /* * Note: Always use buf.duplicate() to avoid a side-effect on the * ByteBuffer that we are trying to decode! */ try { /** * Note: This handles a lot of cases, including: * * Checkpoint, IndexMetadata */ return SerializerUtil.deserialize(buf.duplicate()); } catch (RuntimeException ex) { // fall through } /* * TODO Root blocks and what else? */ /* * Try to decode an index node/leaf. 
        {

            final long commitTime = journal.getLastCommitTime();

            final Iterator<String> nitr = journal.indexNameScan(
                    null/* prefix */, commitTime);

            while (nitr.hasNext()) {

                // a registered index.
                final String name = nitr.next();

                final ICheckpointProtocol ndx = journal.getIndexLocal(name,
                        commitTime);

                final IndexTypeEnum indexType = ndx.getCheckpoint()
                        .getIndexType();

                switch (indexType) {
                case BTree: {

                    final AbstractBTree btree = (AbstractBTree) ndx;

                    final com.bigdata.btree.NodeSerializer nodeSer = btree
                            .getNodeSerializer();

                    try {

                        final com.bigdata.btree.data.IAbstractNodeData nodeOrLeaf = nodeSer
                                .decode(buf.duplicate());

                        log.warn("Record decoded from index=" + name);

                        return nodeOrLeaf;

                    } catch (Throwable t) {

                        // Ignore.
                        continue;

                    }

                }
                case HTree: {

                    final AbstractHTree htree = (AbstractHTree) ndx;

                    final com.bigdata.htree.NodeSerializer nodeSer = htree
                            .getNodeSerializer();

                    try {

                        final com.bigdata.btree.data.IAbstractNodeData nodeOrLeaf = nodeSer
                                .decode(buf.duplicate());

                        log.warn("Record decoded from index=" + name);

                        return nodeOrLeaf;

                    } catch (Throwable t) {

                        // Ignore.
                        continue;

                    }

                }
                case Stream:

                    @SuppressWarnings("unused")
                    final Stream stream = (Stream) ndx;

                    /*
                     * Note: We can't do anything here with a Stream, but we
                     * do try to read on the address as a stream in the
                     * caller.
                     */
                    continue;

                default:
                    throw new UnsupportedOperationException(
                            "Unknown indexType=" + indexType);
                }

            }

        }

        // Could not decode.
        return null;

    }

}