NodeIterator.java example

Explorer
eXist-1.4.x-master
package org.exist.storage.dom;

import org.apache.log4j.Logger;
import org.exist.dom.DocumentImpl;
import org.exist.dom.NodeProxy;
import org.exist.dom.StoredNode;
import org.exist.storage.DBBroker;
import org.exist.storage.StorageAddress;
import org.exist.storage.btree.BTree;
import org.exist.storage.btree.BTreeException;
import org.exist.storage.btree.Paged.Page;
import org.exist.storage.lock.Lock;
import org.exist.util.ByteConversion;
import org.exist.util.LockException;
import org.exist.util.sanity.SanityCheck;

import java.io.IOException;
import java.util.Iterator;

/**
 * Class NodeIterator is used to iterate over nodes in the DOM storage.
 * This implementation locks the DOM file to read the node and unlocks
 * it afterwards. It is thus safer than DOMFileIterator, since the node's
 * value will not change. 
 * 
 * @author wolf
 */
public final class NodeIterator implements Iterator {

	private final static Logger LOG = Logger.getLogger(NodeIterator.class);

	private DOMFile db = null;
	private StoredNode node = null;
	private DocumentImpl doc = null;
	private int offset;
	private short lastTID = ItemId.UNKNOWN_ID;
	private DOMFile.DOMPage p = null;
	private long page;
	private long startAddress = StoredNode.UNKNOWN_NODE_IMPL_ADDRESS;
	private DBBroker broker;
	private boolean useNodePool = false;

	public NodeIterator(DBBroker broker, DOMFile db, StoredNode node, boolean poolable)
			throws BTreeException, IOException {
		this.db = db;
		this.doc = (DocumentImpl)node.getOwnerDocument();
		this.useNodePool = poolable;
		this.node = node;
		this.broker = broker;
	}
	
	/**
	 *  Returns the internal virtual address of the node at the iterator's
	 * current position.
	 *
	 *@return    The currentAddress value
	 */
	public long currentAddress() {
		return StorageAddress.createPointer((int) page, ItemId.getId(lastTID));
	}

	/**
	 *  Are there more nodes to be read?
	 *
	 *@return    Description of the Return Value
	 */
	public boolean hasNext() {
		Lock lock = db.getLock();
		try {
			try {
				lock.acquire(Lock.READ_LOCK);
			} catch (LockException e) {
				LOG.warn("Failed to acquire read lock on " + db.getFile().getName());
				return false;
			}
			db.setOwnerObject(broker);
			if(gotoNextPosition()) {
				db.getPageBuffer().add(p);
				final DOMFile.DOMFilePageHeader ph = p.getPageHeader();
				if (offset < ph.getDataLength())
					return true;
				else if (ph.getNextDataPage() == Page.NO_PAGE)
					return false;
				else
					return true;
			}
		} catch (BTreeException e) {
			LOG.warn(e);
		} catch (IOException e) {
			LOG.warn(e);
		} finally {
			lock.release(Lock.READ_LOCK);
		}
		return false;
	}

	/**
	 *  Returns the next node in document order. 
	 */
	public Object next() {
		Lock lock = db.getLock();
		try {
			try {
				lock.acquire(Lock.READ_LOCK);
			} catch (LockException e) {
				LOG.warn("Failed to acquire read lock on " + db.getFile().getName());
				return null;
			}
			db.setOwnerObject(broker);
			StoredNode nextNode = null;
			if(gotoNextPosition()) {
				long backLink = 0;
			    do {
					DOMFile.DOMFilePageHeader ph = p.getPageHeader();
					// next value larger than length of the current page?
					if (offset >= ph.getDataLength()) {
						// load next page in chain
						long nextPage = ph.getNextDataPage();
						if (nextPage == Page.NO_PAGE) {
                            SanityCheck.TRACE("bad link to next " + p.page.getPageInfo() + "; previous: " +
									ph.getPrevDataPage() + "; offset = " + offset + "; lastTID = " + lastTID);
    					    System.out.println(db.debugPageContents(p));
							return null;
						}
						page = nextPage;
						p = db.getCurrentPage(nextPage);
						//LOG.debug(" -> " + nextPage + "; len = " + p.len + "; " + p.page.getPageInfo());
						db.addToBuffer(p);
						offset = 0;
					}
					// extract the tid
					lastTID = ByteConversion.byteToShort(p.data, offset);					
					offset += DOMFile.LENGTH_TID;
					
					//	check if this is just a link to a relocated node
					if(ItemId.isLink(lastTID)) {
						// skip this
						offset += DOMFile.LENGTH_FORWARD_LOCATION;
						//System.out.println("skipping link on p " + page + " -> " + 
						//StorageAddress.pageFromPointer(link));
						//continue the iteration
						continue;
					}
					
					// read data length
					short vlen = ByteConversion.byteToShort(p.data, offset);
					offset += DOMFile.LENGTH_DATA_LENGTH;
					if (vlen < 0) {
						LOG.warn("Got negative length" + vlen + " at offset " + offset + "!!!");
						LOG.debug(db.debugPageContents(p));
						//TODO : throw an exception right now ?
					}                    
					
					if(ItemId.isRelocated(lastTID)) {
						// found a relocated node. Read the original address
						backLink = ByteConversion.byteToLong(p.data, offset);
						offset += DOMFile.LENGTH_ORIGINAL_LOCATION;
					}
					
					//	overflow page? load the overflow value
					if(vlen == DOMFile.OVERFLOW) {
						vlen = DOMFile.LENGTH_OVERFLOW_LOCATION;
						final long overflow = ByteConversion.byteToLong(p.data, offset);
						offset += DOMFile.LENGTH_OVERFLOW_LOCATION;
						try {
							final byte[] odata = db.getOverflowValue(overflow);
							nextNode = StoredNode.deserialize(odata, 0, odata.length, doc, useNodePool);
						} catch(Exception e) {
							LOG.warn("Exception while loading overflow value: " + e.getMessage() +
									"; originating page: " + p.page.getPageInfo());
						}
					// normal node
					} else {
                        try {
    						nextNode = StoredNode.deserialize(p.data, offset, vlen, doc, useNodePool);
    						offset += vlen;
                        } catch(Exception e) {
                            LOG.warn("Error while deserializing node: " + e.getMessage(), e);
                            LOG.warn("Reading from offset: " + offset + "; len = " + vlen);
                            LOG.debug(db.debugPageContents(p));
    					    System.out.println(db.debugPageContents(p));
                            throw new RuntimeException(e);
                        }
					}
					if(nextNode == null) {
					    LOG.warn("illegal node on page " + p.getPageNum() + "; tid = " + ItemId.getId(lastTID) +
					            "; next = " + p.getPageHeader().getNextDataPage() + "; prev = " + 
					            p.getPageHeader().getPrevDataPage() + "; offset = " + (offset - vlen) +
					            "; len = " + p.getPageHeader().getDataLength());
					    System.out.println(db.debugPageContents(p));					    
					    return null;
					}
					if(ItemId.isRelocated(lastTID)) {
						nextNode.setInternalAddress(backLink);
					} else {
						nextNode.setInternalAddress(
							StorageAddress.createPointer((int) page, ItemId.getId(lastTID))
						);
					}
					nextNode.setOwnerDocument(doc);
				//YES ! needed because of the continue statement above
			    } while (nextNode == null);
			}
			return nextNode;
		} catch (BTreeException e) {
			LOG.warn(e.getMessage(), e);
		} catch (IOException e) {
			LOG.warn(e.getMessage(), e);
		} finally {
			lock.release(Lock.READ_LOCK);
		}
		return null;
	}

	private boolean gotoNextPosition() throws BTreeException, IOException {
		//	position the iterator at the start of the first value
		if (node != null) {
            RecordPos rec = null;
            if (node.getInternalAddress() != StoredNode.UNKNOWN_NODE_IMPL_ADDRESS)
                rec = db.findRecord(node.getInternalAddress());
            if (rec == null) {
    			long addr = db.findValue(broker, new NodeProxy(node));
    			if (addr == BTree.KEY_NOT_FOUND)
    				return false;
    			rec = db.findRecord(addr);
            }
			page = rec.getPage().getPageNum();
			p = rec.getPage();
			//Position the stream at the very beginning of the record
			offset = rec.offset - DOMFile.LENGTH_TID;
			node = null;
			return true;
		} else if (startAddress != StoredNode.UNKNOWN_NODE_IMPL_ADDRESS) {
			final RecordPos rec = db.findRecord(startAddress);
			if(rec == null)
				throw new IOException("Node not found at specified address.");
			page = rec.getPage().getPageNum();
			//Position the stream at the very beginning of the record
			offset = rec.offset - DOMFile.LENGTH_TID;
			p = rec.getPage();
			startAddress = StoredNode.UNKNOWN_NODE_IMPL_ADDRESS;
			return true;
		} else if (page != Page.NO_PAGE) {
			p = db.getCurrentPage(page);
			db.addToBuffer(p);
//			LOG.debug("reading " + p.page.getPageNum() + "; " + p.page.hashCode());
			return true;
		}
		return false;
	}
	
	/**
	 * Remove the current node. This implementation just
	 * decrements the node count. It does not actually remove
	 * the node's value, but removes a page if
	 * node count == 0. Use this method only if you want to
	 * delete an entire document, not to remove a single node.
	 */
	public void remove() {
		throw new RuntimeException("remove() method not implemented");
	}

	/**
	 *  Reposition the iterator at the address of the proxy node.
	 *
	 *@param  node  The new to value
	 */
	public void setTo(StoredNode node) {
		if (node.getInternalAddress() != StoredNode.UNKNOWN_NODE_IMPL_ADDRESS) {
			startAddress = node.getInternalAddress();
		} else {
			this.node = node;
		}
	}

	/**
	 *  Reposition the iterate at a given address.
	 *
	 *@param  address  The new to value
	 */
	public void setTo(long address) {
		this.startAddress = address;
	}
}