package org.cdlib.xtf.util;
/**
* Copyright (c) 2004, Regents of the University of California
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* - Neither the name of the University of California nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
import java.io.IOException;
/**
* Provides quick access to a disk-based hash table created by
* a {@link DiskHashWriter}.
*
* @author Martin Haye
*/
public class DiskHashReader
{
/** Size of the header we expect to find */
static final int headerSize = 12;
/** SubStore to read the hash from */
private SubStoreReader subfile;
/** Number of hash slots in the subfile */
private int nSlots;
/** Size of each hash slot */
private int slotSize;
/** Buffer used to read hash slot bytes */
private byte[] slotBytes;
/** Used to decode hash slot values */
private PackedByteBuf slotBuf;
/**
* Read in the header of of the hash from the given subfile.
*
* @param subfile Must have been created by DiskHashWriter.outputTo()
*/
public DiskHashReader(SubStoreReader subfile)
throws IOException
{
this.subfile = subfile;
// Read the header.
byte[] magic = new byte[4];
subfile.read(magic);
if (magic[0] != 'h' ||
magic[1] != 'a' ||
magic[2] != 's' ||
magic[3] != 'h')
throw new IOException("SubStore isn't a proper DiskHash");
nSlots = subfile.readInt();
slotSize = subfile.readInt();
// Allocate the slot buffer.
slotBytes = new byte[slotSize];
slotBuf = new PackedByteBuf(slotBytes);
} // constructor
/**
* Closes the reader (and its associated subfile).
*/
public void close()
{
try {
subfile.close();
}
catch (Exception e) {
}
subfile = null;
} // close()
/**
* Locate the entry for the given string key. If not found, returns null.
* @param key key to look for
*/
public PackedByteBuf find(String key)
throws IOException
{
// Don't allow empty string as a key, since it's used to mark
// the end of a slot.
//
if (key.length() == 0)
key = " ";
// Find the location of the slot data. If zero, we can fail now.
int slotNum = (key.hashCode() & 0xffffff) % nSlots;
subfile.seek(headerSize + (slotNum * 4));
int slotOffset = subfile.readInt();
if (slotOffset == 0)
return null;
assert (slotOffset + slotSize) <= subfile.length() : "Corrupt hash offset";
// Read the slot data (may be too much, but will always be enough).
subfile.seek(slotOffset);
subfile.read(slotBytes);
slotBuf.setBytes(slotBytes);
// Now scan the entries
while (true)
{
// Get the name. If empty, give up.
String name = slotBuf.readString();
if (name.length() == 0)
return null;
// Does it match? If not, advance to the next slot.
if (!name.equals(key)) {
slotBuf.skipBuffer();
continue;
}
// Got a match!
return slotBuf.readBuffer();
} // while
} // find()
} // class DiskHashReader