/*
* The contents of this file are subject to the Mozilla Public License
* Version 1.1 (the "License"); you may not use this file except in
* compliance with the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS"
* basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
* the License for the specific language governing rights and limitations
* under the License.
*
* The Original Code is the Kowari Metadata Store.
*
* The Initial Developer of the Original Code is Plugged In Software Pty
* Ltd (http://www.pisoftware.com, mailto:info@pisoftware.com). Portions
* created by Plugged In Software Pty Ltd are Copyright (C) 2001,2002
* Plugged In Software Pty Ltd. All Rights Reserved.
*
* Contributor(s): N/A.
*
* [NOTE: The text of this Exhibit A may differ slightly from the text
* of the notices in the Source Code files of the Original Code. You
* should use the text of this Exhibit A rather than the text found in the
* Original Code Source Code for Your Modifications.]
*
*/
package org.mulgara.util;
// Java 2 standard packages
import java.io.*;
import java.util.Map;
import java.util.LinkedHashMap;
// Third party packages
import org.apache.log4j.Logger;
import org.mulgara.util.TempDir;
/**
* A temporary disk based hash map that maps strings of up to 255 characters
* to unsigned longs with values of up to 2^56. String to long map entries
* can be added but not removed. This class does not implement the Map
 * interface. Although this map is disk based it cannot be used for
 * persistent storage.
*
* @created 2004-05-07
*
* @author David Makepeace
*
* @version $Revision: 1.9 $
*
* @modified $Date: 2005/01/05 04:59:29 $
*
* @maintenanceAuthor $Author: newmana $
*
* @company <A href="mailto:info@PIsoftware.com">Plugged In Software</A>
*
* @copyright ©2004 <a href="http://www.pisoftware.com/">Plugged In
* Software Pty Ltd</a>
*
* @licence <a href="{@docRoot}/../../LICENCE">Mozilla Public License v1.1</a>
*/
public final class StringToLongMap {

  /** Logger for warnings raised during cleanup. */
  private static final Logger logger = Logger.getLogger(StringToLongMap.class);

  // Hash bucket format (16 bytes):
  // [ hash code (4) ][ string offset (4) ]
  // [ value (8) ]
  //
  // A hash bucket is free if the value is zero, which is why zero can never
  // be stored as a value (put(str, 0) is effectively a get).
  //
  // NOTE(review): IntFile appears to be addressed in element units — int
  // slots for getInt/putInt/getUInt and long slots for getLong/putLong — so
  // (long)bucket * 4 (int slots) and (long)bucket * 2 + 1 (long slots) both
  // address the same 16-byte bucket.  Confirm against IntFile's javadoc.

  /** Masks off the sign bit of an int hash code before taking the modulus. */
  private static final int MASK31 = 0x7fffffff;

  /** The maximum unsigned 32-bit value; bounds the string file size. */
  private static final long MASK32 = 0xffffffffL;

  /**
   * This is a hint for the initial number of buckets to use.  This will be
   * rounded up to a prime number obtained from the {@link #primes} array.
   */
  private static final int MIN_NR_BUCKETS = 2000;

  /**
   * The load factor at which the table is grown and rehashed.
   * @see #rehash
   */
  private static final float REHASH_LIMIT = 0.8f;

  /**
   * This is a list of primes where each prime is roughly double the previous
   * one.  It is used to select a hash table size which is a prime number.
   */
  private static final int[] primes = {
    503, 1009, 2027, 4057, 8117, 16249, 32503, 65011, 130027, 260081,
    520193, 1040387, 2080777, 4161557, 8323151, 16646317, 33292687,
    66585377, 133170769, 266341583, 532683227, 1065366479, 2130732959
  };

  /** A small in-memory LRU cache of recently used string-to-long mappings. */
  private final Cache cache = new Cache(100);

  /** The name of the temporary file holding the hash buckets. */
  private File hashFileName;

  /** The hash bucket file, or null once {@link #delete} has been called. */
  private IntFile hashFile;

  /** The name of the temporary file holding the UTF-encoded strings. */
  private File stringFileName;

  /** The string file, or null once {@link #delete} has been called. */
  private RandomAccessFile stringFile;

  /** The number of buckets currently holding an entry. */
  private int nrUsedBuckets = 0;

  /** The current capacity of the hash table; always a prime from {@link #primes}. */
  private int nrBuckets;

  /**
   * Creates an empty map backed by two new temporary files.
   *
   * @throws IOException if the temporary files cannot be created or opened.
   */
  public StringToLongMap() throws IOException {
    hashFileName = TempDir.createTempFile("strToLong", ".hash");
    hashFile = IntFile.open(hashFileName);
    stringFileName = TempDir.createTempFile("strToLong", ".str");
    stringFile = new RandomAccessFile(stringFileName, "rw");
    nrBuckets = findPrime(MIN_NR_BUCKETS);
    clear();
  }

  /**
   * Removes all entries from the map.
   *
   * @throws IOException if an I/O error occurs.
   */
  public void clear() throws IOException {
    hashFile.setSize(0);
    stringFile.setLength(0);
    cache.clear();
    // BUGFIX: the fill count must be reset here; previously a clear() after
    // insertions left a stale count behind, causing premature rehashing.
    nrUsedBuckets = 0;
  }

  /**
   * Associates a long with a String.  Note that a value of 0 is
   * indistinguishable from "no entry", so put(str, 0) has no effect.
   *
   * @param str the String.
   * @param value the value to associate with the String.
   * @throws IOException if an I/O error occurs.
   */
  public void put(String str, long value) throws IOException {
    getAndPut(str, value);
  }

  /**
   * Returns the long associated with the String or 0 if there is no long
   * associated with the string.
   *
   * @param str the String.
   * @return the long associated with the String or 0 if there is no long
   * associated with the string.
   * @throws IOException if an I/O error occurs.
   */
  public long get(String str) throws IOException {
    return getAndPut(str, 0);
  }

  /**
   * Returns the long associated with the String or 0 if there is no long
   * associated with the string.  If newValue is not equal to 0 then this
   * becomes the new value associated with the string.
   *
   * @param str the String.
   * @param newValue if not equal to 0, the new value to be associated with
   * the string.
   * @return the long currently associated with the String or 0 if there is no
   * long associated with the string.
   * @throws IOException if an I/O error occurs or the hash table is full.
   */
  public long getAndPut(String str, long newValue) throws IOException {
    if (str == null) {
      throw new IllegalArgumentException("str is null");
    }

    // For pure gets, try the in-memory cache before touching the disk.
    if (newValue == 0) {
      Long l = cache.get(str);
      if (l != null) {
        return l.longValue();
      }
    }

    int hashCode = str.hashCode();
    int startBucket = calcBucket(hashCode);
    int bucket = startBucket;
    long value;

    // Linear probing: try buckets until we find the correct string or an
    // empty bucket (value == 0).
    while ((value = getValue(bucket)) != 0) {
      // Check the hash code before paying for a string read.
      long offset = (long)bucket * 4;
      if (hashCode == hashFile.getInt(offset)) {
        // Fetch the string and compare with the target string.
        String bucketStr = readString(hashFile.getUInt(offset + 1));
        if (str.equals(bucketStr)) {
          if (newValue != 0) {
            // Overwrite the stored value; the string itself is unchanged.
            hashFile.putLong((long)bucket * 2 + 1, newValue);
            // Add the new value to the cache.
            cache.put(str, newValue);
          } else {
            // Add the value to the cache.
            cache.put(str, value);
          }
          // Always report the value that was present before this call.
          return value;
        }
      }

      // Hash collision (or different string with same hash): try the next
      // bucket, wrapping around the end of the table.
      bucket = (bucket + 1) % nrBuckets;
      if (bucket == startBucket) {
        throw new IOException("Hash table full");
      }
    }

    // Not found.  Insert only if a real (non-zero) value was supplied.
    if (newValue != 0) {
      // Add the new value to the cache.
      cache.put(str, newValue);

      // Add a new hash bucket at the empty slot the probe stopped on.
      long bucketOffset = (long)bucket * 4;
      hashFile.putInt(bucketOffset, hashCode);
      hashFile.putUInt(bucketOffset + 1, writeString(str));
      hashFile.putLong((long)bucket * 2 + 1, newValue);
      ++nrUsedBuckets;

      // Grow the table before probe chains get too long.
      if (nrUsedBuckets >= (int)(nrBuckets * REHASH_LIMIT)) {
        rehash(findPrime(nrBuckets));
      }
    }
    return 0;
  }

  /**
   * Closes and deletes the backing files.  Safe to call more than once;
   * after the first call the map must not be used again.
   */
  public void delete() {
    if (hashFile != null) {
      try {
        hashFile.delete();
      } catch (IOException ex) {
        logger.warn(
            "An I/O error occurred while deleting: " + hashFileName, ex
        );
      } finally {
        hashFile = null;
      }
    }

    if (stringFile != null) {
      try {
        stringFile.close();
        // File.delete() reports failure via its return value, not an
        // exception; don't let a failed deletion pass silently.
        if (!stringFileName.delete()) {
          logger.warn("Failed to delete: " + stringFileName);
        }
      } catch (IOException ex) {
        logger.warn(
            "An I/O error occurred while deleting: " + stringFileName, ex
        );
      } finally {
        stringFile = null;
      }
    }
  }

  /**
   * Ensure that resources are cleaned up correctly, if not already done.
   * @see java.lang.Object#finalize()
   */
  @Override
  protected void finalize() throws Throwable {
    try {
      delete();
    } finally {
      super.finalize();
    }
  }

  /**
   * Reorganizes all the key/value pairs in the hash table to accommodate the
   * new size.  Entries are moved in place: each occupied bucket is re-probed
   * from its ideal position under the new table size and relocated to the
   * first free bucket found.
   *
   * @param newNrBuckets The new size.
   * @throws IOException if an I/O error occurs.
   */
  private void rehash(int newNrBuckets) throws IOException {
    if (newNrBuckets == nrBuckets) return;

    int oldNrBuckets = nrBuckets;
    nrBuckets = newNrBuckets;

    // Iterate over the buckets, moving items to their correct locations.
    for (int bucket = 0; ; ++bucket) {
      long value;
      if ((value = getValue(bucket)) != 0) {
        // This bucket is in use.
        long offset = (long)bucket * 4;
        int hashCode = hashFile.getInt(offset);
        int destBucket = calcBucket(hashCode);

        // Work out where the item belongs.  Abort if probing ends up back
        // at the item's current bucket (it is already correctly placed).
        while (destBucket != bucket) {
          if (getValue(destBucket) == 0) {
            // Found an empty bucket.  Move the item here.
            long destOffset = (long)destBucket * 4;
            hashFile.putInt(destOffset, hashCode);
            hashFile.putInt(destOffset + 1, hashFile.getInt(offset + 1));
            hashFile.putLong((long)destBucket * 2 + 1, value);

            // Clear the old bucket (zero value marks it free).
            hashFile.putLong((long)bucket * 2 + 1, 0);
            break;
          }

          // Try the next bucket.
          destBucket = (destBucket + 1) % nrBuckets;
        }
      } else {
        // An empty bucket.
        // Stop if we have processed all of the old buckets.
        // NOTE: We potentially process more than oldNrBuckets buckets
        // so that we pick up any items that clashed and were bounced
        // past the end of this range 0..(oldNrBuckets-1).
        if (bucket >= oldNrBuckets) break;
      }
    }
  }

  /**
   * Finds a prime number larger than the given number.
   *
   * @param n the number.
   * @return a prime number larger than n or n if there are no primes larger
   * than n in the primes array.
   */
  private int findPrime(int n) {
    for (int i = 0; i < primes.length; ++i)
      if (primes[i] > n) return primes[i];
    return n;
  }

  /**
   * Maps a hash code to a bucket index in [0, nrBuckets).
   *
   * @param hashCode the hash code of the string.
   * @return the ideal bucket index for the hash code.
   */
  private int calcBucket(int hashCode) {
    // Mask the sign bit so the modulus is never negative.
    return (hashCode & MASK31) % nrBuckets;
  }

  /**
   * Reads the stored value for a bucket; 0 means the bucket is free.
   *
   * @param bucket the bucket index.
   * @return the value stored in the bucket, or 0 if the bucket is free.
   * @throws IOException if an I/O error occurs.
   */
  private long getValue(int bucket) throws IOException {
    return hashFile.getLong((long)bucket * 2 + 1);
  }

  /**
   * Reads the modified-UTF-8 string stored at the given offset in the
   * string file.
   *
   * @param strOffset the offset of the start of the string.
   * @return the string read from the file.
   * @throws IOException if an I/O error occurs.
   */
  private String readString(long strOffset) throws IOException {
    stringFile.seek(strOffset);
    return stringFile.readUTF();
  }

  /**
   * Appends a string to the file and returns the offset of the start of the
   * string.
   *
   * @param str the string to be written.
   * @return the offset of the start of the string that was written.
   * @throws IOException if an I/O error occurs or the offset would be larger
   * than 2^32-1.
   */
  private long writeString(String str) throws IOException {
    // Write the string at the end of the file.
    long pos = stringFile.length();
    // Offsets are stored as unsigned 32-bit ints, so the file is capped.
    if (pos > MASK32) throw new IOException("String file too large");
    stringFile.seek(pos);
    stringFile.writeUTF(str);
    return pos;
  }

  /**
   * A bounded, access-ordered (LRU) in-memory cache of string-to-long
   * mappings, used to avoid disk reads for recently used entries.
   */
  static final class Cache extends LinkedHashMap<String,Long> {

    /** serialization ID */
    private static final long serialVersionUID = -1793863465408688001L;

    /** The load factor used for the backing LinkedHashMap. */
    public static final float LOAD_FACTOR = 0.75F;

    /** The maximum number of entries retained before eviction. */
    final int MAX_SIZE;

    /**
     * Creates a cache that retains at most maxSize entries.
     *
     * @param maxSize the maximum number of entries to retain.
     */
    public Cache(int maxSize) {
      // Size the backing table so it never needs to grow before the
      // eviction bound kicks in; 'true' selects access order (LRU).
      super((int)Math.ceil(maxSize / LOAD_FACTOR + 1), LOAD_FACTOR, true);
      MAX_SIZE = maxSize;
    }

    /**
     * Evicts the least recently used entry once the cache exceeds its bound.
     */
    @Override
    protected boolean removeEldestEntry(Map.Entry<String,Long> eldest) {
      return size() > MAX_SIZE;
    }

    /**
     * Associates a long with a string in the cache.  Note: this overloads
     * (does not override) {@link java.util.Map#put}.
     *
     * @param s the string key; must not be null.
     * @param l the value to cache.
     */
    public void put(String s, long l) {
      assert s != null;
      // Long.valueOf reuses cached boxes; the deprecated new Long(l)
      // always allocated a fresh instance.
      super.put(s, Long.valueOf(l));
    }

    /**
     * Returns the cached value for the string, or null if absent.
     *
     * @param s the string key; must not be null.
     * @return the cached value, or null if the string is not cached.
     */
    public Long get(String s) {
      assert s != null;
      return super.get(s);
    }
  }
}