/*
 * The contents of this file are subject to the Mozilla Public License
 * Version 1.1 (the "License"); you may not use this file except in
 * compliance with the License. You may obtain a copy of the License at
 * http://www.mozilla.org/MPL/
 *
 * Software distributed under the License is distributed on an "AS IS"
 * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
 * the License for the specific language governing rights and limitations
 * under the License.
 *
 * The Original Code is the Kowari Metadata Store.
 *
 * The Initial Developer of the Original Code is Plugged In Software Pty
 * Ltd (http://www.pisoftware.com, mailto:info@pisoftware.com). Portions
 * created by Plugged In Software Pty Ltd are Copyright (C) 2001,2002
 * Plugged In Software Pty Ltd. All Rights Reserved.
 *
 * Contributor(s): N/A.
 *
 * [NOTE: The text of this Exhibit A may differ slightly from the text
 * of the notices in the Source Code files of the Original Code. You
 * should use the text of this Exhibit A rather than the text found in the
 * Original Code Source Code for Your Modifications.]
 *
 */

package org.mulgara.util;

// Java 2 standard packages
import java.io.*;
import java.util.Map;
import java.util.LinkedHashMap;

// Third party packages
import org.apache.log4j.Logger;

import org.mulgara.util.TempDir;

/**
 * A temporary disk-based hash map that maps strings of up to 255 characters
 * to unsigned longs with values of up to 2^56. String to long map entries
 * can be added but not removed. This class does not implement the Map
 * interface. Although this map is disk based, it cannot be used for
 * persistent storage.
 *
 * @created 2004-05-07
 *
 * @author David Makepeace
 *
 * @version $Revision: 1.9 $
 *
 * @modified $Date: 2005/01/05 04:59:29 $
 *
 * @maintenanceAuthor $Author: newmana $
 *
 * @company <A href="mailto:info@PIsoftware.com">Plugged In Software</A>
 *
 * @copyright ©2004 <a href="http://www.pisoftware.com/">Plugged In
 *   Software Pty Ltd</a>
 *
 * @licence <a href="{@docRoot}/../../LICENCE">Mozilla Public License v1.1</a>
 */
public final class StringToLongMap {

  private static final Logger logger = Logger.getLogger(StringToLongMap.class);

  // Hash bucket format (16 bytes):
  // [ hash code (4) ][ string offset (4) ]
  // [ value (8) ]
  //
  // A hash bucket is free if the value is zero.

  private static final int MASK31 = 0x7fffffff;
  private static final long MASK32 = 0xffffffffL;

  /**
   * This is a hint for the initial number of buckets to use. This will be
   * rounded up to a prime number obtained from the {@link #primes} array.
   */
  private static final int MIN_NR_BUCKETS = 2000;

  /**
   * This is used to determine when a rehash should be performed.
   * @see #rehash
   */
  private static final float REHASH_LIMIT = 0.8f;
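  // Bucket addressing (informational note, derived from the hash bucket
  // format described above): the hash file is addressed through IntFile by
  // element index, so for bucket i the hash code lives at int index i * 4,
  // the string offset at int index i * 4 + 1, and the value at long index
  // i * 2 + 1. This is the arithmetic used throughout the methods below.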
  /**
   * This is a list of primes where each prime is roughly double the previous
   * one. It is used to select a hash table size which is a prime number.
   */
  private static final int[] primes = {
    503, 1009, 2027, 4057, 8117, 16249, 32503, 65011, 130027, 260081,
    520193, 1040387, 2080777, 4161557, 8323151, 16646317, 33292687,
    66585377, 133170769, 266341583, 532683227, 1065366479, 2130732959
  };

  private Cache cache = new Cache(100);

  private File hashFileName;
  private IntFile hashFile;

  private File stringFileName;
  private RandomAccessFile stringFile;

  private int nrUsedBuckets = 0;
  private int nrBuckets;

  /**
   * Creates a new, empty map backed by temporary files.
   *
   * @throws IOException if the backing files cannot be created.
   */
  public StringToLongMap() throws IOException {
    hashFileName = TempDir.createTempFile("strToLong", ".hash");
    hashFile = IntFile.open(hashFileName);
    stringFileName = TempDir.createTempFile("strToLong", ".str");
    stringFile = new RandomAccessFile(stringFileName, "rw");
    nrBuckets = findPrime(MIN_NR_BUCKETS);
    clear();
  }

  /**
   * Removes all entries from the map.
   *
   * @throws IOException if an I/O error occurs.
   */
  public void clear() throws IOException {
    hashFile.setSize(0);
    stringFile.setLength(0);
    cache.clear();
    nrUsedBuckets = 0;  // The emptied table has no used buckets.
  }

  /**
   * Associates a long with a String.
   *
   * @param str the String.
   * @param value the long value to associate with the String.
   * @throws IOException if an I/O error occurs.
   */
  public void put(String str, long value) throws IOException {
    getAndPut(str, value);
  }

  /**
   * Returns the long associated with the String or 0 if there is no long
   * associated with the string.
   *
   * @param str the String.
   * @return the long associated with the String or 0 if there is no long
   *   associated with the string.
   * @throws IOException if an I/O error occurs.
   */
  public long get(String str) throws IOException {
    return getAndPut(str, 0);
  }

  /**
   * Returns the long associated with the String or 0 if there is no long
   * associated with the string. If newValue is not equal to 0 then this
   * becomes the new value associated with the string.
   *
   * @param str the String.
   * @param newValue if not equal to 0, the new value to be associated with
   *   the string.
   * @return the long currently associated with the String or 0 if there is no
   *   long associated with the string.
   * @throws IOException if an I/O error occurs or the hash table is full.
   */
  public long getAndPut(String str, long newValue) throws IOException {
    if (str == null) {
      throw new IllegalArgumentException("str is null");
    }

    if (newValue == 0) {
      Long l = cache.get(str);
      if (l != null) {
        return l.longValue();
      }
    }

    int hashCode = str.hashCode();
    int startBucket = calcBucket(hashCode);
    int bucket = startBucket;
    long value;

    // Try buckets until we find the correct string or an empty bucket.
    while ((value = getValue(bucket)) != 0) {
      // Check the hash code.
      long offset = (long)bucket * 4;
      if (hashCode == hashFile.getInt(offset)) {
        // Fetch the string and compare with the target string.
        String bucketStr = readString(hashFile.getUInt(offset + 1));
        if (str.equals(bucketStr)) {
          if (newValue != 0) {
            hashFile.putLong((long)bucket * 2 + 1, newValue);

            // Add the new value to the cache.
            cache.put(str, newValue);
          } else {
            // Add the value to the cache.
            cache.put(str, value);
          }
          return value;
        }
      }

      // Try the next bucket.
      bucket = (bucket + 1) % nrBuckets;
      if (bucket == startBucket) {
        throw new IOException("Hash table full");
      }
    }

    if (newValue != 0) {
      // Add the new value to the cache.
      cache.put(str, newValue);

      // Add a new hash bucket.
      long bucketOffset = (long)bucket * 4;
      hashFile.putInt(bucketOffset, hashCode);
      hashFile.putUInt(bucketOffset + 1, writeString(str));
      hashFile.putLong((long)bucket * 2 + 1, newValue);
      ++nrUsedBuckets;

      if (nrUsedBuckets >= (int)(nrBuckets * REHASH_LIMIT)) {
        rehash(findPrime(nrBuckets));
      }
    }

    return 0;
  }
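  // Lookup sketch (informational only, mirroring getAndPut above): for a key
  // whose hashCode() is h, the home bucket is (h & 0x7fffffff) % nrBuckets.
  // Probing is linear -- bucket, bucket + 1, ... (mod nrBuckets) -- and stops
  // at the first bucket whose value is 0, since 0 marks a free bucket and is
  // also the "no mapping" return value.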
  /**
   * Closes and deletes the files.
   */
  public void delete() {
    if (hashFile != null) {
      try {
        hashFile.delete();
      } catch (IOException ex) {
        logger.warn("An I/O error occurred while deleting: " + hashFileName, ex);
      } finally {
        hashFile = null;
      }
    }

    if (stringFile != null) {
      try {
        stringFile.close();
        stringFileName.delete();
      } catch (IOException ex) {
        logger.warn("An I/O error occurred while deleting: " + stringFileName, ex);
      } finally {
        stringFile = null;
      }
    }
  }

  /**
   * Ensure that resources are cleaned up correctly, if not already done.
   * @see java.lang.Object#finalize()
   */
  protected void finalize() throws Throwable {
    try {
      delete();
    } finally {
      super.finalize();
    }
  }

  /**
   * Reorganizes all the key/value pairs in the hash table to accommodate the
   * new size.
   *
   * @param newNrBuckets The new size.
   */
  private void rehash(int newNrBuckets) throws IOException {
    if (newNrBuckets == nrBuckets) return;
    int oldNrBuckets = nrBuckets;
    nrBuckets = newNrBuckets;

    // Iterate over the buckets, moving items to their correct locations.
    for (int bucket = 0; ; ++bucket) {
      long value;
      if ((value = getValue(bucket)) != 0) {
        // This bucket is in use.
        long offset = (long)bucket * 4;
        int hashCode = hashFile.getInt(offset);
        int destBucket = calcBucket(hashCode);

        // Work out where the item belongs. Abort if it ends up back here.
        while (destBucket != bucket) {
          if (getValue(destBucket) == 0) {
            // Found an empty bucket. Move the item here.
            long destOffset = (long)destBucket * 4;
            hashFile.putInt(destOffset, hashCode);
            hashFile.putInt(destOffset + 1, hashFile.getInt(offset + 1));
            hashFile.putLong((long)destBucket * 2 + 1, value);

            // Clear the old bucket.
            hashFile.putLong((long)bucket * 2 + 1, 0);
            break;
          }

          // Try the next bucket.
          destBucket = (destBucket + 1) % nrBuckets;
        }
      } else {
        // An empty bucket.
        // Stop if we have processed all of the old buckets.
        // NOTE: We potentially process more than oldNrBuckets buckets
        // so that we pick up any items that clashed and were bounced
        // past the end of this range 0..(oldNrBuckets-1).
        if (bucket >= oldNrBuckets) break;
      }
    }
  }

  /**
   * Finds a prime number larger than the given number.
   *
   * @param n the number.
   * @return a prime number larger than n or n if there are no primes larger
   *   than n in the primes array.
   */
  private int findPrime(int n) {
    for (int i = 0; i < primes.length; ++i) {
      if (primes[i] > n) return primes[i];
    }
    return n;
  }

  /** Maps a hash code to a bucket index. */
  private int calcBucket(int hashCode) {
    return (hashCode & MASK31) % nrBuckets;
  }

  /** Returns the value stored in the given bucket, or 0 if the bucket is free. */
  private long getValue(int bucket) throws IOException {
    return hashFile.getLong((long)bucket * 2 + 1);
  }

  /** Reads the string stored at the given offset in the string file. */
  private String readString(long strOffset) throws IOException {
    stringFile.seek(strOffset);
    return stringFile.readUTF();
  }
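  // Note on the string file layout (informational): readUTF/writeUTF use the
  // java.io.DataInput "modified UTF-8" encoding -- a two-byte length prefix
  // followed by the encoded bytes -- so the offsets stored in the hash file
  // point at the length prefix of each string record.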
  /**
   * Appends a string to the file and returns the offset of the start of the
   * string.
   *
   * @param str the string to be written.
   * @return the offset of the start of the string that was written.
   * @throws IOException if an I/O error occurs or the offset would be larger
   *   than 2^32-1.
   */
  private long writeString(String str) throws IOException {
    // Write the string at the end of the file.
    long pos = stringFile.length();
    if (pos > MASK32) throw new IOException("String file too large");
    stringFile.seek(pos);
    stringFile.writeUTF(str);
    return pos;
  }

  static final class Cache extends LinkedHashMap<String,Long> {

    /** serialization ID */
    private static final long serialVersionUID = -1793863465408688001L;

    public static final float LOAD_FACTOR = 0.75F;

    final int MAX_SIZE;

    public Cache(int maxSize) {
      super((int)Math.ceil(maxSize / LOAD_FACTOR + 1), LOAD_FACTOR, true);
      MAX_SIZE = maxSize;
    }

    protected boolean removeEldestEntry(Map.Entry<String,Long> eldest) {
      return size() > MAX_SIZE;
    }

    public void put(String s, long l) {
      assert s != null;
      super.put(s, Long.valueOf(l));
    }

    public Long get(String s) {
      assert s != null;
      return super.get(s);
    }
  }

}
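// Illustrative usage (a sketch only, not part of the original class; assumes
// the caller handles IOException and treats 0 as "no mapping"):
//
//   StringToLongMap map = new StringToLongMap();
//   try {
//     map.put("http://example.org/#node1", 42L);            // associate a value
//     long found = map.get("http://example.org/#node1");    // returns 42
//     long missing = map.get("http://example.org/#other");  // returns 0
//   } finally {
//     map.delete();  // close and remove the temporary files
//   }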