/* * Copyright (c) 1999, 2007, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 only, as * published by the Free Software Foundation. Oracle designates this * particular file as subject to the "Classpath" exception as provided * by Oracle in the LICENSE file that accompanied this code. * * This code is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * version 2 for more details (a copy is included in the LICENSE file that * accompanied this code). * * You should have received a copy of the GNU General Public License version * 2 along with this work; if not, write to the Free Software Foundation, * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. * * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA * or visit www.oracle.com if you need additional information or have any * questions. */ /* * Licensed Materials - Property of IBM * RMI-IIOP v1.0 * Copyright IBM Corp. 1998 1999 All Rights Reserved * */ package sun.rmi.rmic.iiop; /** * StaticStringsHash takes an array of constant strings and * uses several different hash methods to try to find the * 'best' one for that set. The set of methods is currently * fixed, but with a little work could be made extensible thru * subclassing. * <p> * The current set of methods is: * <ol> * <li> length() - works well when all strings are different length.</li> * <li> charAt(n) - works well when one offset into all strings is different.</li> * <li> hashCode() - works well with larger arrays.</li> * </ol> * After constructing an instance over the set of strings, the * <code>getKey(String)</code> method can be used to use the selected hash * method to produce a key. The <code>method</code> string will contain * "length()", "charAt(n)", or "hashCode()", and is intended for use by * code generators. * <p> * The <code>keys</code> array will contain the full set of unique keys. * <p> * The <code>buckets</code> array will contain a set of arrays, one for * each key in the <code>keys</code>, where <code>buckets[x][y]</code> * is an index into the <code>strings</code> array. * @author Bryan Atsatt */ public class StaticStringsHash { /** The set of strings upon which the hash info is created */ public String[] strings = null; /** Unique hash keys */ public int[] keys = null; /** Buckets for each key, where buckets[x][y] is an index * into the strings[] array. */ public int[][] buckets = null; /** The method to invoke on String to produce the hash key */ public String method = null; /** Get a key for the given string using the * selected hash method. * @param str the string to return a key for. * @return the key. */ public int getKey(String str) { switch (keyKind) { case LENGTH: return str.length(); case CHAR_AT: return str.charAt(charAt); case HASH_CODE: return str.hashCode(); } throw new Error("Bad keyKind"); } /** Constructor * @param strings the set of strings upon which to * find an optimal hash method. Must not contain * duplicates. */ public StaticStringsHash(String[] strings) { this.strings = strings; length = strings.length; tempKeys = new int[length]; bucketSizes = new int[length]; setMinStringLength(); // Decide on the best algorithm based on // which one has the smallest maximum // bucket depth. First, try length()... int currentMaxDepth = getKeys(LENGTH); int useCharAt = -1; boolean useHashCode = false; if (currentMaxDepth > 1) { // At least one bucket had more than one // entry, so try charAt(i). If there // are a lot of strings in the array, // and minStringLength is large, limit // the search to a smaller number of // characters to avoid spending a lot // of time here that is most likely to // be pointless... int minLength = minStringLength; if (length > CHAR_AT_MAX_LINES && length * minLength > CHAR_AT_MAX_CHARS) { minLength = length/CHAR_AT_MAX_CHARS; } charAt = 0; for (int i = 0; i < minLength; i++) { int charAtDepth = getKeys(CHAR_AT); if (charAtDepth < currentMaxDepth) { currentMaxDepth = charAtDepth; useCharAt = i; if (currentMaxDepth == 1) { break; } } charAt++; } charAt = useCharAt; if (currentMaxDepth > 1) { // At least one bucket had more than one // entry, try hashCode(). // // Since the cost of computing a full hashCode // (for the runtime target string) is much higher // than the previous methods, use it only if it is // substantially better. The definition of 'substantial' // here is not very well founded, and could be improved // with some further analysis ;^) int hashCodeDepth = getKeys(HASH_CODE); if (hashCodeDepth < currentMaxDepth-3) { // Using the full hashCode results in at least // 3 fewer entries in the worst bucket, so will // therefore avoid at least 3 calls to equals() // in the worst case. // // Note that using a number smaller than 3 could // result in using a hashCode when there are only // 2 strings in the array, and that would surely // be a poor performance choice. useHashCode = true; } } // Reset keys if needed... if (!useHashCode) { if (useCharAt >= 0) { // Use the charAt(i) method... getKeys(CHAR_AT); } else { // Use length method... getKeys(LENGTH); } } } // Now allocate and fill our real hashKeys array... keys = new int[bucketCount]; System.arraycopy(tempKeys,0,keys,0,bucketCount); // Sort keys and bucketSizes arrays... boolean didSwap; do { didSwap = false; for (int i = 0; i < bucketCount - 1; i++) { if (keys[i] > keys[i+1]) { int temp = keys[i]; keys[i] = keys[i+1]; keys[i+1] = temp; temp = bucketSizes[i]; bucketSizes[i] = bucketSizes[i+1]; bucketSizes[i+1] = temp; didSwap = true; } } } while (didSwap == true); // Allocate our buckets array. Fill the string // index slot with an unused key so we can // determine which are free... int unused = findUnusedKey(); buckets = new int[bucketCount][]; for (int i = 0; i < bucketCount; i++) { buckets[i] = new int[bucketSizes[i]]; for (int j = 0; j < bucketSizes[i]; j++) { buckets[i][j] = unused; } } // And fill it in... for(int i = 0; i < strings.length; i++) { int key = getKey(strings[i]); for (int j = 0; j < bucketCount; j++) { if (keys[j] == key) { int k = 0; while (buckets[j][k] != unused) { k++; } buckets[j][k] = i; break; } } } } /** Print an optimized 'contains' method for the * argument strings */ public static void main (String[] args) { StaticStringsHash hash = new StaticStringsHash(args); System.out.println(); System.out.println(" public boolean contains(String key) {"); System.out.println(" switch (key."+hash.method+") {"); for (int i = 0; i < hash.buckets.length; i++) { System.out.println(" case "+hash.keys[i]+": "); for (int j = 0; j < hash.buckets[i].length; j++) { if (j > 0) { System.out.print(" } else "); } else { System.out.print(" "); } System.out.println("if (key.equals(\""+ hash.strings[hash.buckets[i][j]] +"\")) {"); System.out.println(" return true;"); } System.out.println(" }"); } System.out.println(" }"); System.out.println(" return false;"); System.out.println(" }"); } private int length; private int[] tempKeys; private int[] bucketSizes; private int bucketCount; private int maxDepth; private int minStringLength = Integer.MAX_VALUE; private int keyKind; private int charAt; private static final int LENGTH = 0; private static final int CHAR_AT = 1; private static final int HASH_CODE = 2; /* Determines the maximum number of charAt(i) * tests that will be done. The search is * limited because if the number of characters * is large enough, the likelyhood of finding * a good hash key based on this method is * low. The CHAR_AT_MAX_CHARS limit only * applies f there are more strings than * CHAR_AT_MAX_LINES. */ private static final int CHAR_AT_MAX_LINES = 50; private static final int CHAR_AT_MAX_CHARS = 1000; private void resetKeys(int keyKind) { this.keyKind = keyKind; switch (keyKind) { case LENGTH: method = "length()"; break; case CHAR_AT: method = "charAt("+charAt+")"; break; case HASH_CODE: method = "hashCode()"; break; } maxDepth = 1; bucketCount = 0; for (int i = 0; i < length; i++) { tempKeys[i] = 0; bucketSizes[i] = 0; } } private void setMinStringLength() { for (int i = 0; i < length; i++) { if (strings[i].length() < minStringLength) { minStringLength = strings[i].length(); } } } private int findUnusedKey() { int unused = 0; int keysLength = keys.length; // Note that we just assume that resource // exhaustion will occur rather than an // infinite loop here if the set of keys // is very large. while (true) { boolean match = false; for (int i = 0; i < keysLength; i++) { if (keys[i] == unused) { match = true; break; } } if (match) { unused--; } else { break; } } return unused; } private int getKeys(int methodKind) { resetKeys(methodKind); for(int i = 0; i < strings.length; i++) { addKey(getKey(strings[i])); } return maxDepth; } private void addKey(int key) { // Have we seen this one before? boolean addIt = true; for (int j = 0; j < bucketCount; j++) { if (tempKeys[j] == key) { addIt = false; bucketSizes[j]++; if (bucketSizes[j] > maxDepth) { maxDepth = bucketSizes[j]; } break; } } if (addIt) { tempKeys[bucketCount] = key; bucketSizes[bucketCount] = 1; bucketCount++; } } }