/* * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. */ /* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.sun.org.apache.xerces.internal.util; /** * This class is a symbol table implementation that guarantees that * strings used as identifiers are unique references. Multiple calls * to <code>addSymbol</code> will always return the same string * reference. * <p> * The symbol table performs the same task as <code>String.intern()</code> * with the following differences: * <ul> * <li> * A new string object does not need to be created in order to * retrieve a unique reference. Symbols can be added by using * a series of characters in a character array. * </li> * <li> * Users of the symbol table can provide their own symbol hashing * implementation. For example, a simple string hashing algorithm * may fail to produce a balanced set of hashcodes for symbols * that are <em>mostly</em> unique. Strings with similar leading * characters are especially prone to this poor hashing behavior. * </li> * </ul> * * @see SymbolHash * * @author Andy Clark * */ public class SymbolTable { // // Constants // /** Default table size. */ protected static final int TABLE_SIZE = 101; /** Maximum hash collisions per bucket for a table with load factor == 1. */ protected static final int MAX_HASH_COLLISIONS = 40; protected static final int MULTIPLIERS_SIZE = 1 << 5; protected static final int MULTIPLIERS_MASK = MULTIPLIERS_SIZE - 1; // // Data // /** Buckets. */ protected Entry[] fBuckets = null; /** actual table size **/ protected int fTableSize; /** The total number of entries in the hash table. */ protected transient int fCount; /** The table is rehashed when its size exceeds this threshold. (The * value of this field is (int)(capacity * loadFactor).) */ protected int fThreshold; /** The load factor for the SymbolTable. */ protected float fLoadFactor; /** * A new hash function is selected and the table is rehashed when * the number of keys in the bucket exceeds this threshold. */ protected final int fCollisionThreshold; /** * Array of randomly selected hash function multipliers or <code>null</code> * if the default String.hashCode() function should be used. */ protected int[] fHashMultipliers; // // Constructors // /** * Constructs a new, empty SymbolTable with the specified initial * capacity and the specified load factor. * * @param initialCapacity the initial capacity of the SymbolTable. * @param loadFactor the load factor of the SymbolTable. * @throws IllegalArgumentException if the initial capacity is less * than zero, or if the load factor is nonpositive. */ public SymbolTable(int initialCapacity, float loadFactor) { if (initialCapacity < 0) { throw new IllegalArgumentException("Illegal Capacity: " + initialCapacity); } if (loadFactor <= 0 || Float.isNaN(loadFactor)) { throw new IllegalArgumentException("Illegal Load: " + loadFactor); } if (initialCapacity == 0) { initialCapacity = 1; } fLoadFactor = loadFactor; fTableSize = initialCapacity; fBuckets = new Entry[fTableSize]; fThreshold = (int)(fTableSize * loadFactor); fCollisionThreshold = (int)(MAX_HASH_COLLISIONS * loadFactor); fCount = 0; } /** * Constructs a new, empty SymbolTable with the specified initial capacity * and default load factor, which is <tt>0.75</tt>. * * @param initialCapacity the initial capacity of the hashtable. * @throws IllegalArgumentException if the initial capacity is less * than zero. */ public SymbolTable(int initialCapacity) { this(initialCapacity, 0.75f); } /** * Constructs a new, empty SymbolTable with a default initial capacity (101) * and load factor, which is <tt>0.75</tt>. */ public SymbolTable() { this(TABLE_SIZE, 0.75f); } // // Public methods // /** * Adds the specified symbol to the symbol table and returns a * reference to the unique symbol. If the symbol already exists, * the previous symbol reference is returned instead, in order * guarantee that symbol references remain unique. * * @param symbol The new symbol. */ public String addSymbol(String symbol) { // search for identical symbol int collisionCount = 0; int bucket = hash(symbol) % fTableSize; for (Entry entry = fBuckets[bucket]; entry != null; entry = entry.next) { if (entry.symbol.equals(symbol)) { return entry.symbol; } ++collisionCount; } return addSymbol0(symbol, bucket, collisionCount); } // addSymbol(String):String private String addSymbol0(String symbol, int bucket, int collisionCount) { if (fCount >= fThreshold) { // Rehash the table if the threshold is exceeded rehash(); bucket = hash(symbol) % fTableSize; } else if (collisionCount >= fCollisionThreshold) { // Select a new hash function and rehash the table if // the collision threshold is exceeded. rebalance(); bucket = hash(symbol) % fTableSize; } // create new entry Entry entry = new Entry(symbol, fBuckets[bucket]); fBuckets[bucket] = entry; ++fCount; return entry.symbol; } // addSymbol0(String,int,int):String /** * Adds the specified symbol to the symbol table and returns a * reference to the unique symbol. If the symbol already exists, * the previous symbol reference is returned instead, in order * guarantee that symbol references remain unique. * * @param buffer The buffer containing the new symbol. * @param offset The offset into the buffer of the new symbol. * @param length The length of the new symbol in the buffer. */ public String addSymbol(char[] buffer, int offset, int length) { // search for identical symbol int collisionCount = 0; int bucket = hash(buffer, offset, length) % fTableSize; OUTER: for (Entry entry = fBuckets[bucket]; entry != null; entry = entry.next) { if (length == entry.characters.length) { for (int i = 0; i < length; i++) { if (buffer[offset + i] != entry.characters[i]) { ++collisionCount; continue OUTER; } } return entry.symbol; } ++collisionCount; } return addSymbol0(buffer, offset, length, bucket, collisionCount); } // addSymbol(char[],int,int):String private String addSymbol0(char[] buffer, int offset, int length, int bucket, int collisionCount) { if (fCount >= fThreshold) { // Rehash the table if the threshold is exceeded rehash(); bucket = hash(buffer, offset, length) % fTableSize; } else if (collisionCount >= fCollisionThreshold) { // Select a new hash function and rehash the table if // the collision threshold is exceeded. rebalance(); bucket = hash(buffer, offset, length) % fTableSize; } // add new entry Entry entry = new Entry(buffer, offset, length, fBuckets[bucket]); fBuckets[bucket] = entry; ++fCount; return entry.symbol; } // addSymbol0(char[],int,int,int,int):String /** * Returns a hashcode value for the specified symbol. The value * returned by this method must be identical to the value returned * by the <code>hash(char[],int,int)</code> method when called * with the character array that comprises the symbol string. * * @param symbol The symbol to hash. */ public int hash(String symbol) { if (fHashMultipliers == null) { return symbol.hashCode() & 0x7FFFFFFF; } return hash0(symbol); } // hash(String):int private int hash0(String symbol) { int code = 0; final int length = symbol.length(); final int[] multipliers = fHashMultipliers; for (int i = 0; i < length; ++i) { code = code * multipliers[i & MULTIPLIERS_MASK] + symbol.charAt(i); } return code & 0x7FFFFFFF; } // hash0(String):int /** * Returns a hashcode value for the specified symbol information. * The value returned by this method must be identical to the value * returned by the <code>hash(String)</code> method when called * with the string object created from the symbol information. * * @param buffer The character buffer containing the symbol. * @param offset The offset into the character buffer of the start * of the symbol. * @param length The length of the symbol. */ public int hash(char[] buffer, int offset, int length) { if (fHashMultipliers == null) { int code = 0; for (int i = 0; i < length; ++i) { code = code * 31 + buffer[offset + i]; } return code & 0x7FFFFFFF; } return hash0(buffer, offset, length); } // hash(char[],int,int):int private int hash0(char[] buffer, int offset, int length) { int code = 0; final int[] multipliers = fHashMultipliers; for (int i = 0; i < length; ++i) { code = code * multipliers[i & MULTIPLIERS_MASK] + buffer[offset + i]; } return code & 0x7FFFFFFF; } // hash0(char[],int,int):int /** * Increases the capacity of and internally reorganizes this * SymbolTable, in order to accommodate and access its entries more * efficiently. This method is called automatically when the * number of keys in the SymbolTable exceeds this hashtable's capacity * and load factor. */ protected void rehash() { rehashCommon(fBuckets.length * 2 + 1); } /** * Randomly selects a new hash function and reorganizes this SymbolTable * in order to more evenly distribute its entries across the table. This * method is called automatically when the number keys in one of the * SymbolTable's buckets exceeds the given collision threshold. */ protected void rebalance() { if (fHashMultipliers == null) { fHashMultipliers = new int[MULTIPLIERS_SIZE]; } PrimeNumberSequenceGenerator.generateSequence(fHashMultipliers); rehashCommon(fBuckets.length); } private void rehashCommon(final int newCapacity) { int oldCapacity = fBuckets.length; Entry[] oldTable = fBuckets; Entry[] newTable = new Entry[newCapacity]; fThreshold = (int)(newCapacity * fLoadFactor); fBuckets = newTable; fTableSize = fBuckets.length; for (int i = oldCapacity ; i-- > 0 ;) { for (Entry old = oldTable[i] ; old != null ; ) { Entry e = old; old = old.next; int index = hash(e.symbol) % newCapacity; e.next = newTable[index]; newTable[index] = e; } } } /** * Returns true if the symbol table already contains the specified * symbol. * * @param symbol The symbol to look for. */ public boolean containsSymbol(String symbol) { // search for identical symbol int bucket = hash(symbol) % fTableSize; int length = symbol.length(); OUTER: for (Entry entry = fBuckets[bucket]; entry != null; entry = entry.next) { if (length == entry.characters.length) { for (int i = 0; i < length; i++) { if (symbol.charAt(i) != entry.characters[i]) { continue OUTER; } } return true; } } return false; } // containsSymbol(String):boolean /** * Returns true if the symbol table already contains the specified * symbol. * * @param buffer The buffer containing the symbol to look for. * @param offset The offset into the buffer. * @param length The length of the symbol in the buffer. */ public boolean containsSymbol(char[] buffer, int offset, int length) { // search for identical symbol int bucket = hash(buffer, offset, length) % fTableSize; OUTER: for (Entry entry = fBuckets[bucket]; entry != null; entry = entry.next) { if (length == entry.characters.length) { for (int i = 0; i < length; i++) { if (buffer[offset + i] != entry.characters[i]) { continue OUTER; } } return true; } } return false; } // containsSymbol(char[],int,int):boolean // // Classes // /** * This class is a symbol table entry. Each entry acts as a node * in a linked list. */ protected static final class Entry { // // Data // /** Symbol. */ public final String symbol; /** * Symbol characters. This information is duplicated here for * comparison performance. */ public final char[] characters; /** The next entry. */ public Entry next; // // Constructors // /** * Constructs a new entry from the specified symbol and next entry * reference. */ public Entry(String symbol, Entry next) { this.symbol = symbol.intern(); characters = new char[symbol.length()]; symbol.getChars(0, characters.length, characters, 0); this.next = next; } /** * Constructs a new entry from the specified symbol information and * next entry reference. */ public Entry(char[] ch, int offset, int length, Entry next) { characters = new char[length]; System.arraycopy(ch, offset, characters, 0, length); symbol = new String(characters).intern(); this.next = next; } } // class Entry } // class SymbolTable