/* * Copyright 2013 Google Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.google.template.soy.msgs.restricted; import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; import java.util.Random; /** * An memory-efficient object canonicalizer. * * <p>This functions similarly to string.intern(). It has extremely low memory overhead of a little * over one reference per item interned. Standard HashMap based interners use many bytes per object * interned, partially defeating the memory savings of interning. * */ final class CompactInterner { /** Initial size of the table. */ @VisibleForTesting static final int INITIAL_SIZE = 1024; /** * The maximum expected number of collisions to tolerate before growing. * * <p>Increasing this number reduces memory usage at the expense of more equals() checks for both * cache hits and misses. The max load factor of the table is 1 / (MAX_EXPECTED_COLLISION_COUNT + * 1). */ private static final int MAX_EXPECTED_COLLISION_COUNT = 4; /** * The denominator of the current size when growing the table. * * <p>Increasing this number results in better memory utilization at the expense of more frequent * rehashing. * * <p>Increasing this value results in a linear increase in the amortized number of equals() * checks for a miss because of the increased frequency of rehashes, but has no effect on cache * hits. Proof: Let D be GROWTH_DENOMINATOR. Right before a rehash, the total number of times any * item has been hashed is computed by assuming all items in the table have been hashed once, and * D/(D+1) have been hashed at least twice, and (D/D+1)^2 have been hashed thrice, etc. Since the * sum of a geometric series on x is 1/(1-x), the number of hashes per item inserted is exactly * 1/(1 - (D/D+1)), or (D+1)/(D + 1 - D) or exactly D+1. The number of rehashes per item is * exactly D, since its first insertion was not a rehash. */ private static final int GROWTH_DENOMINATOR = 4; /** Hash table of all the items interned. */ private Object[] table; /** Number of items in the table. */ private int count; /** The total number of collisions, including collisions incurred during a rehash. */ private long collisions; public CompactInterner() { table = new Object[INITIAL_SIZE]; count = 0; collisions = 0; } /** * Returns either the input, or an instance that equals it that was previously passed to this * method. * * <p>This operation performs in amortized constant time. */ @SuppressWarnings("unchecked") // If a.equals(b) then a and b have the same type. public synchronized <T> T intern(T value) { Preconditions.checkNotNull(value); // Use a pseudo-random number generator to mix up the high and low bits of the hash code. Random generator = new java.util.Random(value.hashCode()); int tries = 0; while (true) { int index = generator.nextInt(table.length); Object candidate = table[index]; if (candidate == null) { // Found a good place to hash it. count++; collisions += tries; table[index] = value; rehashIfNeeded(); return value; } if (candidate.equals(value)) { Preconditions.checkArgument( value.getClass() == candidate.getClass(), "Interned objects are equals() but different classes: %s and %s", value, candidate); return (T) candidate; } tries++; } } /** Doubles the table size. */ private void rehashIfNeeded() { int currentSize = table.length; if (currentSize - count >= currentSize / (MAX_EXPECTED_COLLISION_COUNT + 1)) { // Still enough overhead. return; } Object[] oldTable = table; // Grow the table so it increases by 1 / GROWTH_DENOMINATOR. int newSize = currentSize + currentSize / GROWTH_DENOMINATOR; table = new Object[newSize]; count = 0; for (Object element : oldTable) { if (element != null) { intern(element); } } } @VisibleForTesting double getAverageCollisions() { return 1.0 * collisions / count; } @VisibleForTesting static double getAverageCollisionsBound() { // NOTE: I'm sure there are research papers dedicated to open addressed hashing and load // factors but I've not been able to find them quickly. This is some rough empirical work // to make sure we get reasonable performance in tests. double x = Math.max(MAX_EXPECTED_COLLISION_COUNT, GROWTH_DENOMINATOR); return x * Math.log1p(x) + 1; } @VisibleForTesting double getOverhead() { return 1.0 * (table.length - count) / count; } @VisibleForTesting static final double getWorstCaseOverhead() { // This is the proportion of null entries to non-null entries right after a rehash. return (1 + 1.0 / MAX_EXPECTED_COLLISION_COUNT) * (1 + 1.0 / GROWTH_DENOMINATOR) - 1; } }