// HashSet.java example
//
// Explorer
// google-web-toolkit-svnmirror-master
/*
 * Copyright 2009 Google Inc.
 * 
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 * 
 * http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */
package com.google.gwt.dev.util.collect;

import java.io.IOException;
import java.io.InvalidObjectException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.Serializable;
import java.lang.reflect.Array;
import java.util.AbstractSet;
import java.util.Collection;
import java.util.ConcurrentModificationException;
import java.util.Iterator;
import java.util.NoSuchElementException;

/**
 * A memory-efficient hash set.
 * <p>
 * Elements live directly in a single {@code Object[]} whose length is a power
 * of two. Collisions are resolved by linear probing, and the table is grown
 * so that it never becomes more than three-quarters full. A {@code null}
 * element is supported by storing an internal sentinel in its place. This
 * class performs no synchronization.
 * 
 * @param <E> the element type
 */
public class HashSet<E> extends AbstractSet<E> implements Serializable {

  /**
   * Iterator over the occupied slots of the enclosing set's table.
   * <p>
   * The scan is circular and begins immediately after an empty slot, so every
   * run of adjacent occupied slots is visited contiguously even when the run
   * wraps around the end of the array. Removing an element shifts later
   * members of its run back towards the hole; because a run never straddles
   * the scan boundary, every shifted element is one that has not been
   * returned yet, so {@link #remove()} can never cause an element to be
   * returned twice. When the last slot of the table is empty the scan starts
   * at slot 0 and visits the slots in natural array order.
   * <p>
   * Structural changes are only detected when they replace the table (growth
   * or {@link HashSet#clear()}).
   */
  private class SetIterator implements Iterator<E> {
    /** Table in use when this iterator was created. */
    private final Object[] coModCheckTable = table;

    /** Slot at which the circular scan begins. */
    private final int start = findScanStart();

    /** Number of slots, counted from {@link #start}, already passed. */
    private int offset = 0;

    /** Offset of the element last returned, or -1 if it cannot be removed. */
    private int lastOffset = -1;

    public boolean hasNext() {
      checkForComodification();
      advanceToItem();
      return offset < table.length;
    }

    @SuppressWarnings("unchecked")
    public E next() {
      if (!hasNext()) {
        throw new NoSuchElementException();
      }
      lastOffset = offset;
      return (E) unmaskNull(table[slotAt(offset++)]);
    }

    public void remove() {
      if (lastOffset < 0) {
        throw new IllegalStateException();
      }
      checkForComodification();
      int removedSlot = slotAt(lastOffset);
      internalRemove(removedSlot);
      if (table[removedSlot] != null) {
        // Hole was plugged with an element not yet returned; visit it next.
        offset = lastOffset;
      }
      lastOffset = -1;
    }

    /**
     * Moves {@link #offset} forward to the next occupied slot, or to
     * {@code table.length} if there is none.
     */
    private void advanceToItem() {
      while (offset < table.length && table[slotAt(offset)] == null) {
        ++offset;
      }
    }

    private void checkForComodification() {
      if (coModCheckTable != table) {
        throw new ConcurrentModificationException();
      }
    }

    /**
     * Picks the slot where the scan begins: slot 0 when the last slot is
     * empty (no run can wrap), otherwise the slot following the first empty
     * slot.
     */
    private int findScanStart() {
      int length = table.length;
      if (length == 0 || table[length - 1] == null) {
        return 0;
      }
      for (int i = 0; i < length - 1; ++i) {
        if (table[i] == null) {
          return i + 1;
        }
      }
      // No empty slot at all; not expected while the load limit is respected.
      return 0;
    }

    /**
     * Translates a scan offset into a table index, wrapping around the end of
     * the array.
     */
    private int slotAt(int scanOffset) {
      int slot = start + scanOffset;
      return (slot >= table.length) ? slot - table.length : slot;
    }
  }

  /**
   * In the interest of memory-savings, we start with the smallest feasible
   * power-of-two table size that can hold three items without rehashing. If we
   * started with a size of 2, we'd have to expand as soon as the second item
   * was added.
   */
  private static final int INITIAL_TABLE_SIZE = 4;

  /**
   * Sentinel stored in the table in place of a {@code null} element, so that
   * a {@code null} slot always means "empty".
   */
  private static final Object NULL_ITEM = new Serializable() {
    Object readResolve() {
      return NULL_ITEM;
    }
  };

  /**
   * Converts an element into the form stored in the table: {@code null}
   * becomes the sentinel, everything else is returned unchanged.
   */
  static Object maskNull(Object o) {
    return (o == null) ? NULL_ITEM : o;
  }

  /**
   * Converts a stored table entry back into the element it represents: the
   * sentinel becomes {@code null}, everything else is returned unchanged.
   */
  static Object unmaskNull(Object o) {
    return (o == NULL_ITEM) ? null : o;
  }

  /**
   * Returns the smallest table capacity, starting from
   * {@link #INITIAL_TABLE_SIZE} and doubling, that satisfies
   * {@code capacity * 3 >= expectedSize * 4}.
   */
  private static int initialCapacityFor(int expectedSize) {
    int capacity = INITIAL_TABLE_SIZE;
    while (capacity * 3 < expectedSize * 4) {
      capacity <<= 1;
    }
    return capacity;
  }

  /**
   * Number of objects in this set; transient due to custom serialization.
   * Default access to avoid synthetic accessors from inner classes.
   */
  transient int size = 0;

  /**
   * Backing store for all the objects; transient due to custom serialization.
   * Default access to avoid synthetic accessors from inner classes.
   */
  transient Object[] table;

  /**
   * Creates an empty set with the initial table size.
   */
  public HashSet() {
    table = new Object[INITIAL_TABLE_SIZE];
  }

  /**
   * Creates a set containing the elements of {@code c}. The table is sized up
   * front from {@code c.size()}.
   * 
   * @param c the collection whose elements are added to the new set
   */
  public HashSet(Collection<? extends E> c) {
    table = new Object[initialCapacityFor(c.size())];
    // The table is already sized, so bypass the pre-sizing in addAll.
    super.addAll(c);
  }

  /**
   * Works just like {@link #HashSet(Collection)}, but for arrays. Used to avoid
   * having to synthesize a collection in {@link Sets}.
   */
  HashSet(E[] c) {
    table = new Object[initialCapacityFor(c.length)];
    for (E e : c) {
      add(e);
    }
  }

  /**
   * Adds {@code e} to this set if it is not already present, growing the
   * table first when necessary.
   * 
   * @return {@code true} if the element was added
   */
  @Override
  public boolean add(E e) {
    int index = findOrEmpty(e);
    if (table[index] == null) {
      // Not in the map, may need to grow.
      if (ensureSizeFor(++size)) {
        // If we had to grow the table, must recompute the index.
        index = findOrEmpty(e);
      }
      table[index] = maskNull(e);
      return true;
    }
    return false;
  }

  /**
   * Adds every element of {@code c}, pre-sizing the table via
   * {@link #resizeForJoin(int)} before delegating to the inherited
   * implementation.
   */
  @Override
  public boolean addAll(Collection<? extends E> c) {
    resizeForJoin(c.size());
    return super.addAll(c);
  }

  /**
   * Removes all elements by replacing the table with a fresh one of the
   * initial size.
   */
  @Override
  public void clear() {
    table = new Object[INITIAL_TABLE_SIZE];
    size = 0;
  }

  @Override
  public boolean contains(Object o) {
    return find(o) >= 0;
  }

  @Override
  public Iterator<E> iterator() {
    return new SetIterator();
  }

  /**
   * Removes {@code o} from this set if present.
   * 
   * @return {@code true} if the element was found and removed
   */
  @Override
  public boolean remove(Object o) {
    int index = find(o);
    if (index < 0) {
      return false;
    }
    internalRemove(index);
    return true;
  }

  @Override
  public int size() {
    return size;
  }

  @Override
  public Object[] toArray() {
    return toArray(new Object[size]);
  }

  /**
   * Copies the elements into {@code a} in table order, allocating a new array
   * of the same component type if {@code a} is too short. If {@code a} has
   * room to spare, only the slot immediately following the last element is
   * set to {@code null}, as {@link Collection#toArray(Object[])} specifies;
   * any further slots keep the caller's values.
   * 
   * @param a the array to fill if it is large enough
   * @return {@code a}, or a newly allocated array of length {@code size}
   */
  @SuppressWarnings("unchecked")
  @Override
  public <T> T[] toArray(T[] a) {
    if (a.length < size) {
      a = (T[]) Array.newInstance(a.getClass().getComponentType(), size);
    }
    int index = 0;
    for (Object e : table) {
      if (e != null) {
        a[index++] = (T) unmaskNull(e);
      }
    }
    if (index < a.length) {
      a[index] = null;
    }
    return a;
  }

  /**
   * Adapted from {@link org.apache.commons.collections.map.AbstractHashedMap}.
   * Reads the table capacity and the item count, then re-adds each item.
   * 
   * @throws InvalidObjectException if the capacity read from the stream is
   *           not a positive power of two (the index computation depends on
   *           it), or if the item count is negative
   */
  @SuppressWarnings("unchecked")
  protected void doReadObject(ObjectInputStream in) throws IOException,
      ClassNotFoundException {
    int capacity = in.readInt();
    int items = in.readInt();
    if (capacity <= 0 || (capacity & (capacity - 1)) != 0) {
      throw new InvalidObjectException("Invalid table capacity: " + capacity);
    }
    if (items < 0) {
      throw new InvalidObjectException("Invalid item count: " + items);
    }
    table = new Object[capacity];
    // add() counts the items back up from zero.
    size = 0;
    for (int i = 0; i < items; i++) {
      add((E) in.readObject());
    }
  }

  /**
   * Adapted from {@link org.apache.commons.collections.map.AbstractHashedMap}.
   * Writes the table capacity, the item count, and then each item (with the
   * {@code null} sentinel converted back to {@code null}).
   */
  protected void doWriteObject(ObjectOutputStream out) throws IOException {
    out.writeInt(table.length);
    out.writeInt(size);
    for (Object e : table) {
      if (e != null) {
        out.writeObject(unmaskNull(e));
      }
    }
  }

  /**
   * Returns whether two items are equal for the purposes of this set.
   */
  protected boolean itemEquals(Object a, Object b) {
    return (a == null) ? (b == null) : a.equals(b);
  }

  /**
   * Return the hashCode for an item.
   */
  protected int itemHashCode(Object o) {
    return (o == null) ? 0 : o.hashCode();
  }

  /**
   * Removes the item at the specified index, and performs internal management
   * to make sure we don't wind up with a hole in the table. Default access to
   * avoid synthetic accessors from inner classes.
   */
  void internalRemove(int index) {
    table[index] = null;
    --size;
    plugHole(index);
  }

  /**
   * Ensures the set is large enough to contain the specified number of entries.
   * 
   * @return {@code true} if the table was replaced by a larger one and all
   *         entries were re-inserted; {@code false} if it was already large
   *         enough
   */
  private boolean ensureSizeFor(int expectedSize) {
    if (table.length * 3 >= expectedSize * 4) {
      return false;
    }

    int newCapacity = table.length << 1;
    while (newCapacity * 3 < expectedSize * 4) {
      newCapacity <<= 1;
    }

    Object[] oldTable = table;
    table = new Object[newCapacity];
    for (Object o : oldTable) {
      if (o != null) {
        // Entries are distinct already, so just probe for the first free slot.
        int newIndex = getIndex(unmaskNull(o));
        while (table[newIndex] != null) {
          if (++newIndex == table.length) {
            newIndex = 0;
          }
        }
        table[newIndex] = o;
      }
    }
    return true;
  }

  /**
   * Returns the index in the table at which a particular item resides, or -1 if
   * the item is not in the table.
   */
  private int find(Object o) {
    int index = getIndex(o);
    while (true) {
      Object existing = table[index];
      if (existing == null) {
        return -1;
      }
      if (itemEquals(o, unmaskNull(existing))) {
        return index;
      }
      if (++index == table.length) {
        index = 0;
      }
    }
  }

  /**
   * Returns the index in the table at which a particular item resides, or the
   * index of an empty slot in the table where this item should be inserted if
   * it is not already in the table.
   */
  private int findOrEmpty(Object o) {
    int index = getIndex(o);
    while (true) {
      Object existing = table[index];
      if (existing == null) {
        return index;
      }
      if (itemEquals(o, unmaskNull(existing))) {
        return index;
      }
      if (++index == table.length) {
        index = 0;
      }
    }
  }

  /**
   * Returns the slot at which probing for {@code o} starts. Relies on the
   * table length being a power of two.
   */
  private int getIndex(Object o) {
    int h = itemHashCode(o);
    // Copied from Apache's AbstractHashedMap; prevents power-of-two collisions.
    h += ~(h << 9);
    h ^= (h >>> 14);
    h += (h << 4);
    h ^= (h >>> 10);
    // Power of two trick.
    return h & (table.length - 1);
  }

  /**
   * Tricky, we left a hole in the map, which we have to fill. The only way to
   * do this is to search forwards through the map shuffling back values that
   * match this index until we hit a null.
   */
  private void plugHole(int hole) {
    int index = hole + 1;
    if (index == table.length) {
      index = 0;
    }
    while (table[index] != null) {
      int targetIndex = getIndex(unmaskNull(table[index]));
      if (hole < index) {
        /*
         * "Normal" case, the index is past the hole and the "bad range" is from
         * hole (exclusive) to index (inclusive).
         */
        if (!(hole < targetIndex && targetIndex <= index)) {
          // Plug it!
          table[hole] = table[index];
          table[index] = null;
          hole = index;
        }
      } else {
        /*
         * "Wrapped" case, the index is before the hole (we've wrapped) and the
         * "good range" is from index (exclusive) to hole (inclusive).
         */
        if (index < targetIndex && targetIndex <= hole) {
          // Plug it!
          table[hole] = table[index];
          table[index] = null;
          hole = index;
        }
      }
      if (++index == table.length) {
        index = 0;
      }
    }
  }

  /**
   * Serialization hook: reads the default (empty) field data, then delegates
   * to {@link #doReadObject(ObjectInputStream)}.
   */
  private void readObject(ObjectInputStream in) throws IOException,
      ClassNotFoundException {
    in.defaultReadObject();
    doReadObject(in);
  }

  /**
   * Resizes this set to accommodate the minimum size required to join this set
   * with another set. This is an optimization to prevent multiple resizes
   * during the join operation. Naively, it would seem like we should resize to
   * hold {@code (size + otherSize)}. However, the incoming set might have
   * duplicates with this set; it might even be all duplicates. The correct
   * behavior when the incoming set is all duplicates is NOT to resize, and
   * therefore not to invalidate any iterators.
   * <p>
   * In practice, this strategy results in a worst-case of two resizes. In the
   * worst case, where {@code size} and {@code otherSize} are roughly equal and
   * the sets are completely disjoint, we might do 1 initial rehash and then 1
   * additional rehash down the road. But this is an edge case that requires
   * getting unlucky on both boundaries. Most of the time, we do either 1
   * initial rehash or 1 down the road, because doubling the capacity generally
   * allows this set to absorb an equally-sized disjoint set.
   */
  private void resizeForJoin(int sizeOther) {
    ensureSizeFor(Math.max(size, sizeOther));
  }

  /**
   * Serialization hook: writes the default (empty) field data, then delegates
   * to {@link #doWriteObject(ObjectOutputStream)}.
   */
  private void writeObject(ObjectOutputStream out) throws IOException {
    out.defaultWriteObject();
    doWriteObject(out);
  }
}