package org.apache.lucene.analysis;
import java.util.AbstractSet;
import java.util.Collection;
import java.util.Collections;
import java.util.Iterator;
import java.util.NoSuchElementException;
import java.util.Set;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* A simple class that stores Strings as char[]'s in a
* hash table. Note that this is not a general purpose
* class. For example, it cannot remove items from the
* set, nor does it resize its hash table to be smaller,
* etc. It is designed to be quick to test if a char[]
* is in the set without the necessity of converting it
* to a String first.
* <P>
* <em>Please note:</em> This class implements {@link java.util.Set Set} but
* does not behave like it should in all cases. The generic type is
* {@code Set<Object>}, because you can add any object to it,
* that has a string representation. The add methods will use
* {@link Object#toString} and store the result using a {@code char[]}
* buffer. The {@code contains()} methods behave the same way.
* The {@link #iterator()} returns an {@code Iterator<String>}.
* For type safety also {@link #stringIterator()} is provided.
*/
public class CharArraySet extends AbstractSet<Object> {

  /** Smallest hash table length ever allocated by the public constructors. */
  private final static int INIT_SIZE = 8;

  /**
   * Open-addressing hash table; a {@code null} slot is free. The length is
   * always a power of two (it starts at {@link #INIT_SIZE} and only ever
   * doubles), which is what allows {@code hash & (length - 1)} to be used as
   * the slot index.
   */
  private char[][] entries;

  /** Number of non-null slots in {@link #entries}. */
  private int count;

  /**
   * If true, entries are lower-cased when added and lookups compare against
   * the lower-cased form of the probe text.
   */
  private final boolean ignoreCase;

  /** A shared, unmodifiable, case-sensitive set that contains no entries. */
  public static final CharArraySet EMPTY_SET = CharArraySet.unmodifiableSet(new CharArraySet(0, false));

  /**
   * Create set with enough capacity to hold startSize terms.
   *
   * @param startSize
   *          the number of entries the set should hold without rehashing;
   *          the table is sized so that {@code startSize * 1.25} fits
   * @param ignoreCase
   *          <code>true</code> if lookups should ignore case
   */
  public CharArraySet(int startSize, boolean ignoreCase) {
    this.ignoreCase = ignoreCase;
    int size = INIT_SIZE;
    // Keep the load factor at or below 0.8: grow until startSize * 1.25 fits.
    while (startSize + (startSize >> 2) > size) {
      size <<= 1;
    }
    entries = new char[size][];
  }

  /**
   * Create set from a Collection of char[] or String. Elements of any other
   * type are stored using their {@link Object#toString()} representation.
   *
   * @param c
   *          the elements to add
   * @param ignoreCase
   *          <code>true</code> if lookups should ignore case
   */
  public CharArraySet(Collection<? extends Object> c, boolean ignoreCase) {
    this(c.size(), ignoreCase);
    addAll(c);
  }

  /**
   * Create set directly from an existing hash table. The table is not
   * copied; it is used as-is.
   */
  private CharArraySet(char[][] entries, boolean ignoreCase, int count) {
    this.entries = entries;
    this.ignoreCase = ignoreCase;
    this.count = count;
  }

  /** true if the <code>len</code> chars of <code>text</code> starting at <code>off</code>
   * are in the set */
  public boolean contains(char[] text, int off, int len) {
    return entries[getSlot(text, off, len)] != null;
  }

  /** true if the <code>CharSequence</code> is in the set */
  public boolean contains(CharSequence cs) {
    return entries[getSlot(cs)] != null;
  }

  /**
   * Returns the index of the slot that holds the given text, or, if the text
   * is not in the set, the index of the free slot where it would be inserted.
   */
  private int getSlot(char[] text, int off, int len) {
    int code = getHashCode(text, off, len);
    int pos = code & (entries.length - 1);
    char[] text2 = entries[pos];
    if (text2 != null && !equals(text, off, len, text2)) {
      // Collision: probe with a fixed increment. The increment is forced to
      // be odd so that, with a power-of-two table length, the probe sequence
      // reaches every slot.
      final int inc = ((code >> 8) + code) | 1;
      do {
        code += inc;
        pos = code & (entries.length - 1);
        text2 = entries[pos];
      } while (text2 != null && !equals(text, off, len, text2));
    }
    return pos;
  }

  /**
   * Returns the index of the slot that holds the given text, or, if the text
   * is not in the set, the index of the free slot where it would be inserted.
   */
  private int getSlot(CharSequence text) {
    int code = getHashCode(text);
    int pos = code & (entries.length - 1);
    char[] text2 = entries[pos];
    if (text2 != null && !equals(text, text2)) {
      // Same probing scheme as getSlot(char[], int, int).
      final int inc = ((code >> 8) + code) | 1;
      do {
        code += inc;
        pos = code & (entries.length - 1);
        text2 = entries[pos];
      } while (text2 != null && !equals(text, text2));
    }
    return pos;
  }

  /** Add this CharSequence into the set */
  public boolean add(CharSequence text) {
    return add(text.toString()); // could be more efficient
  }

  /** Add this String into the set */
  public boolean add(String text) {
    return add(text.toCharArray());
  }

  /** Add this char[] directly to the set.
   * If ignoreCase is true for this Set, the text array will be directly modified.
   * The user should never modify this text array after calling this method.
   *
   * @return <code>true</code> if the text was not already in the set
   */
  public boolean add(char[] text) {
    if (ignoreCase) {
      // Case folding is done per UTF-16 char, in place, on the caller's array.
      for (int i = 0; i < text.length; i++) {
        text[i] = Character.toLowerCase(text[i]);
      }
    }
    int slot = getSlot(text, 0, text.length);
    if (entries[slot] != null) {
      return false;
    }
    entries[slot] = text;
    count++;
    // Grow once the load factor exceeds 0.8 so that probing always finds a
    // free slot.
    if (count + (count >> 2) > entries.length) {
      rehash();
    }
    return true;
  }

  /**
   * Compares <code>len</code> chars of <code>text1</code> starting at
   * <code>off</code> with the stored entry <code>text2</code>. When
   * ignoreCase is set only <code>text1</code> is lower-cased, because stored
   * entries are already lower-cased by {@link #add(char[])}.
   */
  private boolean equals(char[] text1, int off, int len, char[] text2) {
    if (len != text2.length) {
      return false;
    }
    if (ignoreCase) {
      for (int i = 0; i < len; i++) {
        if (Character.toLowerCase(text1[off + i]) != text2[i]) {
          return false;
        }
      }
    } else {
      for (int i = 0; i < len; i++) {
        if (text1[off + i] != text2[i]) {
          return false;
        }
      }
    }
    return true;
  }

  /**
   * Compares <code>text1</code> with the stored entry <code>text2</code>.
   * When ignoreCase is set only <code>text1</code> is lower-cased, because
   * stored entries are already lower-cased by {@link #add(char[])}.
   */
  private boolean equals(CharSequence text1, char[] text2) {
    int len = text1.length();
    if (len != text2.length) {
      return false;
    }
    if (ignoreCase) {
      for (int i = 0; i < len; i++) {
        if (Character.toLowerCase(text1.charAt(i)) != text2[i]) {
          return false;
        }
      }
    } else {
      for (int i = 0; i < len; i++) {
        if (text1.charAt(i) != text2[i]) {
          return false;
        }
      }
    }
    return true;
  }

  /** Doubles the hash table and re-inserts every existing entry. */
  private void rehash() {
    final int newSize = 2 * entries.length;
    char[][] oldEntries = entries;
    entries = new char[newSize][];
    for (int i = 0; i < oldEntries.length; i++) {
      char[] text = oldEntries[i];
      if (text != null) {
        // todo: could be faster... no need to compare strings on collision
        entries[getSlot(text, 0, text.length)] = text;
      }
    }
  }

  /**
   * Computes a 31-multiplier polynomial hash over <code>len</code> chars of
   * <code>text</code> starting at <code>offset</code>. When ignoreCase is
   * set, each char is lower-cased before it is mixed in.
   */
  private int getHashCode(char[] text, int offset, int len) {
    int code = 0;
    final int stop = offset + len;
    if (ignoreCase) {
      for (int i = offset; i < stop; i++) {
        code = code * 31 + Character.toLowerCase(text[i]);
      }
    } else {
      for (int i = offset; i < stop; i++) {
        code = code * 31 + text[i];
      }
    }
    return code;
  }

  /**
   * Computes the same hash as {@link #getHashCode(char[], int, int)} for the
   * chars of a CharSequence.
   */
  private int getHashCode(CharSequence text) {
    int code = 0;
    int len = text.length();
    if (ignoreCase) {
      for (int i = 0; i < len; i++) {
        code = code * 31 + Character.toLowerCase(text.charAt(i));
      }
    } else {
      for (int i = 0; i < len; i++) {
        code = code * 31 + text.charAt(i);
      }
    }
    return code;
  }

  /** Returns the number of entries in this set. */
  @Override
  public int size() {
    return count;
  }

  /** Returns <code>true</code> if this set has no entries. */
  @Override
  public boolean isEmpty() {
    return count == 0;
  }

  /**
   * Returns <code>true</code> if the given object is in the set. A
   * <code>char[]</code> is looked up directly; any other object is looked up
   * by its {@link Object#toString()} representation.
   */
  @Override
  public boolean contains(Object o) {
    if (o instanceof char[]) {
      final char[] text = (char[]) o;
      return contains(text, 0, text.length);
    }
    return contains(o.toString());
  }

  /**
   * Adds the given object to the set. A <code>char[]</code> is stored
   * directly (see {@link #add(char[])}); any other object is stored using its
   * {@link Object#toString()} representation.
   */
  @Override
  public boolean add(Object o) {
    if (o instanceof char[]) {
      return add((char[]) o);
    }
    return add(o.toString());
  }

  /**
   * Returns an unmodifiable {@link CharArraySet}. This allows to provide
   * unmodifiable views of internal sets for "read-only" use.
   * <p>
   * NOTE: the returned set uses the given set's hash table and its current
   * size; the table is not copied.
   *
   * @param set
   *          a set for which the unmodifiable set is returned.
   * @return a new unmodifiable {@link CharArraySet}, or the given set itself
   *         if it is already unmodifiable.
   * @throws NullPointerException
   *           if the given set is <code>null</code>.
   */
  public static CharArraySet unmodifiableSet(CharArraySet set) {
    if (set == null) {
      throw new NullPointerException("Given set is null");
    }
    if (set == EMPTY_SET) {
      return EMPTY_SET;
    }
    if (set instanceof UnmodifiableCharArraySet) {
      return set;
    }
    /*
     * Instead of delegating calls to the given set copy the low-level values to
     * the unmodifiable Subclass
     */
    return new UnmodifiableCharArraySet(set.entries, set.ignoreCase, set.count);
  }

  /**
   * Returns a copy of the given set as a {@link CharArraySet}. If the given set
   * is a {@link CharArraySet} the ignoreCase property will be preserved.
   *
   * @param set
   *          a set to copy
   * @return a copy of the given set as a {@link CharArraySet}. If the given set
   *         is a {@link CharArraySet} the ignoreCase property will be
   *         preserved. {@link #EMPTY_SET} is returned as-is.
   * @throws NullPointerException
   *           if the given set is <code>null</code>.
   */
  public static CharArraySet copy(Set<?> set) {
    if (set == null) {
      throw new NullPointerException("Given set is null");
    }
    if (set == EMPTY_SET) {
      return EMPTY_SET;
    }
    final boolean ignoreCase = set instanceof CharArraySet ? ((CharArraySet) set).ignoreCase
        : false;
    return new CharArraySet(set, ignoreCase);
  }

  /** The {@code Iterator<String>} for this set. Strings are constructed on the fly, so
   * use <code>nextCharArray</code> for more efficient access. */
  public class CharArraySetIterator implements Iterator<String> {
    /** Index in the hash table of the entry held in {@link #next}. */
    int pos = -1;

    /** The entry the next call will return, or null once exhausted. */
    char[] next;

    CharArraySetIterator() {
      goNext();
    }

    /** Advances to the next occupied slot, leaving {@link #next} null if there is none. */
    private void goNext() {
      next = null;
      pos++;
      while (pos < entries.length && (next = entries[pos]) == null) {
        pos++;
      }
    }

    public boolean hasNext() {
      return next != null;
    }

    /**
     * Returns the next entry as the internally stored array, or
     * <code>null</code> if the iteration has no more elements.
     * Do not modify the returned char[].
     */
    public char[] nextCharArray() {
      char[] ret = next;
      goNext();
      return ret;
    }

    /** Returns the next String, as a {@code Set<String>} would...
     * use nextCharArray() for better efficiency.
     *
     * @throws NoSuchElementException
     *           if the iteration has no more elements
     */
    public String next() {
      // Honour the Iterator contract instead of failing with a
      // NullPointerException inside the String constructor.
      if (next == null) {
        throw new NoSuchElementException();
      }
      return new String(nextCharArray());
    }

    /**
     * Not supported.
     *
     * @throws UnsupportedOperationException
     *           always
     */
    public void remove() {
      throw new UnsupportedOperationException();
    }
  }

  /** returns an iterator of new allocated Strings */
  public Iterator<String> stringIterator() {
    return new CharArraySetIterator();
  }

  /** returns an iterator of new allocated Strings, this method violates the Set interface */
  @Override
  @SuppressWarnings("unchecked")
  public Iterator<Object> iterator() {
    // The raw cast re-types the Iterator<String> as the Iterator<Object>
    // that AbstractSet<Object> requires.
    return (Iterator) stringIterator();
  }

  /**
   * Efficient unmodifiable {@link CharArraySet}. This implementation does not
   * delegate calls to a given {@link CharArraySet} like
   * {@link Collections#unmodifiableSet(java.util.Set)} does. Instead it passes
   * the internal representation of a {@link CharArraySet} to a super
   * constructor and overrides all mutators.
   */
  private static final class UnmodifiableCharArraySet extends CharArraySet {

    private UnmodifiableCharArraySet(char[][] entries, boolean ignoreCase,
        int count) {
      super(entries, ignoreCase, count);
    }

    @Override
    public boolean add(Object o) {
      throw new UnsupportedOperationException();
    }

    @Override
    public boolean addAll(Collection<? extends Object> coll) {
      throw new UnsupportedOperationException();
    }

    @Override
    public boolean add(char[] text) {
      throw new UnsupportedOperationException();
    }

    @Override
    public boolean add(CharSequence text) {
      throw new UnsupportedOperationException();
    }

    @Override
    public boolean add(String text) {
      throw new UnsupportedOperationException();
    }
  }
}