/*
* Copyright 2001-2004 Sean Owen
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.planetj.math.rabinhash;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.ObjectOutputStream;
import java.io.Serializable;
import java.io.ObjectInputStream;
import java.net.URL;
import java.nio.ByteBuffer;
import java.nio.LongBuffer;
/**
* <p>This class provides an implementation of a hash function based on Rabin fingerprints, one
* which can efficiently produce a 64-bit hash value for a sequence of bytes. Its services and characteristics
* are entirely analogous to that of {@link RabinHashFunction32}, except that hash values are 64 bits and
* the implementation works in terms of degree 64 polynomials represented as <code>long</code>s.</p>
*
* <p>Please see the documentation and comments for {@link RabinHashFunction32} for more information.</p>
*
* @author Sean Owen
* @version 2.0
* @since 2.0
*/
public final class RabinHashFunction64 implements Serializable, Cloneable {
/** Represents x<sup>64</sup> + x<sup>4</sup> + x<sup>3</sup> + x + 1. */
private static final long DEFAULT_IRREDUCIBLE_POLY = 0x000000000000001BL;
/** Default hash function, provided for convenience. */
public static final RabinHashFunction64 DEFAULT_HASH_FUNCTION =
new RabinHashFunction64(DEFAULT_IRREDUCIBLE_POLY);
private static final int P_DEGREE = 64;
private static final long X_P_DEGREE = 1L << (P_DEGREE - 1);
private static final int READ_BUFFER_SIZE = 1024;
private final long P;
private transient long[] table32, table40, table48, table56, table64, table72, table80, table88;
/**
* <p>Creates a RabinHashFunction64 based on the specified polynomial.</p>
*
* <p>This class does not test the polynomial for irreducibility; therefore this constructor should
* only be used with polynomials that are already known to be irreducible, or else the hash function
* will not perform optimally.</p>
*
* @param P a degree 64 polynomial over GF(2), represented as a <code>long</code>
*/
public RabinHashFunction64(final long P) {
this.P = P;
initializeTables();
}
private void initializeTables() {
final long[] mods = new long[P_DEGREE];
// We want to have mods[i] == x^(P_DEGREE+i)
mods[0] = P;
for (int i = 1; i < P_DEGREE; i++) {
final long lastMod = mods[i - 1];
// x^i == x(x^(i-1)) (mod P)
long thisMod = lastMod << 1;
// if x^(i-1) had a x_(P_DEGREE-1) term then x^i has a
// x^P_DEGREE term that 'fell off' the top end.
// Since x^P_DEGREE == P (mod P), we should add P
// to account for this:
if ((lastMod & X_P_DEGREE) != 0) {
thisMod ^= P;
}
mods[i] = thisMod;
}
// Let i be a number between 0 and 255 (i.e. a byte).
// Let its bits be b0, b1, ..., b7.
// Let Q32 be the polynomial b0*x^39 + b1*x^38 + ... + b7*x^32 (mod P).
// Then table32[i] is Q32, represented as an int (see below).
// Likewise Q40 be the polynomial b0*x^47 + b1*x^46 + ... + b7*x^40 (mod P).
// table40[i] is Q40, represented as an int. Likewise table48 and table56, etc.
table32 = new long[256];
table40 = new long[256];
table48 = new long[256];
table56 = new long[256];
table64 = new long[256];
table72 = new long[256];
table80 = new long[256];
table88 = new long[256];
for (int i = 0; i < 256; i++) {
int c = i;
for (int j = 0; j < 8 && c > 0; j++) {
if ((c & 1) != 0) {
table32[i] ^= mods[j];
table40[i] ^= mods[j + 8];
table48[i] ^= mods[j + 16];
table56[i] ^= mods[j + 24];
table64[i] ^= mods[j + 32];
table72[i] ^= mods[j + 40];
table80[i] ^= mods[j + 48];
table88[i] ^= mods[j + 56];
}
c >>>= 1;
}
}
}
/**
* @return irreducible polynomial used in this hash function, represented as a <code>long</code>
*/
public long getP() {
return P;
}
private long computeWShifted(final long w) {
return table32[(int) (w & 0xFF)] ^
table40[(int) ((w >>> 8) & 0xFF)] ^
table48[(int) ((w >>> 16) & 0xFF)] ^
table56[(int) ((w >>> 24) & 0xFF)] ^
table64[(int) ((w >>> 32) & 0xFF)] ^
table72[(int) ((w >>> 40) & 0xFF)] ^
table80[(int) ((w >>> 48) & 0xFF)] ^
table88[(int) ((w >>> 56) & 0xFF)];
}
/**
* <p>Return the Rabin hash value of an array of bytes.</p>
*
* @param A the array of bytes
* @return the hash value
* @throws NullPointerException if A is null
*/
public long hash(final byte[] A) {
return hash(A, 0, A.length, 0);
}
long hash(final byte[] A, final int offset, final int length, long w) {
int s = offset;
// First, process a few bytes so that the number of bytes remaining is a multiple of 8.
// This makes the later loop easier.
final int starterBytes = length % 8;
if (starterBytes != 0) {
final int max = offset + starterBytes;
while (s < max) {
w = (w << 8) ^ (A[s] & 0xFF);
s++;
}
}
final int max = offset + length;
while (s < max) {
w = computeWShifted(w) ^
(A[s] << 56) ^
((A[s + 1] & 0xFF) << 48) ^
((A[s + 2] & 0xFF) << 40) ^
((A[s + 3] & 0xFF) << 32) ^
((A[s + 4] & 0xFF) << 24) ^
((A[s + 5] & 0xFF) << 16) ^
((A[s + 6] & 0xFF) << 8) ^
(A[s + 7] & 0xFF);
s += 8;
}
return w;
}
/**
* <p>Return the Rabin hash value of an array of chars.</p>
*
* @param A the array of chars
* @return the hash value
* @throws NullPointerException if A is null
*/
public long hash(final char[] A) {
int s = 0;
long w = 0;
// First, process a few chars so that the number of bytes remaining is a multiple of 4.
// This makes the later loop easier.
final int starterChars = A.length % 4;
while (s < starterChars) {
w = (w << 16) ^ (A[s] & 0xFFFF);
s++;
}
while (s < A.length) {
w = computeWShifted(w) ^
((A[s] & 0xFFFF) << 48) ^
((A[s + 1] & 0xFFFF) << 32) ^
((A[s + 2] & 0xFFFF) << 16) ^
(A[s + 3] & 0xFFFF);
s += 4;
}
return w;
}
/**
* <p>Returns the Rabin hash value of an array of <code>long</code>s. This method is the most efficient of
* all the hash methods, so it should be used when possible.</p>
*
* @param A array of <code>long</code>s
* @return the hash value
* @throws NullPointerException if A is null
*/
public long hash(final long[] A) {
long w = 0;
for (int s = 0; s < A.length; s++) {
w = computeWShifted(w) ^ A[s];
}
return w;
}
/**
* <p>Returns the Rabin hash value of a ByteBuffer.</p>
*
* @param A ByteBuffer
* @return the hash value
* @throws NullPointerException if A is null
*/
public long hash(final ByteBuffer A) {
return hash(A.asLongBuffer());
}
/**
* <p>Returns the Rabin hash value of an LongBuffer.</p>
*
* @param A LongBuffer
* @return the hash value
* @throws NullPointerException if A is null
*/
public long hash(final LongBuffer A) {
long w = 0;
while (A.hasRemaining()) {
w = computeWShifted(w) ^ A.get();
}
return w;
}
/**
* <p>Returns the Rabin hash value of a serializable object.</p>
*
* @return the hash value
* @param obj the object to be hashed
* @throws NullPointerException if obj is null
*/
public long hash(final Serializable obj) {
if (obj == null) {
throw new NullPointerException();
}
final ByteArrayOutputStream baos = new ByteArrayOutputStream();
try {
final ObjectOutputStream oos = new ObjectOutputStream(baos);
oos.writeObject(obj);
} catch (IOException ioe) {
// can't happen
}
return hash(baos.toByteArray());
}
/**
* <p>Computes the Rabin hash value of a String.</p>
*
* @param s the string to be hashed
* @return the hash value
* @throws NullPointerException if s is null
*/
public long hash(final String s) {
return hash(s.toCharArray());
}
/**
* <p>Computes the Rabin hash value of the contents of a file.</p>
*
* @return the hash value of the file
* @param f the file to be hashed
* @throws FileNotFoundException if the file cannot be found
* @throws IOException if an error occurs while reading the file
* @throws NullPointerException if f is null
*/
public long hash(final File f) throws FileNotFoundException, IOException {
if (f == null) {
throw new NullPointerException();
}
final FileInputStream fis = new FileInputStream(f);
try {
return hash(fis);
} finally {
fis.close();
}
}
/**
* <p>Computes the Rabin hash value of the contents of a file, specified by URL.</p>
*
* @return the hash value of the file
* @param url the URL of the file to be hashed
* @throws IOException if an error occurs while reading from the URL
* @throws NullPointerException if url is null
*/
public long hash(final URL url) throws IOException {
final InputStream is = url.openStream();
try {
return hash(is);
} finally {
is.close();
}
}
/**
* <p>Computes the Rabin hash value of the data from an <code>InputStream</code>.</p>
*
* @param is the InputStream to hash
* @return the hash value of the data from the InputStream
* @throws IOException if an error occurs while reading from the InputStream
* @throws NullPointerException if stream is null
*/
public long hash(final InputStream is) throws IOException {
final byte[] buffer = new byte[READ_BUFFER_SIZE];
long w = 0;
int bytesRead;
while ((bytesRead = is.read(buffer)) > 0) {
w = hash(buffer, 0, bytesRead, w);
}
return w;
}
public boolean equals(final Object o) {
return o instanceof RabinHashFunction64 && ((RabinHashFunction64) o).P == P;
}
public int hashCode() {
return ((int) P) ^ ((int) (P >> 32));
}
public String toString() {
return "RabinHashFunction64[P: " + RabinHashFunctionUtils.polynomialToString(P) + "]";
}
private void readObject(final ObjectInputStream stream) throws IOException, ClassNotFoundException {
stream.defaultReadObject();
initializeTables();
}
}