/* * Copyright (C) 2012 Facebook, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.facebook.data.datumdigest; import com.facebook.data.types.Datum; import com.facebook.data.types.DatumType; import com.facebook.util.digest.DigestFunction; import com.facebook.util.digest.MurmurHash; /** * takes Datums and delegates to a hash function pair: * f1: long -> long * f2: byte[] -> long * * f2 has an option: internally, it produces 128 bit hash, so you can ask to use the upper or * lower 8 bytes. The default is the lower 8 bytes. For random hashing, in practice it does * not matter. */ public class DatumMurmur3Hash implements DatumDigest, DigestFunction<Datum> { // Since this digestfuncton needs to be deterministic, use some // fixed number to initialize it. This number cannot be changed in // future. Puma instances can go up and down, and the old data still // needs to be read. private final MurmurHash hasher = MurmurHash.createRepeatableHasher(); // applies only to to the hash(byte[]) where we have 128 bytes private final boolean useLsb; private DatumMurmur3Hash(boolean useLsb) { this.useLsb = useLsb; } public DatumMurmur3Hash() { this(true); } /** * for the case that input is not long-compatible and directly produces a long as a hash value, * we get a 128 bit hash. This means use the lower 8 bytes as the long to return * * @return digest that will use lower 8 bytes */ public static DatumMurmur3Hash useLsb() { return new DatumMurmur3Hash(true); } /** * for the case that input is not long-compatible and directly produces a long as a hash value, * we get a 128 bit hash. This means use the upper 8 bytes as the long to return * * @return digest that will use upper 8 bytes */ public static DatumMurmur3Hash useMsb() { return new DatumMurmur3Hash(false); } @Override public long computeDigest(Datum input) { // optimization to use the faster hash on a long when the type is an integer (in the // mathematical sense, not java/C types) if (DatumType.isLongCompatible(input)) { return hasher.hash(input.asLong()); } else { // this uses guava's hash byte[] bytes = hasher.hash(input.asBytes()); long value = 0; int start; int end; if (useLsb) { start = 8; end = 0; } else { start = 16; end = 9; } for (int i = start; i >= end; i--) { value <<= 8; value ^= bytes[i] & 0xFF; } return value; } } }