/***********************************************************************************************************************
 *
 * Copyright (C) 2010-2013 by the Stratosphere project (http://stratosphere.eu)
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
 * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations under the License.
 *
 **********************************************************************************************************************/
package eu.stratosphere.api.java.typeutils.runtime;

import java.io.IOException;

import eu.stratosphere.api.common.typeutils.TypeComparator;
import eu.stratosphere.api.common.typeutils.TypeSerializer;
import eu.stratosphere.api.common.typeutils.TypeSerializerFactory;
import eu.stratosphere.api.java.tuple.Tuple;
import eu.stratosphere.core.memory.DataInputView;
import eu.stratosphere.core.memory.DataOutputView;
import eu.stratosphere.core.memory.MemorySegment;
import eu.stratosphere.types.KeyFieldOutOfBoundsException;
import eu.stratosphere.types.NullKeyFieldException;

/**
 * Comparator for tuples that compares, hashes and normalizes on a list of key fields.
 * <p>
 * Every key field is handled by its own field comparator; the tuple comparator only walks the key
 * positions in order and combines the per-field results. Comparison of serialized tuples
 * deserializes the leading fields into cached objects first and then compares the key fields.
 *
 * @param <T> The tuple type handled by this comparator.
 */
public final class TupleComparator<T extends Tuple> extends TypeComparator<T> implements java.io.Serializable {

	private static final long serialVersionUID = 1L;

	/** key positions describe which fields are keys in what order */
	private final int[] keyPositions;

	/** comparators for the key fields, in the same order as the key fields */
	private final TypeComparator<Object>[] comparators;

	/** serializer factories to duplicate non thread-safe serializers */
	private final TypeSerializerFactory<Object>[] serializerFactories;

	/** lengths of the normalized keys of the leading key fields that take part in the composite normalized key */
	private final int[] normalizedKeyLengths;

	/** number of leading key fields that take part in the composite normalized key */
	private final int numLeadingNormalizableKeys;

	/** summed length of the composite normalized key, {@code Integer.MAX_VALUE} if the sum overflowed */
	private final int normalizableKeyPrefixLen;

	/** inversion flag of the composite normalized key, as decided by the first key comparator */
	private final boolean invertNormKey;

	/** serializers to deserialize the first n fields for comparison */
	private transient TypeSerializer<Object>[] serializers;

	// cache for the deserialized field objects
	private transient Object[] deserializedFields1;
	private transient Object[] deserializedFields2;

	/**
	 * Creates a tuple comparator for the given key fields.
	 *
	 * @param keyPositions The positions of the key fields in the tuple, in key order.
	 * @param comparators The comparators for the key fields, in the same order as {@code keyPositions}.
	 * @param serializers The serializers used to deserialize the first n tuple fields for comparison.
	 * @throws RuntimeException Thrown, if a comparator reports a negative normalized key length.
	 */
	@SuppressWarnings("unchecked")
	public TupleComparator(int[] keyPositions, TypeComparator<?>[] comparators, TypeSerializer<?>[] serializers) {
		// set the default utils
		this.keyPositions = keyPositions;
		this.comparators = (TypeComparator<Object>[]) comparators;
		this.serializers = (TypeSerializer<Object>[]) serializers;

		// set the serializer factories.
		this.serializerFactories = new TypeSerializerFactory[this.serializers.length];
		for (int i = 0; i < serializers.length; i++) {
			this.serializerFactories[i] = this.serializers[i].isStateful() ?
					new RuntimeStatefulSerializerFactory<Object>(this.serializers[i], Object.class) :
					new RuntimeStatelessSerializerFactory<Object>(this.serializers[i], Object.class);
		}

		// set up auxiliary fields for normalized key support
		this.normalizedKeyLengths = new int[keyPositions.length];
		int nKeys = 0;
		int nKeyLen = 0;
		boolean inverted = false;

		for (int i = 0; i < this.keyPositions.length; i++) {
			TypeComparator<?> k = this.comparators[i];

			// as long as the leading keys support normalized keys, we can build up the composite key
			if (k.supportsNormalizedKey()) {
				if (i == 0) {
					// the first comparator decides whether we need to invert the key direction
					inverted = k.invertNormalizedKey();
				}
				else if (k.invertNormalizedKey() != inverted) {
					// if a successor does not agree on the inversion direction, it cannot be part of the normalized key
					break;
				}

				nKeys++;
				final int len = k.getNormalizeKeyLen();
				if (len < 0) {
					throw new RuntimeException("Comparator " + k.getClass().getName() + " specifies an invalid length for the normalized key: " + len);
				}
				this.normalizedKeyLengths[i] = len;
				nKeyLen += len;

				if (nKeyLen < 0) {
					// overflow, which means we are out of budget for normalized key space anyways
					nKeyLen = Integer.MAX_VALUE;
					break;
				}
			} else {
				break;
			}
		}
		this.numLeadingNormalizableKeys = nKeys;
		this.normalizableKeyPrefixLen = nKeyLen;
		this.invertNormKey = inverted;
	}

	/**
	 * Copy constructor used by {@link #duplicate()}. The field comparators are duplicated, all other
	 * state is shared with the original. The serializers and the deserialization caches are not
	 * copied; they are created on first use from the serializer factories.
	 *
	 * @param toClone The comparator to copy.
	 */
	@SuppressWarnings("unchecked")
	private TupleComparator(TupleComparator<T> toClone) {
		// copy fields and serializer factories
		this.keyPositions = toClone.keyPositions;
		this.serializerFactories = toClone.serializerFactories;

		this.comparators = new TypeComparator[toClone.comparators.length];
		for (int i = 0; i < toClone.comparators.length; i++) {
			this.comparators[i] = toClone.comparators[i].duplicate();
		}

		this.normalizedKeyLengths = toClone.normalizedKeyLengths;
		this.numLeadingNormalizableKeys = toClone.numLeadingNormalizableKeys;
		this.normalizableKeyPrefixLen = toClone.normalizableKeyPrefixLen;
		this.invertNormKey = toClone.invertNormKey;
	}

	// --------------------------------------------------------------------------------------------
	//  Accessors
	// --------------------------------------------------------------------------------------------

	/**
	 * Gets the positions of the key fields. The internal array is returned without copying.
	 *
	 * @return The key positions, in key order.
	 */
	protected int[] getKeyPositions() {
		return this.keyPositions;
	}

	/**
	 * Gets the comparators of the key fields. The internal array is returned without copying.
	 *
	 * @return The key field comparators, in key order.
	 */
	protected TypeComparator<Object>[] getComparators() {
		return this.comparators;
	}

	// --------------------------------------------------------------------------------------------
	//  Comparator Methods
	// --------------------------------------------------------------------------------------------

	/**
	 * Computes a hash over the key fields by combining the hashes of the field comparators,
	 * salting the intermediate value with an entry of {@code HASH_SALT} before each further key.
	 *
	 * @throws NullKeyFieldException Thrown, if a {@code NullPointerException} occurs while hashing a key field.
	 * @throws KeyFieldOutOfBoundsException Thrown, if an {@code IndexOutOfBoundsException} occurs while
	 *                                      accessing a key field.
	 */
	@Override
	public int hash(T value) {
		int i = 0;
		try {
			int code = this.comparators[0].hash(value.getField(keyPositions[0]));

			for (i = 1; i < this.keyPositions.length; i++) {
				code *= HASH_SALT[i & 0x1F]; // salt code with (i % HASH_SALT.length)-th salt component
				code += this.comparators[i].hash(value.getField(keyPositions[i]));
			}
			return code;
		}
		catch (NullPointerException npex) {
			throw new NullKeyFieldException(keyPositions[i]);
		}
		catch (IndexOutOfBoundsException iobex) {
			throw new KeyFieldOutOfBoundsException(keyPositions[i]);
		}
	}

	/**
	 * Sets the key fields of the given tuple as the references of the field comparators.
	 *
	 * @throws NullKeyFieldException Thrown, if a {@code NullPointerException} occurs for a key field.
	 * @throws KeyFieldOutOfBoundsException Thrown, if an {@code IndexOutOfBoundsException} occurs for a key field.
	 */
	@Override
	public void setReference(T toCompare) {
		int i = 0;
		try {
			for (; i < this.keyPositions.length; i++) {
				this.comparators[i].setReference(toCompare.getField(this.keyPositions[i]));
			}
		}
		catch (NullPointerException npex) {
			throw new NullKeyFieldException(keyPositions[i]);
		}
		catch (IndexOutOfBoundsException iobex) {
			throw new KeyFieldOutOfBoundsException(keyPositions[i]);
		}
	}

	/**
	 * Checks whether all key fields of the candidate are equal to the references held by the
	 * field comparators. Stops at the first key field that differs.
	 *
	 * @throws NullKeyFieldException Thrown, if a {@code NullPointerException} occurs for a key field.
	 * @throws KeyFieldOutOfBoundsException Thrown, if an {@code IndexOutOfBoundsException} occurs for a key field.
	 */
	@Override
	public boolean equalToReference(T candidate) {
		int i = 0;
		try {
			for (; i < this.keyPositions.length; i++) {
				if (!this.comparators[i].equalToReference(candidate.getField(this.keyPositions[i]))) {
					return false;
				}
			}
			return true;
		}
		catch (NullPointerException npex) {
			throw new NullKeyFieldException(keyPositions[i]);
		}
		catch (IndexOutOfBoundsException iobex) {
			throw new KeyFieldOutOfBoundsException(keyPositions[i]);
		}
	}

	/**
	 * Compares the references of this comparator with those of the given comparator, key by key,
	 * returning the first non-zero result. The given comparator is cast to {@code TupleComparator}.
	 *
	 * @throws NullKeyFieldException Thrown, if a {@code NullPointerException} occurs for a key field.
	 * @throws KeyFieldOutOfBoundsException Thrown, if an {@code IndexOutOfBoundsException} occurs for a key field.
	 */
	@Override
	public int compareToReference(TypeComparator<T> referencedComparator) {
		TupleComparator<T> other = (TupleComparator<T>) referencedComparator;

		int i = 0;
		try {
			for (; i < this.keyPositions.length; i++) {
				int cmp = this.comparators[i].compareToReference(other.comparators[i]);
				if (cmp != 0) {
					return cmp;
				}
			}
			return 0;
		}
		catch (NullPointerException npex) {
			throw new NullKeyFieldException(keyPositions[i]);
		}
		catch (IndexOutOfBoundsException iobex) {
			throw new KeyFieldOutOfBoundsException(keyPositions[i]);
		}
	}

	/**
	 * Compares two tuples on their key fields, key by key, returning the first non-zero result.
	 *
	 * @throws NullKeyFieldException Thrown, if a {@code NullPointerException} occurs for a key field.
	 * @throws KeyFieldOutOfBoundsException Thrown, if an {@code IndexOutOfBoundsException} occurs for a key field.
	 */
	@Override
	public int compare(T first, T second) {
		int i = 0;
		try {
			for (; i < keyPositions.length; i++) {
				int keyPos = keyPositions[i];
				int cmp = comparators[i].compare(first.getField(keyPos), second.getField(keyPos));
				if (cmp != 0) {
					return cmp;
				}
			}
			return 0;
		}
		catch (NullPointerException npex) {
			throw new NullKeyFieldException(keyPositions[i]);
		}
		catch (IndexOutOfBoundsException iobex) {
			throw new KeyFieldOutOfBoundsException(keyPositions[i]);
		}
	}

	/**
	 * Compares two serialized tuples. The leading fields of both tuples are deserialized into the
	 * cached field objects, then the key fields are compared key by key.
	 *
	 * @throws IOException Declared for failures while reading from the input views.
	 * @throws NullKeyFieldException Thrown, if a {@code NullPointerException} occurs while deserializing
	 *                               or comparing a field; carries the tuple position of that field.
	 * @throws KeyFieldOutOfBoundsException Thrown, if an {@code IndexOutOfBoundsException} occurs while
	 *                                      deserializing or comparing a field; carries the tuple position
	 *                                      of that field.
	 */
	@Override
	public int compare(DataInputView firstSource, DataInputView secondSource) throws IOException {
		if (deserializedFields1 == null) {
			instantiateDeserializationUtils();
		}

		// Tuple position of the field currently being processed, used for error reporting only.
		// It is tracked separately from any loop counter, because the deserialization loop runs
		// over the serializers, whose count may exceed the number of key positions. Indexing
		// keyPositions with that counter inside the handlers could fail itself or name the wrong field.
		int field = 0;
		try {
			for (; field < serializers.length; field++) {
				deserializedFields1[field] = serializers[field].deserialize(deserializedFields1[field], firstSource);
				deserializedFields2[field] = serializers[field].deserialize(deserializedFields2[field], secondSource);
			}

			for (int i = 0; i < keyPositions.length; i++) {
				field = keyPositions[i];
				int cmp = comparators[i].compare(deserializedFields1[field], deserializedFields2[field]);
				if (cmp != 0) {
					return cmp;
				}
			}
			return 0;
		}
		catch (NullPointerException npex) {
			throw new NullKeyFieldException(field);
		}
		catch (IndexOutOfBoundsException iobex) {
			throw new KeyFieldOutOfBoundsException(field);
		}
	}

	/**
	 * Normalized keys are supported if at least the first key field supports them.
	 */
	@Override
	public boolean supportsNormalizedKey() {
		return this.numLeadingNormalizableKeys > 0;
	}

	/**
	 * Returns the summed normalized key length of the leading normalizable key fields.
	 */
	@Override
	public int getNormalizeKeyLen() {
		return this.normalizableKeyPrefixLen;
	}

	/**
	 * The normalized key is only a prefix if not all key fields contribute to it, if its length
	 * overflowed, or if it is longer than the given number of bytes.
	 */
	@Override
	public boolean isNormalizedKeyPrefixOnly(int keyBytes) {
		return this.numLeadingNormalizableKeys < this.keyPositions.length ||
				this.normalizableKeyPrefixLen == Integer.MAX_VALUE ||
				this.normalizableKeyPrefixLen > keyBytes;
	}

	/**
	 * Writes the normalized keys of the leading normalizable key fields one after the other into
	 * the target segment, until the fields or the byte budget are exhausted. The last field written
	 * is truncated to the remaining budget.
	 *
	 * @throws NullKeyFieldException Thrown, if a {@code NullPointerException} occurs for a key field.
	 */
	@Override
	public void putNormalizedKey(T value, MemorySegment target, int offset, int numBytes) {
		int i = 0;
		try {
			for (; i < this.numLeadingNormalizableKeys && numBytes > 0; i++) {
				int len = this.normalizedKeyLengths[i];
				len = numBytes >= len ? len : numBytes;
				this.comparators[i].putNormalizedKey(value.getField(this.keyPositions[i]), target, offset, len);
				numBytes -= len;
				offset += len;
			}
		}
		catch (NullPointerException npex) {
			throw new NullKeyFieldException(this.keyPositions[i]);
		}
	}

	@Override
	public boolean invertNormalizedKey() {
		return this.invertNormKey;
	}

	/**
	 * Serialization with key normalization is not supported by this comparator.
	 */
	@Override
	public boolean supportsSerializationWithKeyNormalization() {
		return false;
	}

	/**
	 * Not supported.
	 *
	 * @throws UnsupportedOperationException Always.
	 */
	@Override
	public void writeWithKeyNormalization(T record, DataOutputView target) throws IOException {
		throw new UnsupportedOperationException();
	}

	/**
	 * Not supported.
	 *
	 * @throws UnsupportedOperationException Always.
	 */
	@Override
	public T readWithKeyDenormalization(T reuse, DataInputView source) throws IOException {
		throw new UnsupportedOperationException();
	}

	/**
	 * Creates a copy of this comparator with duplicated field comparators.
	 */
	@Override
	public TupleComparator<T> duplicate() {
		return new TupleComparator<T>(this);
	}

	// --------------------------------------------------------------------------------------------

	/**
	 * Creates the serializers from the serializer factories, if they are not set, and allocates
	 * the two caches of deserialized field objects, one instance per serializer and cache.
	 */
	@SuppressWarnings("unchecked")
	private void instantiateDeserializationUtils() {
		if (this.serializers == null) {
			this.serializers = new TypeSerializer[this.serializerFactories.length];
			for (int i = 0; i < this.serializers.length; i++) {
				this.serializers[i] = this.serializerFactories[i].getSerializer();
			}
		}

		this.deserializedFields1 = new Object[this.serializers.length];
		this.deserializedFields2 = new Object[this.serializers.length];

		for (int i = 0; i < this.serializers.length; i++) {
			this.deserializedFields1[i] = this.serializers[i].createInstance();
			this.deserializedFields2[i] = this.serializers[i].createInstance();
		}
	}

	// --------------------------------------------------------------------------------------------

	/**
	 * A sequence of prime numbers to be used for salting the computed hash values.
	 * Based on some empirical evidence, we are using a 32-element subsequence of the
	 * OEIS sequence #A068652 (numbers such that every cyclic permutation is a prime).
	 *
	 * @see <a href="http://en.wikipedia.org/wiki/List_of_prime_numbers">List of prime numbers</a>
	 * @see <a href="http://oeis.org/A068652">OEIS sequence A068652</a>
	 */
	private static final int[] HASH_SALT = new int[] {
		73   , 79   , 97   , 113  , 131  , 197  , 199  , 311   ,
		337  , 373  , 719  , 733  , 919  , 971  , 991  , 1193  ,
		1931 , 3119 , 3779 , 7793 , 7937 , 9311 , 9377 , 11939 ,
		19391, 19937, 37199, 39119, 71993, 91193, 93719, 93911 };
}