/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.api.common.typeutils;
import org.apache.flink.annotation.PublicEvolving;
import org.apache.flink.core.memory.DataInputView;
import org.apache.flink.core.memory.DataOutputView;
import org.apache.flink.core.memory.MemorySegment;
import java.io.IOException;
import java.io.Serializable;
/**
* This class describes the methods that are required for a data type to be handled by the
* runtime. Specifically, it contains the methods used for hashing, comparing, and creating
* auxiliary structures (such as normalized keys).
* <p>
* The methods in this class depend not only on the record, but also on what fields of a record are
* used for the comparison or hashing. That set of fields is typically a subset of a record's fields.
* In general, this class assumes a contract on hash codes and equality the same way as defined for
* {@link java.lang.Object#hashCode()} and {@link java.lang.Object#equals(Object)}.
* <p>
* Implementing classes are stateful, because several methods require setting one record as the reference for
* comparisons and later comparing a candidate against it. Therefore, the classes implementing this class are
* not thread safe. The runtime will ensure that no instance is used twice in different threads, but will create
* a copy for that purpose. It is hence imperative that the copies created by the {@link #duplicate()} method
* share no state with the instance from which they were copied: they have to be deep copies.
*
* @see java.lang.Object#hashCode()
* @see java.lang.Object#equals(Object)
* @see java.util.Comparator#compare(Object, Object)
*
* @param <T> The data type that the comparator works on.
*/
@PublicEvolving
public abstract class TypeComparator<T> implements Serializable {
private static final long serialVersionUID = 1L;
/**
* Computes a hash value for the given record. The hash value should include all fields in the record
* relevant to the comparison.
* <p>
* The hash code is typically not used as it is in hash tables and for partitioning, but it is further
* scrambled to make sure that a projection of the hash values to a lower cardinality space
* results in a rather uniform value distribution.
* However, any collisions produced by this method cannot be undone. While it is NOT
* important to create hash codes that cover the full spectrum of bits in the integer, it IS important
* to avoid collisions when combining two values as much as possible.
*
* @param record The record to be hashed.
* @return A hash value for the record.
*
* @see java.lang.Object#hashCode()
*/
public abstract int hash(T record);
/**
* Sets the given element as the comparison reference for future calls to
* {@link #equalToReference(Object)} and {@link #compareToReference(TypeComparator)}. This method
* must set the given element into this comparator instance's state. If the comparison happens on a subset
* of the fields from the record, this method may extract those fields.
* <p>
* A typical example for checking the equality of two elements is the following:
* <pre>{@code
* E e1 = ...;
* E e2 = ...;
*
* TypeComparator<E> acc = ...;
*
* acc.setReference(e1);
* boolean equal = acc.equalToReference(e2);
* }</pre>
*
* The rationale behind this method is that elements are typically compared using certain features that
* are extracted from them (such as de-serializing a subset of fields). When setting the
* reference, this extraction happens. The extraction needs to happen only once per element,
* even though an element is often compared to multiple other elements, such as when finding equal elements
* in the process of grouping the elements.
*
* @param toCompare The element to set as the comparison reference.
*/
public abstract void setReference(T toCompare);
/**
* Checks whether the given element is equal to the element that has been set as the comparison
* reference in this comparator instance.
*
* @param candidate The candidate to check.
* @return True, if the element is equal to the comparison reference, false otherwise.
*
* @see #setReference(Object)
*/
public abstract boolean equalToReference(T candidate);
/**
* This method compares the element that has been set as reference in this comparator to the
* element set as reference in the given comparator. Similar to comparing two
* elements {@code e1} and {@code e2} via a comparator, this method can be used the
* following way.
*
* <pre>{@code
* E e1 = ...;
* E e2 = ...;
*
* TypeComparator<E> acc1 = ...;
* TypeComparator<E> acc2 = ...;
*
* acc1.setReference(e1);
* acc2.setReference(e2);
*
* int comp = acc1.compareToReference(acc2);
* }</pre>
*
* The rationale behind this method is that elements are typically compared using certain features that
* are extracted from them (such as de-serializing a subset of fields). When setting the
* reference, this extraction happens. The extraction needs to happen only once per element,
* even though an element is typically compared to many other elements when establishing a
* sorted order. The actual comparison performed by this method may be very cheap, as it
* happens on the extracted features.
*
* @param referencedComparator The comparator where the element for comparison has been set
* as reference.
*
* @return A value smaller than zero, if the reference value of {@code referencedComparator} is smaller
* than the reference value of this comparator; a value greater than zero, if it is larger;
* zero, if both are equal.
*
* @see #setReference(Object)
*/
public abstract int compareToReference(TypeComparator<T> referencedComparator);
/**
* A special case method that the runtime uses for special "PactRecord" support.
* <p>
* This base implementation always returns {@code false}.
* NOTE(review): presumably a subclass returning {@code true} here is also expected to override
* {@link #compareAgainstReference(Comparable[])} - confirm against the callers.
*
* @return {@code false} in this base implementation.
*/
public boolean supportsCompareAgainstReference() {
return false;
}
/**
* Compares two records in object form. The return value indicates the order of the two in the same way
* as defined by {@link java.util.Comparator#compare(Object, Object)}.
*
* @param first The first record.
* @param second The second record.
* @return An integer defining the order among the objects in the same way as {@link java.util.Comparator#compare(Object, Object)}.
*
* @see java.util.Comparator#compare(Object, Object)
*/
public abstract int compare(T first, T second);
/**
* Compares two records in serialized form. The return value indicates the order of the two in the same way
* as defined by {@link java.util.Comparator#compare(Object, Object)}.
* <p>
* This method may de-serialize the records or compare them directly based on their binary representation.
*
* @param firstSource The input view containing the first record.
* @param secondSource The input view containing the second record.
* @return An integer defining the order among the objects in the same way as {@link java.util.Comparator#compare(Object, Object)}.
* @throws IOException Thrown, if any of the input views raised an exception when reading the records.
*
* @see java.util.Comparator#compare(Object, Object)
*/
public abstract int compareSerialized(DataInputView firstSource, DataInputView secondSource) throws IOException;
// --------------------------------------------------------------------------------------------
/**
* Checks whether the data type supports the creation of a normalized key for comparison.
*
* @return True, if the data type supports the creation of a normalized key for comparison, false otherwise.
*/
public abstract boolean supportsNormalizedKey();
/**
* Checks whether this comparator supports serializing the record in a format that replaces its keys by a
* normalized key.
*
* @return True, if the comparator supports that specific form of serialization, false if not.
*
* @see #writeWithKeyNormalization(Object, DataOutputView)
* @see #readWithKeyDenormalization(Object, DataInputView)
*/
public abstract boolean supportsSerializationWithKeyNormalization();
/**
* Gets the number of bytes that the normalized key would maximally take. A value of
* {@link java.lang.Integer#MAX_VALUE} is interpreted as infinite.
*
* @return The number of bytes that the normalized key would maximally take.
*/
public abstract int getNormalizeKeyLen();
/**
* Checks whether the given number of bytes for a normalized key is only a prefix to determine the order of
* elements of the data type for which this comparator provides the comparison methods. For example, if the
* data type is ordered with respect to an integer value it contains, then this method would return
* true, if the number of key bytes is smaller than four.
*
* @param keyBytes The number of bytes used for the normalized key.
* @return True, if the given number of bytes is only a prefix,
* false otherwise.
*/
public abstract boolean isNormalizedKeyPrefixOnly(int keyBytes);
/**
* Writes a normalized key for the given record into the target memory segment, starting at the specified
* position and writing exactly the given number of bytes. Note that the comparison of the bytes is treating
* the bytes as unsigned bytes: {@code int byteI = bytes[i] & 0xFF;}
* <p>
* If the meaningful part of the normalized key takes less than the given number of bytes, then it must be padded.
* Padding is typically required for variable length data types, such as strings. The padding uses a special
* character, either {@code 0} or {@code 0xff}, depending on whether shorter values are sorted to the beginning or
* the end.
* <p>
* This method is similar to {@link org.apache.flink.types.NormalizableKey#copyNormalizedKey(MemorySegment, int, int)}. In the case that
* multiple fields of a record contribute to the normalized key, it is crucial that the fields align on the
* byte field, i.e. that every field always takes up the exact same number of bytes.
*
* @param record The record for which to create the normalized key.
* @param target The memory segment into which to write the normalized key bytes.
* @param offset The offset in the memory segment, where to start writing the normalized key bytes.
* @param numBytes The number of bytes to be written exactly.
*
* @see org.apache.flink.types.NormalizableKey#copyNormalizedKey(MemorySegment, int, int)
*/
public abstract void putNormalizedKey(T record, MemorySegment target, int offset, int numBytes);
/**
* Writes the record in such a fashion that all keys are normalized and at the beginning of the serialized data.
* This must only be used when for all the key fields the full normalized key is used. The method
* {@link #supportsSerializationWithKeyNormalization()} allows to check that.
*
* @param record The record to write.
* @param target The stream to which to write the data.
* @throws IOException May be thrown by implementations, e.g. if writing to the target raised an exception.
*
* @see #supportsSerializationWithKeyNormalization()
* @see #readWithKeyDenormalization(Object, DataInputView)
* @see org.apache.flink.types.NormalizableKey#copyNormalizedKey(MemorySegment, int, int)
*/
public abstract void writeWithKeyNormalization(T record, DataOutputView target) throws IOException;
/**
* Reads the record back while de-normalizing the key fields. This must only be used when
* for all the key fields the full normalized key is used, which is hinted by the
* {@link #supportsSerializationWithKeyNormalization()} method.
*
* @param reuse The reuse object into which to read the record data.
* @param source The stream from which to read the data.
* @return The record that was read. NOTE(review): whether this is always the {@code reuse} instance
* is not specified here - confirm against the implementations.
* @throws IOException May be thrown by implementations, e.g. if reading from the source raised an exception.
*
* @see #supportsSerializationWithKeyNormalization()
* @see #writeWithKeyNormalization(Object, DataOutputView)
* @see org.apache.flink.types.NormalizableKey#copyNormalizedKey(MemorySegment, int, int)
*/
public abstract T readWithKeyDenormalization(T reuse, DataInputView source) throws IOException;
/**
* Flag whether the normalized key should be interpreted inverted, i.e. descending.
*
* @return True, if all normalized key comparisons should invert the sign of the comparison result,
* false if the normalized key should be used as is.
*/
public abstract boolean invertNormalizedKey();
// --------------------------------------------------------------------------------------------
/**
* Creates a copy of this class. The copy must be deep such that no state set in the copy affects this
* instance of the comparator class.
*
* @return A deep copy of this comparator instance.
*/
public abstract TypeComparator<T> duplicate();
// --------------------------------------------------------------------------------------------
/**
* Extracts the key fields from a record. This is for use by the PairComparator to provide
* interoperability between different record types. Note that at least one key should be extracted.
*
* @param record The record that contains the key(s).
* @param target The array to write the key(s) into.
* @param index The offset of the target array to start writing into.
* @return The number of keys added to target.
*/
public abstract int extractKeys(Object record, Object[] target, int index);
/**
* Gets the field comparators. This is used together with {@link #extractKeys(Object, Object[], int)}
* to provide interoperability between different record types. Note that this should return at
* least one comparator and that the number of comparators must match the number of extracted
* keys.
*
* @return An array of comparators for the extracted keys.
*/
@SuppressWarnings("rawtypes")
public abstract TypeComparator[] getFlatComparators();
// --------------------------------------------------------------------------------------------
/**
* Compares the given keys against the reference held by this comparator.
* <p>
* This base implementation does not support the operation and always throws an
* {@link UnsupportedOperationException}; the exception message marks it as a workaround.
* NOTE(review): presumably only meaningful for comparators whose
* {@link #supportsCompareAgainstReference()} returns {@code true} - confirm against the callers.
*
* @param keys The keys to compare against the reference.
* @return The comparison result; never returned by this base implementation.
* @throws UnsupportedOperationException Always, in this base implementation.
*/
@SuppressWarnings("rawtypes")
public int compareAgainstReference(Comparable[] keys) {
throw new UnsupportedOperationException("Workaround hack.");
}
}