/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.common.type;
import java.util.Arrays;
/**
 * Miscellaneous arithmetic utilities used in this package: sign-bit
 * manipulation, unsigned comparison/division on {@code int} and {@code long},
 * pre-computed power tables, and multi-precision (array of 32-bit words,
 * least significant word first) multiply and divide.
 *
 * This code was based on code from Microsoft's PolyBase.
 */
public final class SqlMathUtil {

  /** Mask with only the sign bit of a long set; OR-ing it in makes a long negative. */
  public static final long NEGATIVE_LONG_MASK = 0x8000000000000000L;

  /** Mask with every bit except the sign bit of a long set; AND-ing clears the sign bit. */
  public static final long FULLBITS_63 = 0x7FFFFFFFFFFFFFFFL;

  /** Mask with only the sign bit of an int set; OR-ing it in makes an int negative. */
  public static final int NEGATIVE_INT_MASK = 0x80000000;

  /** Mask to convert a signed int to its unsigned value held in a long. */
  public static final long LONG_MASK = 0xFFFFFFFFL;

  /** Mask with every bit except the sign bit of an int set; AND-ing clears the sign bit. */
  public static final int FULLBITS_31 = 0x7FFFFFFF;

  /** Max unsigned integer (all 32 bits set). */
  public static final int FULLBITS_32 = 0xFFFFFFFF;

  /** 5^13 fits in 2^31. */
  public static final int MAX_POWER_FIVE_INT31 = 13;

  /** 5^x for x in [0, {@link #MAX_POWER_FIVE_INT31}]. All unsigned values. */
  public static final int[] POWER_FIVES_INT31 = new int[MAX_POWER_FIVE_INT31 + 1];

  /** 5^27 fits in 2^63. */
  public static final int MAX_POWER_FIVE_INT63 = 27;

  /** 5^x for x in [0, {@link #MAX_POWER_FIVE_INT63}]. All unsigned values. */
  public static final long[] POWER_FIVES_INT63 = new long[MAX_POWER_FIVE_INT63 + 1];

  /** 5^55 fits in 2^128. */
  public static final int MAX_POWER_FIVE_INT128 = 55;

  /** 5^x for x in [0, {@link #MAX_POWER_FIVE_INT128}]. */
  public static final UnsignedInt128[] POWER_FIVES_INT128 =
      new UnsignedInt128[MAX_POWER_FIVE_INT128 + 1];

  /**
   * 1/5^x, scaled to 128bits (in other words, 2^128/5^x). Because of flooring,
   * this is same or smaller than real value.
   */
  public static final UnsignedInt128[] INVERSE_POWER_FIVES_INT128 =
      new UnsignedInt128[MAX_POWER_FIVE_INT128 + 1];

  /** 10^9 fits in 2^31. */
  public static final int MAX_POWER_TEN_INT31 = 9;

  /** 10^x for x in [0, {@link #MAX_POWER_TEN_INT31}]. All unsigned values. */
  public static final int[] POWER_TENS_INT31 = new int[MAX_POWER_TEN_INT31 + 1];

  /** 5 * 10^(x-1), i.e. half of 10^x; element 0 is 0. */
  public static final int[] ROUND_POWER_TENS_INT31 = new int[MAX_POWER_TEN_INT31 + 1];

  /** 10^38 fits in UnsignedInt128. */
  public static final int MAX_POWER_TEN_INT128 = 38;

  /** 10^x for x in [0, {@link #MAX_POWER_TEN_INT128}]. */
  public static final UnsignedInt128[] POWER_TENS_INT128 =
      new UnsignedInt128[MAX_POWER_TEN_INT128 + 1];

  /** 5 * 10^(x-1), i.e. half of 10^x; element 0 is 0. */
  public static final UnsignedInt128[] ROUND_POWER_TENS_INT128 =
      new UnsignedInt128[MAX_POWER_TEN_INT128 + 1];

  /**
   * 1/10^x, scaled to 128bits, also word-shifted for better accuracy. Because
   * of flooring, this is same or smaller than real value.
   */
  public static final UnsignedInt128[] INVERSE_POWER_TENS_INT128 =
      new UnsignedInt128[MAX_POWER_TEN_INT128 + 1];

  /** Number of words shifted up in each INVERSE_POWER_TENS_INT128. */
  public static final int[] INVERSE_POWER_TENS_INT128_WORD_SHIFTS =
      new int[MAX_POWER_TEN_INT128 + 1];

  /** Radix of one 32-bit "super-digit" (2^32). Used in division. */
  private static final long BASE = (1L << 32);

  /**
   * Turn on or off the highest bit of an int value.
   *
   * @param val
   *          the value to modify
   * @param positive
   *          whether to turn off (positive) or on (negative).
   * @return {@code val} with its highest bit cleared or set
   */
  public static int setSignBitInt(int val, boolean positive) {
    return positive ? (val & FULLBITS_31) : (val | NEGATIVE_INT_MASK);
  }

  /**
   * Turn on or off the highest bit of a long value.
   *
   * @param val
   *          the value to modify
   * @param positive
   *          whether to turn off (positive) or on (negative).
   * @return {@code val} with its highest bit cleared or set
   */
  public static long setSignBitLong(long val, boolean positive) {
    return positive ? (val & FULLBITS_63) : (val | NEGATIVE_LONG_MASK);
  }

  /**
   * Returns the minimal number of bits to represent the given integer value,
   * treating it as unsigned: 0 for zero, 32 when the highest bit is set.
   *
   * @param word
   *          int32 value
   * @return the minimal number of bits to represent the given integer value
   */
  public static short bitLengthInWord(int word) {
    // 32 minus the count of leading zero bits is the position of the highest
    // set bit (1-based). This covers every input, including bytes >= 0x80.
    return (short) (Integer.SIZE - Integer.numberOfLeadingZeros(word));
  }

  /**
   * Returns the minimal number of bits to represent the 128-bit value made of
   * the four words, {@code v0} being the least significant.
   *
   * @param v0
   *          bits 0-31
   * @param v1
   *          bits 32-63
   * @param v2
   *          bits 64-95
   * @param v3
   *          bits 96-127
   * @return the minimal number of bits to represent the words
   */
  public static short bitLength(int v0, int v1, int v2, int v3) {
    if (v3 != 0) {
      return (short) (bitLengthInWord(v3) + 96);
    }
    if (v2 != 0) {
      return (short) (bitLengthInWord(v2) + 64);
    }
    if (v1 != 0) {
      return (short) (bitLengthInWord(v1) + 32);
    }
    return bitLengthInWord(v0);
  }

  /**
   * If we can assume JDK 1.8, this should use
   * java.lang.Integer.compareUnsigned(), which will be replaced with intrinsics
   * in JVM.
   *
   * @param x
   *          the first {@code int} to compare
   * @param y
   *          the second {@code int} to compare
   * @return the value {@code 0} if {@code x == y}; a value less than {@code 0}
   *         if {@code x < y} as unsigned values; and a value greater than
   *         {@code 0} if {@code x > y} as unsigned values
   * @see "http://hg.openjdk.java.net/jdk8/tl/jdk/rev/71200c517524"
   */
  public static int compareUnsignedInt(int x, int y) {
    // Can't assume JDK 1.8, so implementing this explicitly.
    if (x == y) {
      return 0;
    }
    // Adding MIN_VALUE flips the sign bit, which maps unsigned order onto
    // signed order.
    return (x + Integer.MIN_VALUE < y + Integer.MIN_VALUE) ? -1 : 1;
  }

  /**
   * If we can assume JDK 1.8, this should use java.lang.Long.compareUnsigned(),
   * which will be replaced with intrinsics in JVM.
   *
   * @param x
   *          the first {@code long} to compare
   * @param y
   *          the second {@code long} to compare
   * @return the value {@code 0} if {@code x == y}; a value less than {@code 0}
   *         if {@code x < y} as unsigned values; and a value greater than
   *         {@code 0} if {@code x > y} as unsigned values
   * @see "http://hg.openjdk.java.net/jdk8/tl/jdk/rev/71200c517524"
   */
  public static int compareUnsignedLong(long x, long y) {
    // Can't assume JDK 1.8, so implementing this explicitly.
    if (x == y) {
      return 0;
    }
    // Adding MIN_VALUE flips the sign bit, which maps unsigned order onto
    // signed order.
    return (x + Long.MIN_VALUE < y + Long.MIN_VALUE) ? -1 : 1;
  }

  /**
   * If we can assume JDK 1.8, this should use java.lang.Long.divideUnsigned(),
   * which will be replaced with intrinsics in JVM.
   *
   * @param dividend
   *          the value to be divided
   * @param divisor
   *          the value doing the dividing
   * @return the unsigned quotient of the first argument divided by the second
   *         argument
   * @throws ArithmeticException
   *           if {@code divisor} is zero
   * @see "http://hg.openjdk.java.net/jdk8/tl/jdk/rev/71200c517524"
   */
  public static long divideUnsignedLong(long dividend, long divisor) {
    if (divisor < 0L) {
      // Divisor has its top bit set, so the answer must be 0 or 1 depending
      // on relative magnitude of dividend and divisor.
      return (compareUnsignedLong(dividend, divisor) < 0) ? 0L : 1L;
    }
    if (dividend >= 0L) {
      // Both inputs non-negative: signed division is already correct.
      return dividend / divisor;
    }
    // Simple division: halve the dividend so that it fits in a signed long,
    // divide, double the result, then correct the possible off-by-one.
    // An equivalent algorithm exists in
    // com.google.common.primitives.UnsignedLongs. This will eventually be
    // replaced by the Java 8 intrinsic.
    long quotient = ((dividend >>> 1) / divisor) << 1;
    long remainder = dividend - quotient * divisor;
    if (compareUnsignedLong(remainder, divisor) >= 0) {
      return quotient + 1;
    }
    return quotient;
  }

  /**
   * If we can assume JDK 1.8, this should use
   * java.lang.Long.remainderUnsigned(), which will be replaced with intrinsics
   * in JVM.
   *
   * @param dividend
   *          the value to be divided
   * @param divisor
   *          the value doing the dividing
   * @return the unsigned remainder of the first argument divided by the second
   *         argument
   * @throws ArithmeticException
   *           if {@code divisor} is zero
   * @see "http://hg.openjdk.java.net/jdk8/tl/jdk/rev/71200c517524"
   */
  public static long remainderUnsignedLong(long dividend, long divisor) {
    if (divisor < 0L) {
      // Because the divisor has its top bit set, the quotient is at most 1.
      // The remainder is the dividend itself (quotient=0) or
      // dividend - divisor (quotient=1).
      return (compareUnsignedLong(dividend, divisor) < 0)
          ? dividend
          : dividend - divisor;
    }
    if (dividend >= 0L) {
      // Both inputs non-negative: signed remainder is already correct.
      return dividend % divisor;
    }
    // Same approach as divideUnsignedLong(): approximate quotient from the
    // halved dividend, then fix up the remainder.
    long quotient = ((dividend >>> 1) / divisor) << 1;
    long remainder = dividend - quotient * divisor;
    if (compareUnsignedLong(remainder, divisor) >= 0) {
      return remainder - divisor;
    }
    return remainder;
  }

  /**
   * Combines two 32-bit words into one 64-bit value.
   *
   * @param lo
   *          low 32bit
   * @param hi
   *          high 32bit
   * @return long value that combines the two integers
   */
  public static long combineInts(int lo, int hi) {
    return ((hi & LONG_MASK) << 32) | (lo & LONG_MASK);
  }

  /**
   * @param val
   *          long value
   * @return high 32bit of the given value
   */
  public static int extractHiInt(long val) {
    return (int) (val >> 32);
  }

  /**
   * @param val
   *          long value
   * @return low 32bit of the given value
   */
  public static int extractLowInt(long val) {
    return (int) val;
  }

  /** Throws an overflow exception. */
  static void throwOverflowException() {
    throw new ArithmeticException("Overflow");
  }

  /** Throws a divide-by-zero exception. */
  static void throwZeroDivisionException() {
    throw new ArithmeticException("Divide by zero");
  }

  /**
   * Multi-precision one super-digit multiply in place.
   *
   * @param inOut
   *          little-endian words of the value; overwritten with the product
   * @param multiplier
   *          32-bit multiplier, interpreted as unsigned
   * @throws ArithmeticException
   *           if the product does not fit in {@code inOut}
   */
  private static void multiplyMultiPrecision(int[] inOut, int multiplier) {
    final long multiplierUnsigned = multiplier & LONG_MASK;
    long product = 0L;
    for (int i = 0; i < inOut.length; ++i) {
      // word * multiplier + carry from the previous word; cannot exceed 64 bits
      product = (inOut[i] & LONG_MASK) * multiplierUnsigned + (product >>> 32);
      inOut[i] = (int) product;
    }
    if ((product >>> 32) != 0) {
      throwOverflowException();
    }
  }

  /**
   * Multi-precision one super-digit divide in place.
   *
   * @param inOut
   *          little-endian words of the dividend; overwritten with the
   *          quotient
   * @param divisor
   *          32-bit divisor, interpreted as unsigned
   * @return the remainder (as an unsigned 32-bit value)
   */
  private static int divideMultiPrecision(int[] inOut, int divisor) {
    final long divisorUnsigned = divisor & LONG_MASK;
    long remainder = 0L;
    for (int i = inOut.length - 1; i >= 0; --i) {
      // The running remainder can be as large as 2^32 - 2, so the partial
      // dividend may use all 64 bits. It must be divided as an unsigned
      // value; signed '/' and '%' would give wrong digits for divisors
      // >= 2^31.
      final long partial = (remainder << 32) | (inOut[i] & LONG_MASK);
      final long quotient = divideUnsignedLong(partial, divisorUnsigned);
      inOut[i] = (int) quotient;
      remainder = partial - quotient * divisorUnsigned;
    }
    return (int) remainder;
  }

  /**
   * Returns length of the array discounting the trailing elements with zero
   * value.
   */
  private static int arrayValidLength(int[] array) {
    int len = array.length;
    while (len > 0 && array[len - 1] == 0) {
      --len;
    }
    return len;
  }

  /**
   * Multi-precision divide. dividend and divisor not changed. Assumes that
   * there is enough room in quotient for results. All arrays hold unsigned
   * 32-bit words, least significant word first.
   *
   * Drawbacks of this implementation: 1) Need one extra super-digit in R
   * 2) The divisor has to be copied when it needs normalization (the caller
   * doesn't expect it to change) 3) Always get Q and R - if R is unnecessary,
   * can be slightly faster.
   *
   * @param dividend
   *          dividend. in.
   * @param divisor
   *          divisor. in.
   * @param quotient
   *          quotient. out. Cleared before the result is written.
   * @return remainder, in a newly allocated array of
   *         {@code dividend.length + 1} words
   * @throws ArithmeticException
   *           if the divisor is zero
   */
  public static int[] divideMultiPrecision(int[] dividend, int[] divisor,
      int[] quotient) {
    final int dividendLength = arrayValidLength(dividend);
    final int divisorLength = arrayValidLength(divisor);
    Arrays.fill(quotient, 0);

    // Remainder := Dividend, plus one extra (zero) high word that receives
    // the carry of the normalization multiply.
    int[] remainder = new int[dividend.length + 1];
    System.arraycopy(dividend, 0, remainder, 0, dividend.length);

    if (divisorLength == 0) {
      throwZeroDivisionException();
    }
    if (dividendLength < divisorLength) {
      // Dividend is smaller than divisor: quotient 0, remainder = dividend.
      return remainder;
    }
    if (divisorLength == 1) {
      // Single-word divisor: schoolbook short division is enough.
      int rem = divideMultiPrecision(remainder, divisor[0]);
      System.arraycopy(remainder, 0, quotient, 0,
          Math.min(quotient.length, remainder.length));
      Arrays.fill(remainder, 0);
      remainder[0] = rem;
      return remainder;
    }

    // Knuth, "The Art of Computer Programming", 3rd edition, vol.II, Alg.D,
    // pg 272
    // D1. Normalize so high digit of D >= BASE/2 - that guarantees
    // that QH will not be too far from the correct digit later in D3.
    // The scale factor can be as large as 2^31 (when the high digit is 1),
    // which does not fit in a positive int, so keep it in a long and pass its
    // low 32 bits to the (unsigned) single-word helpers.
    final long d1 = BASE / ((divisor[divisorLength - 1] & LONG_MASK) + 1L);
    final boolean normalized = d1 > 1L;
    if (normalized) {
      // We are modifying divisor here, so make a local copy.
      int[] scaledDivisor = new int[divisorLength];
      System.arraycopy(divisor, 0, scaledDivisor, 0, divisorLength);
      multiplyMultiPrecision(scaledDivisor, (int) d1);
      divisor = scaledDivisor;
      multiplyMultiPrecision(remainder, (int) d1);
    }

    // only 32bits, but long to behave as unsigned
    final long dHigh = divisor[divisorLength - 1] & LONG_MASK;
    final long dLow = divisor[divisorLength - 2] & LONG_MASK;

    // D2-D7. Loop on rIndex - obtaining quotient digits one-by-one, from the
    // most significant down.
    for (int rIndex = remainder.length - 1; rIndex >= divisorLength; --rIndex) {
      // D3. Calculate Q hat - estimation of the next digit - from the top two
      // words of the running remainder and the top word of the divisor.
      final long rTop = remainder[rIndex] & LONG_MASK;
      final long rNext = remainder[rIndex - 1] & LONG_MASK;
      long qhat;
      long rhat;
      if (rTop == dHigh) {
        // The two-word quotient would be >= BASE; clamp to the largest digit.
        qhat = BASE - 1;
        rhat = rNext + dHigh; // may be >= BASE, in which case qhat is final
      } else {
        final long topTwo = (rTop << 32) | rNext;
        qhat = divideUnsignedLong(topTwo, dHigh);
        rhat = topTwo - qhat * dHigh;
      }
      // Refine with the second divisor word. Once rhat reaches BASE the test
      // can no longer succeed, so stop (this also keeps rhat << 32 in range).
      final long rThird = remainder[rIndex - 2] & LONG_MASK;
      while (rhat < BASE
          && compareUnsignedLong(qhat * dLow, (rhat << 32) | rThird) > 0) {
        --qhat;
        rhat += dHigh;
      }

      // D4. Multiply and subtract: (some digits of) R -= D * QH.
      // 'accum' carries BASE (no borrow) or BASE - 1 (borrow) between words.
      long mulCarry = 0L;
      long accum = BASE;
      int rWork = rIndex - divisorLength;
      for (int dIndex = 0; dIndex < divisorLength; ++dIndex, ++rWork) {
        mulCarry += qhat * (divisor[dIndex] & LONG_MASK);
        accum += (remainder[rWork] & LONG_MASK) - (mulCarry & LONG_MASK);
        mulCarry >>>= 32;
        remainder[rWork] = (int) accum;
        accum = (accum >>> 32) + BASE - 1;
      }
      accum += (remainder[rWork] & LONG_MASK) - mulCarry;
      remainder[rWork] = (int) accum;

      // D5. Test remainder. A missing carry-out indicates result<0, therefore
      // QH was 1 too large.
      if ((accum >>> 32) == 0) {
        // D6. Add back - rare. R += D. Q[digit] -= 1
        --qhat;
        long carry = 0L;
        rWork = rIndex - divisorLength;
        for (int dIndex = 0; dIndex < divisorLength; ++dIndex, ++rWork) {
          final long sum = (divisor[dIndex] & LONG_MASK)
              + (remainder[rWork] & LONG_MASK) + carry;
          remainder[rWork] = (int) sum;
          carry = sum >>> 32;
        }
        remainder[rWork] += (int) carry;
      }
      quotient[rIndex - divisorLength] = (int) qhat;
    }

    // D8. Unnormalize: Divide R to get result
    if (normalized) {
      divideMultiPrecision(remainder, (int) d1);
    }
    return remainder;
  }

  static {
    // Powers of five / ten that fit in primitive types.
    POWER_FIVES_INT31[0] = 1;
    for (int i = 1; i < POWER_FIVES_INT31.length; ++i) {
      POWER_FIVES_INT31[i] = POWER_FIVES_INT31[i - 1] * 5;
      assert (POWER_FIVES_INT31[i] > 0);
    }

    POWER_FIVES_INT63[0] = 1L;
    for (int i = 1; i < POWER_FIVES_INT63.length; ++i) {
      POWER_FIVES_INT63[i] = POWER_FIVES_INT63[i - 1] * 5L;
      assert (POWER_FIVES_INT63[i] > 0L);
    }

    POWER_TENS_INT31[0] = 1;
    ROUND_POWER_TENS_INT31[0] = 0;
    for (int i = 1; i < POWER_TENS_INT31.length; ++i) {
      POWER_TENS_INT31[i] = POWER_TENS_INT31[i - 1] * 10;
      assert (POWER_TENS_INT31[i] > 0);
      ROUND_POWER_TENS_INT31[i] = POWER_TENS_INT31[i] >> 1;
    }

    // 128-bit powers of five and their scaled inverses, each derived from
    // the previous entry.
    POWER_FIVES_INT128[0] = new UnsignedInt128(1);
    INVERSE_POWER_FIVES_INT128[0] = new UnsignedInt128(0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFFFFF);
    for (int i = 1; i < POWER_FIVES_INT128.length; ++i) {
      POWER_FIVES_INT128[i] = new UnsignedInt128(POWER_FIVES_INT128[i - 1]);
      POWER_FIVES_INT128[i].multiplyDestructive(5);
      INVERSE_POWER_FIVES_INT128[i] = new UnsignedInt128(
          INVERSE_POWER_FIVES_INT128[i - 1]);
      INVERSE_POWER_FIVES_INT128[i].divideDestructive(5);
    }

    // 128-bit powers of ten, their halves, and their scaled inverses.
    POWER_TENS_INT128[0] = new UnsignedInt128(1);
    ROUND_POWER_TENS_INT128[0] = new UnsignedInt128(0);
    INVERSE_POWER_TENS_INT128[0] = new UnsignedInt128(0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFFFFF);
    INVERSE_POWER_TENS_INT128_WORD_SHIFTS[0] = 0;

    // The inverse is tracked in 256 bits (8 words, all ones to start) and
    // divided by ten on every step; the stored 128-bit value is the four
    // most significant non-zero-prefixed words, with the number of skipped
    // zero words recorded alongside.
    int[] inverseTens = new int[8];
    Arrays.fill(inverseTens, 0xFFFFFFFF);
    for (int i = 1; i < POWER_TENS_INT128.length; ++i) {
      final int divisor = 10;
      POWER_TENS_INT128[i] = new UnsignedInt128(POWER_TENS_INT128[i - 1]);
      POWER_TENS_INT128[i].multiplyDestructive(divisor);
      ROUND_POWER_TENS_INT128[i] = POWER_TENS_INT128[i].shiftRightConstructive(
          1, false);

      // inverseTens /= 10 (short division; remainder < 10 so no overflow)
      long remainder = 0;
      for (int j = inverseTens.length - 1; j >= 0; --j) {
        final long partial = (inverseTens[j] & LONG_MASK) + (remainder << 32);
        inverseTens[j] = (int) (partial / divisor);
        remainder = partial % divisor;
      }

      // Count the leading zero words among the upper four.
      int wordShifts = 0;
      for (int j = inverseTens.length - 1; j >= 4 && inverseTens[j] == 0; --j) {
        ++wordShifts;
      }
      INVERSE_POWER_TENS_INT128_WORD_SHIFTS[i] = wordShifts;
      INVERSE_POWER_TENS_INT128[i] = new UnsignedInt128(
          inverseTens[inverseTens.length - 4 - wordShifts],
          inverseTens[inverseTens.length - 3 - wordShifts],
          inverseTens[inverseTens.length - 2 - wordShifts],
          inverseTens[inverseTens.length - 1 - wordShifts]);
    }
  }

  /** Utility class; not instantiable. */
  private SqlMathUtil() {
  }
}