/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.hadoop.hive.common.type; import java.util.Arrays; /** * This code was based on code from Microsoft's PolyBase. * * Misc utilities used in this package. */ public final class SqlMathUtil { /** Mask to convert a long to a negative long. */ public static final long NEGATIVE_LONG_MASK = 0x8000000000000000L; /** Mask to convert a long to an unsigned long. */ public static final long FULLBITS_63 = 0x7FFFFFFFFFFFFFFFL; /** Mask to convert an int to a negative int. */ public static final int NEGATIVE_INT_MASK = 0x80000000; /** Mask to convert signed integer to unsigned long. */ public static final long LONG_MASK = 0xFFFFFFFFL; /** Mask to convert an int to an unsigned int. */ public static final int FULLBITS_31 = 0x7FFFFFFF; /** Max unsigned integer. */ public static final int FULLBITS_32 = 0xFFFFFFFF; /** 5^13 fits in 2^31. */ public static final int MAX_POWER_FIVE_INT31 = 13; /** 5^x. All unsigned values. */ public static final int[] POWER_FIVES_INT31 = new int[MAX_POWER_FIVE_INT31 + 1]; /** 5^27 fits in 2^63. */ public static final int MAX_POWER_FIVE_INT63 = 27; /** 5^x. All unsigned values. */ public static final long[] POWER_FIVES_INT63 = new long[MAX_POWER_FIVE_INT63 + 1]; /** 5^55 fits in 2^128. */ public static final int MAX_POWER_FIVE_INT128 = 55; /** 5^x. */ public static final UnsignedInt128[] POWER_FIVES_INT128 = new UnsignedInt128[MAX_POWER_FIVE_INT128 + 1]; /** * 1/5^x, scaled to 128bits (in other words, 2^128/5^x). Because of flooring, * this is same or smaller than real value. */ public static final UnsignedInt128[] INVERSE_POWER_FIVES_INT128 = new UnsignedInt128[MAX_POWER_FIVE_INT128 + 1]; /** 10^9 fits in 2^31. */ public static final int MAX_POWER_TEN_INT31 = 9; /** 10^x. All unsigned values. */ public static final int[] POWER_TENS_INT31 = new int[MAX_POWER_TEN_INT31 + 1]; /** 5 * 10^(x-1). */ public static final int[] ROUND_POWER_TENS_INT31 = new int[MAX_POWER_TEN_INT31 + 1]; /** 10^38 fits in UnsignedInt128. */ public static final int MAX_POWER_TEN_INT128 = 38; /** 10^x. */ public static final UnsignedInt128[] POWER_TENS_INT128 = new UnsignedInt128[MAX_POWER_TEN_INT128 + 1]; /** 5 * 10^(x-1). */ public static final UnsignedInt128[] ROUND_POWER_TENS_INT128 = new UnsignedInt128[MAX_POWER_TEN_INT128 + 1]; /** * 1/10^x, scaled to 128bits, also word-shifted for better accuracy. Because * of flooring, this is same or smaller than real value. */ public static final UnsignedInt128[] INVERSE_POWER_TENS_INT128 = new UnsignedInt128[MAX_POWER_TEN_INT128 + 1]; /** number of words shifted up in each INVERSE_POWER_TENS_INT128. */ public static final int[] INVERSE_POWER_TENS_INT128_WORD_SHIFTS = new int[MAX_POWER_TEN_INT128 + 1]; /** To quickly calculate bit length for up to 256. */ private static final byte[] BIT_LENGTH; /** Used in division. */ private static final long BASE = (1L << 32); /** * Turn on or off the highest bit of an int value. * * @param val * the value to modify * @param positive * whether to turn off (positive) or on (negative). * @return unsigned int value */ public static int setSignBitInt(int val, boolean positive) { if (positive) { return val & FULLBITS_31; } return val | NEGATIVE_INT_MASK; } /** * Turn on or off the highest bit of a long value. * * @param val * the value to modify * @param positive * whether to turn off (positive) or on (negative). * @return unsigned long value */ public static long setSignBitLong(long val, boolean positive) { if (positive) { return val & FULLBITS_63; } return val | NEGATIVE_LONG_MASK; } /** * Returns the minimal number of bits to represent the given integer value. * * @param word * int32 value * @return the minimal number of bits to represent the given integer value */ public static short bitLengthInWord(int word) { if (word < 0) { return 32; } if (word < (1 << 16)) { if (word < 1 << 8) { return BIT_LENGTH[word]; } else { return (short) (BIT_LENGTH[word >>> 8] + 8); } } else { if (word < (1 << 24)) { return (short) (BIT_LENGTH[word >>> 16] + 16); } else { return (short) (BIT_LENGTH[word >>> 24] + 24); } } } /** * Returns the minimal number of bits to represent the words. * * @param v0 * v0 * @param v1 * v1 * @param v2 * v2 * @param v3 * v3 * @return the minimal number of bits to represent the words */ public static short bitLength(int v0, int v1, int v2, int v3) { if (v3 != 0) { return (short) (bitLengthInWord(v3) + 96); } if (v2 != 0) { return (short) (bitLengthInWord(v2) + 64); } if (v1 != 0) { return (short) (bitLengthInWord(v1) + 32); } return bitLengthInWord(v0); } /** * If we can assume JDK 1.8, this should use * java.lang.Integer.compareUnsigned(), which will be replaced with intrinsics * in JVM. * * @param x * the first {@code int} to compare * @param y * the second {@code int} to compare * @return the value {@code 0} if {@code x == y}; a value less than {@code 0} * if {@code x < y} as unsigned values; and a value greater than * {@code 0} if {@code x > y} as unsigned values * @see "http://hg.openjdk.java.net/jdk8/tl/jdk/rev/71200c517524" */ public static int compareUnsignedInt(int x, int y) { // Can't assume JDK 1.8, so implementing this explicitly. // return Integer.compare(x + Integer.MIN_VALUE, y + Integer.MIN_VALUE); if (x == y) { return 0; } if (x + Integer.MIN_VALUE < y + Integer.MIN_VALUE) { return -1; } else { return 1; } } /** * If we can assume JDK 1.8, this should use java.lang.Long.compareUnsigned(), * which will be replaced with intrinsics in JVM. * * @param x * the first {@code int} to compare * @param y * the second {@code int} to compare * @return the value {@code 0} if {@code x == y}; a value less than {@code 0} * if {@code x < y} as unsigned values; and a value greater than * {@code 0} if {@code x > y} as unsigned values * @see "http://hg.openjdk.java.net/jdk8/tl/jdk/rev/71200c517524" */ public static int compareUnsignedLong(long x, long y) { // Can't assume JDK 1.8, so implementing this explicitly. // return Long.compare(x + Long.MIN_VALUE, y + Long.MIN_VALUE); if (x == y) { return 0; } if (x + Long.MIN_VALUE < y + Long.MIN_VALUE) { return -1; } else { return 1; } } /** * If we can assume JDK 1.8, this should use java.lang.Long.divideUnsigned(), * which will be replaced with intrinsics in JVM. * * @param dividend * the value to be divided * @param divisor * the value doing the dividing * @return the unsigned quotient of the first argument divided by the second * argument * @see "http://hg.openjdk.java.net/jdk8/tl/jdk/rev/71200c517524" */ public static long divideUnsignedLong(long dividend, long divisor) { if (divisor < 0L) { // Answer must be 0 or 1 depending on relative magnitude // of dividend and divisor. return (compareUnsignedLong(dividend, divisor)) < 0 ? 0L : 1L; } if (dividend >= 0) { // Both inputs non-negative return dividend / divisor; } else { // simple division. // Yes, we should do something like this: // http://www.hackersdelight.org/divcMore.pdf // but later... (anyway this will be eventually replaced by // intrinsics in Java 8) // an equivalent algorithm exists in // com.google.common.primitives.UnsingedLongs long quotient = ((dividend >>> 1L) / divisor) << 1L; long remainder = dividend - quotient * divisor; if (compareUnsignedLong(remainder, divisor) >= 0) { return quotient + 1; } return quotient; } } /** * If we can assume JDK 1.8, this should use * java.lang.Long.remainderUnsigned(), which will be replaced with intrinsics * in JVM. * * @param dividend * the value to be divided * @param divisor * the value doing the dividing * @return the unsigned remainder of the first argument divided by the second * argument * @see "http://hg.openjdk.java.net/jdk8/tl/jdk/rev/71200c517524" */ public static long remainderUnsignedLong(long dividend, long divisor) { if (divisor < 0L) { // because divisor is negative, quotient is at most 1. // remainder must be dividend itself (quotient=0), or dividend - // divisor return (compareUnsignedLong(dividend, divisor)) < 0 ? dividend : dividend - divisor; } if (dividend >= 0L) { // signed comparisons return dividend % divisor; } else { // same above long quotient = ((dividend >>> 1L) / divisor) << 1L; long remainder = dividend - quotient * divisor; if (compareUnsignedLong(remainder, divisor) >= 0) { return remainder - divisor; } return remainder; } } /** * @param lo * low 32bit * @param hi * high 32bit * @return long value that combines the two integers */ public static long combineInts(int lo, int hi) { return ((hi & LONG_MASK) << 32L) | (lo & LONG_MASK); } /** * @param val * long value * @return high 32bit of the given value */ public static int extractHiInt(long val) { return (int) (val >> 32); } /** * @param val * long value * @return low 32bit of the given value */ public static int extractLowInt(long val) { return (int) val; } /** Throws an overflow exception. */ static void throwOverflowException() { throw new ArithmeticException("Overflow"); } /** Throws a divide-by-zero exception. */ static void throwZeroDivisionException() { throw new ArithmeticException("Divide by zero"); } /** * Multi-precision one super-digit multiply in place. * * @param inOut * @param multiplier */ private static void multiplyMultiPrecision(int[] inOut, int multiplier) { long multiplierUnsigned = multiplier & SqlMathUtil.LONG_MASK; long product = 0L; for (int i = 0; i < inOut.length; ++i) { product = (inOut[i] & SqlMathUtil.LONG_MASK) * multiplierUnsigned + (product >>> 32); inOut[i] = (int) product; } if ((product >> 32) != 0) { SqlMathUtil.throwOverflowException(); } } /** * Multi-precision one super-digit divide in place. * * @param inOut * @param divisor * @return */ private static int divideMultiPrecision(int[] inOut, int divisor) { long divisorUnsigned = divisor & SqlMathUtil.LONG_MASK; long quotient; long remainder = 0; for (int i = inOut.length - 1; i >= 0; --i) { remainder = (inOut[i] & SqlMathUtil.LONG_MASK) + (remainder << 32); quotient = remainder / divisorUnsigned; inOut[i] = (int) quotient; remainder %= divisorUnsigned; } return (int) remainder; } /** * Returns length of the array discounting the trailing elements with zero value. */ private static int arrayValidLength(int[] array) { int len = array.length; while (len > 0 && array[len - 1] == 0) { --len; } return len <= 0 ? 0 : len; } /** * Multi-precision divide. dividend and divisor not changed. Assumes that * there is enough room in quotient for results. Drawbacks of this * implementation: 1) Need one extra super-digit in R 2) As it modifies D * during work, then it restores it back (this is necessary because the caller * doesn't expect D to change) 3) Always get Q and R - if R is unnecessary, * can be slightly faster. * * @param dividend * dividend. in. * @param divisor * divisor. in. * @param quotient * quotient. out. * @return remainder */ public static int[] divideMultiPrecision(int[] dividend, int[] divisor, int[] quotient) { final int dividendLength = arrayValidLength(dividend); final int divisorLength = arrayValidLength(divisor); Arrays.fill(quotient, 0); // Remainder := Dividend int[] remainder = new int[dividend.length + 1]; System.arraycopy(dividend, 0, remainder, 0, dividend.length); remainder[remainder.length - 1] = 0; if (divisorLength == 0) { throwZeroDivisionException(); } if (dividendLength < divisorLength) { return remainder; } if (divisorLength == 1) { int rem = divideMultiPrecision(remainder, divisor[0]); System.arraycopy(remainder, 0, quotient, 0, quotient.length); Arrays.fill(remainder, 0); remainder[0] = rem; return remainder; } // Knuth, "The Art of Computer Programming", 3rd edition, vol.II, Alg.D, // pg 272 // D1. Normalize so high digit of D >= BASE/2 - that guarantee // that QH will not be too far from the correct digit later in D3 int d1 = (int) (BASE / ((divisor[divisorLength - 1] & LONG_MASK) + 1L)); if (d1 > 1) { // We are modifying divisor here, so make a local copy. int[] newDivisor = new int[divisorLength]; System.arraycopy(divisor, 0, newDivisor, 0, divisorLength); multiplyMultiPrecision(newDivisor, d1); divisor = newDivisor; multiplyMultiPrecision(remainder, d1); } // only 32bits, but long to behave as unsigned long dHigh = (divisor[divisorLength - 1] & LONG_MASK); long dLow = (divisor[divisorLength - 2] & LONG_MASK); // D2 already done - iulRindex initialized before normalization of R. // D3-D7. Loop on iulRindex - obtaining digits one-by-one, as "in paper" for (int rIndex = remainder.length - 1; rIndex >= divisorLength; --rIndex) { // D3. Calculate Q hat - estimation of the next digit long accum = combineInts(remainder[rIndex - 1], remainder[rIndex]); int qhat; if (dHigh == (remainder[rIndex] & LONG_MASK)) { qhat = (int) (BASE - 1); } else { qhat = (int) divideUnsignedLong(accum, dHigh); } int rhat = (int) (accum - (qhat & LONG_MASK) * dHigh); while (compareUnsignedLong(dLow * (qhat & LONG_MASK), combineInts(remainder[rIndex - 2], rhat)) > 0) { qhat--; if ((rhat & LONG_MASK) >= -((int) dHigh)) { break; } rhat += dHigh; } // D4. Multiply and subtract: (some digits of) R -= D * QH long dwlMulAccum = 0; accum = BASE; int iulRwork = rIndex - divisorLength; for (int dIndex = 0; dIndex < divisorLength; dIndex++, iulRwork++) { dwlMulAccum += (qhat & LONG_MASK) * (divisor[dIndex] & LONG_MASK); accum += (remainder[iulRwork] & LONG_MASK) - (extractLowInt(dwlMulAccum) & LONG_MASK); dwlMulAccum = (extractHiInt(dwlMulAccum) & LONG_MASK); remainder[iulRwork] = extractLowInt(accum); accum = (extractHiInt(accum) & LONG_MASK) + BASE - 1; } accum += (remainder[iulRwork] & LONG_MASK) - dwlMulAccum; remainder[iulRwork] = extractLowInt(accum); quotient[rIndex - divisorLength] = qhat; // D5. Test remainder. Carry indicates result<0, therefore QH 1 too // large if (extractHiInt(accum) == 0) { // D6. Add back - probability is 2**(-31). R += D. Q[digit] -= 1 quotient[rIndex - divisorLength] = qhat - 1; int carry = 0; int dIndex = 0; for (iulRwork = rIndex - divisorLength; dIndex < divisorLength; dIndex++, iulRwork++) { long accum2 = (divisor[dIndex] & LONG_MASK) + (remainder[iulRwork] & LONG_MASK) + (carry & LONG_MASK); carry = extractHiInt(accum2); remainder[iulRwork] = extractLowInt(accum2); } remainder[iulRwork] += carry; } } // D8. Unnormalize: Divide R to get result if (d1 > 1) { divideMultiPrecision(remainder, d1); } return remainder; } static { BIT_LENGTH = new byte[256]; BIT_LENGTH[0] = 0; for (int i = 1; i < 8; ++i) { for (int j = 1 << (i - 1); j < 1 << i; ++j) { BIT_LENGTH[j] = (byte) i; } } POWER_FIVES_INT31[0] = 1; for (int i = 1; i < POWER_FIVES_INT31.length; ++i) { POWER_FIVES_INT31[i] = POWER_FIVES_INT31[i - 1] * 5; assert (POWER_FIVES_INT31[i] > 0); } POWER_FIVES_INT63[0] = 1L; for (int i = 1; i < POWER_FIVES_INT63.length; ++i) { POWER_FIVES_INT63[i] = POWER_FIVES_INT63[i - 1] * 5L; assert (POWER_FIVES_INT63[i] > 0L); } POWER_TENS_INT31[0] = 1; ROUND_POWER_TENS_INT31[0] = 0; for (int i = 1; i < POWER_TENS_INT31.length; ++i) { POWER_TENS_INT31[i] = POWER_TENS_INT31[i - 1] * 10; assert (POWER_TENS_INT31[i] > 0); ROUND_POWER_TENS_INT31[i] = POWER_TENS_INT31[i] >> 1; } POWER_FIVES_INT128[0] = new UnsignedInt128(1); INVERSE_POWER_FIVES_INT128[0] = new UnsignedInt128(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF); for (int i = 1; i < POWER_FIVES_INT128.length; ++i) { POWER_FIVES_INT128[i] = new UnsignedInt128(POWER_FIVES_INT128[i - 1]); POWER_FIVES_INT128[i].multiplyDestructive(5); INVERSE_POWER_FIVES_INT128[i] = new UnsignedInt128( INVERSE_POWER_FIVES_INT128[i - 1]); INVERSE_POWER_FIVES_INT128[i].divideDestructive(5); } POWER_TENS_INT128[0] = new UnsignedInt128(1); ROUND_POWER_TENS_INT128[0] = new UnsignedInt128(0); INVERSE_POWER_TENS_INT128[0] = new UnsignedInt128(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF); INVERSE_POWER_TENS_INT128_WORD_SHIFTS[0] = 0; int[] inverseTens = new int[8]; Arrays.fill(inverseTens, 0xFFFFFFFF); for (int i = 1; i < POWER_TENS_INT128.length; ++i) { final int divisor = 10; POWER_TENS_INT128[i] = new UnsignedInt128(POWER_TENS_INT128[i - 1]); POWER_TENS_INT128[i].multiplyDestructive(divisor); ROUND_POWER_TENS_INT128[i] = POWER_TENS_INT128[i].shiftRightConstructive( 1, false); long quotient; long remainder = 0; for (int j = inverseTens.length - 1; j >= 0; --j) { quotient = ((inverseTens[j] & SqlMathUtil.LONG_MASK) + (remainder << 32)) / divisor; remainder = ((inverseTens[j] & SqlMathUtil.LONG_MASK) + (remainder << 32)) % divisor; inverseTens[j] = (int) quotient; } int wordShifts = 0; for (int j = inverseTens.length - 1; j >= 4 && inverseTens[j] == 0; --j) { ++wordShifts; } INVERSE_POWER_TENS_INT128_WORD_SHIFTS[i] = wordShifts; INVERSE_POWER_TENS_INT128[i] = new UnsignedInt128( inverseTens[inverseTens.length - 4 - wordShifts], inverseTens[inverseTens.length - 3 - wordShifts], inverseTens[inverseTens.length - 2 - wordShifts], inverseTens[inverseTens.length - 1 - wordShifts]); } } private SqlMathUtil() { } }