/* * Licensed to ElasticSearch and Shay Banon under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. ElasticSearch licenses this * file to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ package org.fastcatsearch.common; /** * */ public class Bytes { /** * Returns an array size >= minTargetSize, generally * over-allocating exponentially to achieve amortized * linear-time cost as the array grows. * <p/> * NOTE: this was originally borrowed from Python 2.4.2 * listobject.c sources (attribution in LICENSE.txt), but * has now been substantially changed based on * discussions from java-dev thread with subject "Dynamic * array reallocation algorithms", started on Jan 12 * 2010. * * @param minTargetSize Minimum required value to be returned. * @param bytesPerElement Bytes used by each element of * the array. See constants in {@link RamUsageEstimator}. * @lucene.internal */ public static int oversize(int minTargetSize, int bytesPerElement) { if (minTargetSize < 0) { // catch usage that accidentally overflows int throw new IllegalArgumentException("invalid array size " + minTargetSize); } if (minTargetSize == 0) { // wait until at least one element is requested return 0; } // asymptotic exponential growth by 1/8th, favors // spending a bit more CPU to not tie up too much wasted // RAM: int extra = minTargetSize >> 3; if (extra < 3) { // for very small arrays, where constant overhead of // realloc is presumably relatively high, we grow // faster extra = 3; } int newSize = minTargetSize + extra; // add 7 to allow for worst case byte alignment addition below: if (newSize + 7 < 0) { // int overflowed -- return max allowed array size return Integer.MAX_VALUE; } // if (JvmUtils.JRE_IS_64BIT) { // round up to 8 byte alignment in 64bit env switch (bytesPerElement) { case 4: // round up to multiple of 2 return (newSize + 1) & 0x7ffffffe; case 2: // round up to multiple of 4 return (newSize + 3) & 0x7ffffffc; case 1: // round up to multiple of 8 return (newSize + 7) & 0x7ffffff8; case 8: // no rounding default: // odd (invalid?) size return newSize; } // } else { // // round up to 4 byte alignment in 64bit env // switch (bytesPerElement) { // case 2: // // round up to multiple of 2 // return (newSize + 1) & 0x7ffffffe; // case 1: // // round up to multiple of 4 // return (newSize + 3) & 0x7ffffffc; // case 4: // case 8: // // no rounding // default: // // odd (invalid?) size // return newSize; // } // } } public static final byte[] EMPTY_ARRAY = new byte[0]; final static int[] sizeTable = {9, 99, 999, 9999, 99999, 999999, 9999999, 99999999, 999999999, Integer.MAX_VALUE}; private static final byte[] LONG_MIN_VALUE_BYTES = "-9223372036854775808".getBytes(); // Requires positive x static int stringSize(int x) { for (int i = 0; ; i++) if (x <= sizeTable[i]) return i + 1; } /** * Blatant copy of Integer.toString, but returning a byte array instead of a String, as * string charset decoding/encoding was killing us on performance. * * @param i integer to convert * @return byte[] array containing literal ASCII char representation */ public static byte[] itoa(int i) { int size = (i < 0) ? stringSize(-i) + 1 : stringSize(i); byte[] buf = new byte[size]; getChars(i, size, buf); return buf; } final static byte[] digits = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z' }; final static byte[] DigitTens = { '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '2', '2', '2', '2', '2', '2', '2', '2', '2', '2', '3', '3', '3', '3', '3', '3', '3', '3', '3', '3', '4', '4', '4', '4', '4', '4', '4', '4', '4', '4', '5', '5', '5', '5', '5', '5', '5', '5', '5', '5', '6', '6', '6', '6', '6', '6', '6', '6', '6', '6', '7', '7', '7', '7', '7', '7', '7', '7', '7', '7', '8', '8', '8', '8', '8', '8', '8', '8', '8', '8', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', }; final static byte[] DigitOnes = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', }; static void getChars(int i, int index, byte[] buf) { int q, r; int charPos = index; byte sign = 0; if (i < 0) { sign = '-'; i = -i; } // Generate two digits per iteration while (i >= 65536) { q = i / 100; // really: r = i - (q * 100); r = i - ((q << 6) + (q << 5) + (q << 2)); i = q; buf[--charPos] = DigitOnes[r]; buf[--charPos] = DigitTens[r]; } // Fall thru to fast mode for smaller numbers // assert(i <= 65536, i); for (; ; ) { q = (i * 52429) >>> (16 + 3); r = i - ((q << 3) + (q << 1)); // r = i-(q*10) ... buf[--charPos] = digits[r]; i = q; if (i == 0) break; } if (sign != 0) { buf[--charPos] = sign; } } public static int atoi(byte[] s) throws NumberFormatException { int result = 0; boolean negative = false; int i = 0, len = s.length; int limit = -Integer.MAX_VALUE; int multmin; int digit; if (len > 0) { byte firstChar = s[0]; if (firstChar < '0') { // Possible leading "-" if (firstChar == '-') { negative = true; limit = Integer.MIN_VALUE; } else throw new NumberFormatException(); if (len == 1) // Cannot have lone "-" throw new NumberFormatException(); i++; } multmin = limit / 10; while (i < len) { // Accumulating negatively avoids surprises near MAX_VALUE digit = Character.digit(s[i++], 10); if (digit < 0) { throw new NumberFormatException(); } if (result < multmin) { throw new NumberFormatException(); } result *= 10; if (result < limit + digit) { throw new NumberFormatException(); } result -= digit; } } else { throw new NumberFormatException(); } return negative ? result : -result; } public static byte[] ltoa(long i) { if (i == Long.MIN_VALUE) return LONG_MIN_VALUE_BYTES; int size = (i < 0) ? stringSize(-i) + 1 : stringSize(i); byte[] buf = new byte[size]; getChars(i, size, buf); return buf; } /** * Places characters representing the integer i into the * character array buf. The characters are placed into * the buffer backwards starting with the least significant * digit at the specified index (exclusive), and working * backwards from there. * <p/> * Will fail if i == Long.MIN_VALUE */ static void getChars(long i, int index, byte[] buf) { long q; int r; int charPos = index; byte sign = 0; if (i < 0) { sign = '-'; i = -i; } // Get 2 digits/iteration using longs until quotient fits into an int while (i > Integer.MAX_VALUE) { q = i / 100; // really: r = i - (q * 100); r = (int) (i - ((q << 6) + (q << 5) + (q << 2))); i = q; buf[--charPos] = DigitOnes[r]; buf[--charPos] = DigitTens[r]; } // Get 2 digits/iteration using ints int q2; int i2 = (int) i; while (i2 >= 65536) { q2 = i2 / 100; // really: r = i2 - (q * 100); r = i2 - ((q2 << 6) + (q2 << 5) + (q2 << 2)); i2 = q2; buf[--charPos] = DigitOnes[r]; buf[--charPos] = DigitTens[r]; } // Fall thru to fast mode for smaller numbers // assert(i2 <= 65536, i2); for (; ; ) { q2 = (i2 * 52429) >>> (16 + 3); r = i2 - ((q2 << 3) + (q2 << 1)); // r = i2-(q2*10) ... buf[--charPos] = digits[r]; i2 = q2; if (i2 == 0) break; } if (sign != 0) { buf[--charPos] = sign; } } // Requires positive x static int stringSize(long x) { long p = 10; for (int i = 1; i < 19; i++) { if (x < p) return i; p = 10 * p; } return 19; } public static long atol(byte[] s) throws NumberFormatException { long result = 0; boolean negative = false; int i = 0, len = s.length; long limit = -Long.MAX_VALUE; long multmin; int digit; if (len > 0) { byte firstChar = s[0]; if (firstChar < '0') { // Possible leading "-" if (firstChar == '-') { negative = true; limit = Long.MIN_VALUE; } else throw new NumberFormatException(); if (len == 1) // Cannot have lone "-" throw new NumberFormatException(); i++; } multmin = limit / 10; while (i < len) { // Accumulating negatively avoids surprises near MAX_VALUE digit = Character.digit(s[i++], 10); if (digit < 0) { throw new NumberFormatException(); } if (result < multmin) { throw new NumberFormatException(); } result *= 10; if (result < limit + digit) { throw new NumberFormatException(); } result -= digit; } } else { throw new NumberFormatException(); } return negative ? result : -result; } }