/* * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 only, as * published by the Free Software Foundation. Oracle designates this * particular file as subject to the "Classpath" exception as provided * by Oracle in the LICENSE file that accompanied this code. * * This code is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * version 2 for more details (a copy is included in the LICENSE file that * accompanied this code). * * You should have received a copy of the GNU General Public License version * 2 along with this work; if not, write to the Free Software Foundation, * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. * * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA * or visit www.oracle.com if you need additional information or have any * questions. */ /* ******************************************************************************* * Copyright (C) 2009-2014, International Business Machines Corporation and * others. All Rights Reserved. ******************************************************************************* */ package sun.text.normalizer; import java.io.IOException; import java.nio.ByteBuffer; /** * @author aheninger * * A read-only Trie2, holding 16 bit data values. * * A Trie2 is a highly optimized data structure for mapping from Unicode * code points (values ranging from 0 to 0x10ffff) to a 16 or 32 bit value. * * See class Trie2 for descriptions of the API for accessing the contents of a trie. * * The fundamental data access methods are declared final in this class, with * the intent that applications might gain a little extra performance, when compared * with calling the same methods via the abstract UTrie2 base class. */ public final class Trie2_16 extends Trie2 { /** * Internal constructor, not for general use. */ Trie2_16() { } /** * Create a Trie2 from its serialized form. Inverse of utrie2_serialize(). * The serialized format is identical between ICU4C and ICU4J, so this function * will work with serialized Trie2s from either. * * The serialized Trie2 in the bytes may be in either little or big endian byte order. * This allows using serialized Tries from ICU4C without needing to consider the * byte order of the system that created them. * * @param bytes a byte buffer to the serialized form of a UTrie2. * @return An unserialized Trie2_16, ready for use. * @throws IllegalArgumentException if the buffer does not contain a serialized Trie2. * @throws IOException if a read error occurs in the buffer. * @throws ClassCastException if the bytes contain a serialized Trie2_32 */ public static Trie2_16 createFromSerialized(ByteBuffer bytes) throws IOException { return (Trie2_16) Trie2.createFromSerialized(bytes); } /** * Get the value for a code point as stored in the Trie2. * * @param codePoint the code point * @return the value */ @Override public final int get(int codePoint) { int value; int ix; if (codePoint >= 0) { if (codePoint < 0x0d800 || (codePoint > 0x0dbff && codePoint <= 0x0ffff)) { // Ordinary BMP code point, excluding leading surrogates. // BMP uses a single level lookup. BMP index starts at offset 0 in the Trie2 index. // 16 bit data is stored in the index array itself. ix = index[codePoint >> UTRIE2_SHIFT_2]; ix = (ix << UTRIE2_INDEX_SHIFT) + (codePoint & UTRIE2_DATA_MASK); value = index[ix]; return value; } if (codePoint <= 0xffff) { // Lead Surrogate Code Point. A Separate index section is stored for // lead surrogate code units and code points. // The main index has the code unit data. // For this function, we need the code point data. // Note: this expression could be refactored for slightly improved efficiency, but // surrogate code points will be so rare in practice that it's not worth it. ix = index[UTRIE2_LSCP_INDEX_2_OFFSET + ((codePoint - 0xd800) >> UTRIE2_SHIFT_2)]; ix = (ix << UTRIE2_INDEX_SHIFT) + (codePoint & UTRIE2_DATA_MASK); value = index[ix]; return value; } if (codePoint < highStart) { // Supplemental code point, use two-level lookup. ix = (UTRIE2_INDEX_1_OFFSET - UTRIE2_OMITTED_BMP_INDEX_1_LENGTH) + (codePoint >> UTRIE2_SHIFT_1); ix = index[ix]; ix += (codePoint >> UTRIE2_SHIFT_2) & UTRIE2_INDEX_2_MASK; ix = index[ix]; ix = (ix << UTRIE2_INDEX_SHIFT) + (codePoint & UTRIE2_DATA_MASK); value = index[ix]; return value; } if (codePoint <= 0x10ffff) { value = index[highValueIndex]; return value; } } // Fall through. The code point is outside of the legal range of 0..0x10ffff. return errorValue; } /** * Get a Trie2 value for a UTF-16 code unit. * * This function returns the same value as get() if the input * character is outside of the lead surrogate range * * There are two values stored in a Trie2 for inputs in the lead * surrogate range. This function returns the alternate value, * while Trie2.get() returns the main value. * * @param codeUnit a 16 bit code unit or lead surrogate value. * @return the value */ @Override public int getFromU16SingleLead(char codeUnit) { int value; int ix; // Because the input is a 16 bit char, we can skip the tests for it being in // the BMP range. It is. ix = index[codeUnit >> UTRIE2_SHIFT_2]; ix = (ix << UTRIE2_INDEX_SHIFT) + (codeUnit & UTRIE2_DATA_MASK); value = index[ix]; return value; } /** * @return the number of bytes of the serialized trie */ public int getSerializedLength() { return 16+(header.indexLength+dataLength)*2; } }