/* * Copyright 1996-1997 Sun Microsystems, Inc. All Rights Reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 only, as * published by the Free Software Foundation. Sun designates this * particular file as subject to the "Classpath" exception as provided * by Sun in the LICENSE file that accompanied this code. * * This code is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * version 2 for more details (a copy is included in the LICENSE file that * accompanied this code). * * You should have received a copy of the GNU General Public License version * 2 along with this work; if not, write to the Free Software Foundation, * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. * * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, * CA 95054 USA or visit www.sun.com if you need additional information or * have any questions. */ package sun.io; /** * UCS2 (UTF16) -> UCS Transformation Format 8 (UTF-8) converter * It's represented like below. * * # Bits Bit pattern * 1 7 0xxxxxxx * 2 11 110xxxxx 10xxxxxx * 3 16 1110xxxx 10xxxxxx 10xxxxxx * 4 21 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx * 5 26 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx * 6 31 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx * * UCS2 uses 1-3 / UTF16 uses 1-4 / UCS4 uses 1-6 */ public class CharToByteUTF8 extends CharToByteConverter { private char highHalfZoneCode; public int flush(byte[] output, int outStart, int outEnd) throws MalformedInputException { if (highHalfZoneCode != 0) { highHalfZoneCode = 0; badInputLength = 0; throw new MalformedInputException(); } byteOff = charOff = 0; return 0; } /** * Character conversion */ public int convert(char[] input, int inOff, int inEnd, byte[] output, int outOff, int outEnd) throws ConversionBufferFullException, MalformedInputException { char inputChar; byte[] outputByte = new byte[6]; int inputSize; int outputSize; charOff = inOff; byteOff = outOff; if (highHalfZoneCode != 0) { inputChar = highHalfZoneCode; highHalfZoneCode = 0; if (input[inOff] >= 0xdc00 && input[inOff] <= 0xdfff) { // This is legal UTF16 sequence. int ucs4 = (highHalfZoneCode - 0xd800) * 0x400 + (input[inOff] - 0xdc00) + 0x10000; output[0] = (byte)(0xf0 | ((ucs4 >> 18)) & 0x07); output[1] = (byte)(0x80 | ((ucs4 >> 12) & 0x3f)); output[2] = (byte)(0x80 | ((ucs4 >> 6) & 0x3f)); output[3] = (byte)(0x80 | (ucs4 & 0x3f)); charOff++; highHalfZoneCode = 0; } else { // This is illegal UTF16 sequence. badInputLength = 0; throw new MalformedInputException(); } } while(charOff < inEnd) { inputChar = input[charOff]; if (inputChar < 0x80) { outputByte[0] = (byte)inputChar; inputSize = 1; outputSize = 1; } else if (inputChar < 0x800) { outputByte[0] = (byte)(0xc0 | ((inputChar >> 6) & 0x1f)); outputByte[1] = (byte)(0x80 | (inputChar & 0x3f)); inputSize = 1; outputSize = 2; } else if (inputChar >= 0xd800 && inputChar <= 0xdbff) { // this is <high-half zone code> in UTF-16 if (charOff + 1 >= inEnd) { highHalfZoneCode = inputChar; break; } // check next char is valid <low-half zone code> char lowChar = input[charOff + 1]; if (lowChar < 0xdc00 || lowChar > 0xdfff) { badInputLength = 1; throw new MalformedInputException(); } int ucs4 = (inputChar - 0xd800) * 0x400 + (lowChar - 0xdc00) + 0x10000; outputByte[0] = (byte)(0xf0 | ((ucs4 >> 18)) & 0x07); outputByte[1] = (byte)(0x80 | ((ucs4 >> 12) & 0x3f)); outputByte[2] = (byte)(0x80 | ((ucs4 >> 6) & 0x3f)); outputByte[3] = (byte)(0x80 | (ucs4 & 0x3f)); outputSize = 4; inputSize = 2; } else { outputByte[0] = (byte)(0xe0 | ((inputChar >> 12)) & 0x0f); outputByte[1] = (byte)(0x80 | ((inputChar >> 6) & 0x3f)); outputByte[2] = (byte)(0x80 | (inputChar & 0x3f)); inputSize = 1; outputSize = 3; } if (byteOff + outputSize > outEnd) { throw new ConversionBufferFullException(); } for (int i = 0; i < outputSize; i++) { output[byteOff++] = outputByte[i]; } charOff += inputSize; } return byteOff - outOff; } public boolean canConvert(char ch) { return true; } public int getMaxBytesPerChar() { return 3; } public void reset() { byteOff = charOff = 0; highHalfZoneCode = 0; } public String getCharacterEncoding() { return "UTF8"; } }