/*
 * Copyright (c) 1996, 1999, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

package sun.io;
import java.io.*;


/**
 * Convert byte arrays containing Unicode characters into arrays of actual
 * Unicode characters.  This class may be used directly, in which case it
 * expects the input byte array to begin with a byte-order mark, or it may be
 * subclassed in order to preset the byte order and mark behavior.
 *
 * <p> Whether or not a mark is expected, if a mark that does not match the
 * established byte order is later discovered then a
 * <tt>MalformedInputException</tt> will be thrown by the <tt>convert</tt>
 * method.  If a correct mark is seen later in the input stream, it is passed
 * through as a character.
 *
 * <p> Instances are stateful: the byte order in use, whether the initial mark
 * has been processed, and an unpaired trailing byte are all remembered
 * between calls to <tt>convert</tt> until <tt>reset</tt> is called.
 *
 * @see ByteToCharUnicodeLittle
 * @see ByteToCharUnicodeLittleUnmarked
 * @see ByteToCharUnicodeBig
 * @see ByteToCharUnicodeBigUnmarked
 *
 * @author      Mark Reinhold
 */

public class ByteToCharUnicode extends ByteToCharConverter {

    /** The byte-order mark as it reads when decoded with the correct order. */
    static final char BYTE_ORDER_MARK = (char) 0xfeff;

    /** The byte-order mark as it reads when decoded with the wrong order. */
    static final char REVERSED_MARK = (char) 0xfffe;

    /** Byte order not yet known; it is taken from the initial mark. */
    static final int AUTO = 0;

    /** Big-endian: the first byte of each pair is the high-order byte. */
    static final int BIG = 1;

    /** Little-endian: the first byte of each pair is the low-order byte. */
    static final int LITTLE = 2;

    int originalByteOrder;      /* Byte order specified at creation */
    int byteOrder;              /* Byte order in use */
    boolean usesMark;           /* Look for a mark and interpret it */

    /**
     * Creates a Unicode byte-to-char converter that expects the first pair of
     * input bytes to be a byte-order mark, which will be interpreted and
     * discarded.  If the first pair of bytes is not such a mark then a
     * <tt>MalformedInputException</tt> will be thrown by the convert method.
     */
    public ByteToCharUnicode() {
        originalByteOrder = byteOrder = AUTO;
        usesMark = true;
    }

    /**
     * Creates a Unicode byte-to-char converter that uses the given byte order
     * and may or may not insist upon an initial byte-order mark.
     *
     * @param  bo  the preset byte order: <tt>AUTO</tt>, <tt>BIG</tt> or
     *             <tt>LITTLE</tt>
     * @param  m   whether the first pair of input bytes should be examined
     *             for a byte-order mark
     */
    protected ByteToCharUnicode(int bo, boolean m) {
        originalByteOrder = byteOrder = bo;
        usesMark = m;
    }

    /**
     * Returns the name of the encoding handled by this converter.  The name
     * depends only on the byte order and mark behavior given at creation,
     * not on any byte order detected later from the input.
     *
     * @return  <tt>"UnicodeBig"</tt>, <tt>"UnicodeBigUnmarked"</tt>,
     *          <tt>"UnicodeLittle"</tt>, <tt>"UnicodeLittleUnmarked"</tt>,
     *          or <tt>"Unicode"</tt> when no byte order was preset
     */
    public String getCharacterEncoding() {
        switch (originalByteOrder) {
        case BIG:
            return usesMark ? "UnicodeBig" : "UnicodeBigUnmarked";
        case LITTLE:
            return usesMark ? "UnicodeLittle" : "UnicodeLittleUnmarked";
        default:
            return "Unicode";
        }
    }

    boolean started = false;    /* Initial mark position has been processed */
    int leftOverByte;           /* Unpaired byte saved from the previous call */
    boolean leftOver = false;   /* leftOverByte holds a pending byte */

    /**
     * Converts pairs of input bytes into characters.
     *
     * <p> A trailing unpaired byte is remembered and combined with the first
     * byte of the next call.  When a mark is expected, the first pair of
     * bytes seen after creation or <tt>reset</tt> is examined: a matching
     * mark is discarded, and a missing mark is an error only when no byte
     * order was preset.
     *
     * <p> The inherited fields <tt>byteOff</tt> and <tt>charOff</tt> (not
     * declared in this file; presumably reported to callers by the
     * superclass) are kept pointing just past the last input byte consumed
     * and the last character written.
     *
     * @param  in      input byte array
     * @param  inOff   index of the first input byte to read
     * @param  inEnd   index just past the last input byte to read
     * @param  out     output character array
     * @param  outOff  index at which to store the first character
     * @param  outEnd  index just past the last usable output slot
     *
     * @return  the number of characters stored in <tt>out</tt>
     *
     * @throws  ConversionBufferFullException
     *          if a character was decoded but <tt>out</tt> has no room for it
     * @throws  MalformedInputException
     *          if a required initial mark is missing, the initial mark
     *          contradicts the preset byte order, or a reversed mark appears
     *          later in the input
     */
    public int convert(byte[] in, int inOff, int inEnd,
                       char[] out, int outOff, int outEnd)
        throws ConversionBufferFullException, MalformedInputException
    {
        byteOff = inOff;
        charOff = outOff;

        if (inOff >= inEnd)
            return 0;

        int b1, b2;
        int bc = 0;
        int inI = inOff, outI = outOff;

        /* True while b1 is the byte carried over from the previous call,
           i.e. a byte that is not present anywhere in this call's input */
        boolean carried = leftOver;

        if (leftOver) {
            b1 = leftOverByte & 0xff;
            leftOver = false;
        } else {
            b1 = in[inI++] & 0xff;
        }
        bc = 1;

        if (usesMark && !started) {     /* Read initial byte-order mark */
            if (inI < inEnd) {
                b2 = in[inI++] & 0xff;
                bc = 2;

                /* The mark is always tested as a big-endian pair: FE FF
                   announces big-endian input, FF FE little-endian input */
                char c = (char) ((b1 << 8) | b2);
                int bo = AUTO;

                if (c == BYTE_ORDER_MARK)
                    bo = BIG;
                else if (c == REVERSED_MARK)
                    bo = LITTLE;

                if (bo == AUTO) {
                    /* The first pair is not a mark */
                    if (byteOrder == AUTO) {
                        badInputLength = bc;
                        throw new
                            MalformedInputException("Missing byte-order mark");
                    }
                    /* A byte order was preset, so the pair is ordinary data:
                       push b2 back and let the main loop decode it */
                    inI--;
                    bc = 1;
                } else {
                    /* The first pair is a mark */
                    if (byteOrder != AUTO && byteOrder != bo) {
                        badInputLength = bc;
                        throw new
                            MalformedInputException("Incorrect byte-order mark");
                    }
                    byteOrder = bo;

                    /* The mark is consumed for good (started is set below),
                       so move byteOff past it; otherwise a caller resuming
                       from byteOff after a full output buffer would feed the
                       mark back in and get it decoded as a character */
                    byteOff = inI;

                    if (inI < inEnd) {
                        b1 = in[inI++] & 0xff;
                        bc = 1;
                        carried = false;
                    }
                }

                started = true;
            }
        }

        /* Loop invariant: (b1 contains the next input byte) && (bc == 1) */
        while (inI < inEnd) {
            b2 = in[inI++] & 0xff;
            bc = 2;

            char c;
            if (byteOrder == BIG)
                c = (char) ((b1 << 8) | b2);
            else
                c = (char) ((b2 << 8) | b1);

            if (c == REVERSED_MARK) {
                /* Record the length of the offending pair, as the other
                   malformed-input cases in this method do */
                badInputLength = bc;
                throw new
                    MalformedInputException("Reversed byte-order mark");
            }

            if (outI >= outEnd) {
                /* Nothing was stored for this pair.  If its first byte came
                   from the previous call, put it back so that a retry starts
                   on the same pair boundary instead of losing the byte */
                if (carried) {
                    leftOverByte = b1;
                    leftOver = true;
                }
                throw new ConversionBufferFullException();
            }
            out[outI++] = c;
            byteOff = inI;
            charOff = outI;
            carried = false;

            if (inI < inEnd) {
                b1 = in[inI++] & 0xff;
                bc = 1;
            }
        }

        if (bc == 1) {
            /* Odd byte at the end of the input: keep it for the next call */
            leftOverByte = b1;
            byteOff = inI;
            leftOver = true;
        }

        return outI - outOff;
    }

    /**
     * Returns this converter to its initial state: any pending unpaired byte
     * is dropped, the byte order reverts to the one given at creation, and
     * the next call to <tt>convert</tt> processes the initial mark position
     * again.
     */
    public void reset() {
        leftOver = false;
        byteOff = charOff = 0;
        started = false;
        byteOrder = originalByteOrder;
    }

    /**
     * Finishes a conversion.  This converter never holds back characters, so
     * nothing is written to <tt>buf</tt>; the only thing checked is that the
     * input did not end in the middle of a byte pair.
     *
     * @param  buf  unused
     * @param  off  unused
     * @param  len  unused
     *
     * @return  always zero
     *
     * @throws  MalformedInputException
     *          if an unpaired byte is still pending; the converter is reset
     *          before the exception is thrown
     */
    public int flush(char buf[], int off, int len)
        throws MalformedInputException
    {
        if (leftOver) {
            reset();
            throw new MalformedInputException();
        }
        byteOff = charOff = 0;
        return 0;
    }

}