/* * Copyright (c) 1997, 2003, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 only, as * published by the Free Software Foundation. Oracle designates this * particular file as subject to the "Classpath" exception as provided * by Oracle in the LICENSE file that accompanied this code. * * This code is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * version 2 for more details (a copy is included in the LICENSE file that * accompanied this code). * * You should have received a copy of the GNU General Public License version * 2 along with this work; if not, write to the Free Software Foundation, * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. * * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA * or visit www.oracle.com if you need additional information or have any * questions. */ package sun.io; import sun.nio.cs.ext.IBM949; /** * @author Malcolm Ayres */ public class CharToByteCp949 extends CharToByteConverter { private static final char SBase = '\uAC00'; private static final char LBase = '\u1100'; private static final char VBase = '\u1161'; private static final char TBase = '\u11A7'; private static final int VCount = 21; private static final int TCount = 28; private static final byte G0 = 0; private static final byte G1 = 1; private static final byte G2 = 2; private static final byte G3 = 3; private byte charState = G0; private char l, v, t; private byte[] outputByte; private char highHalfZoneCode; private int mask1; private int mask2; private int shift; private short[] index1; private String index2; private String index2a; private final static IBM949 nioCoder = new IBM949(); public CharToByteCp949() { super(); highHalfZoneCode = 0; outputByte = new byte[2]; mask1 = 0xFFF8; mask2 = 0x0007; shift = 3; index1 = nioCoder.getEncoderIndex1(); index2 = nioCoder.getEncoderIndex2(); index2a = nioCoder.getEncoderIndex2a(); } /** * flush out any residual data and reset the buffer state */ public int flush(byte[] output, int outStart, int outEnd) throws MalformedInputException, ConversionBufferFullException { int bytesOut; byteOff = outStart; if (highHalfZoneCode != 0) { reset(); badInputLength = 0; throw new MalformedInputException(); } if (charState != G0) { try { unicodeToBuffer(composeHangul() ,output, outEnd); } catch(UnknownCharacterException e) { reset(); badInputLength = 0; throw new MalformedInputException(); } charState = G0; } bytesOut = byteOff - outStart; reset(); return bytesOut; } /** * Resets converter to its initial state. */ public void reset() { highHalfZoneCode = 0; charState = G0; charOff = byteOff = 0; } /** * Returns true if the given character can be converted to the * target character encoding. */ public boolean canConvert(char ch) { int index; int theBytes; index = index1[((ch & mask1) >> shift)] + (ch & mask2); if (index < 15000) theBytes = (int)(index2.charAt(index)); else theBytes = (int)(index2a.charAt(index-15000)); if (theBytes != 0) return (true); // only return true if input char was unicode null - all others are // undefined return( ch == '\u0000'); } /** * Character conversion */ public int convert(char[] input, int inOff, int inEnd, byte[] output, int outOff, int outEnd) throws UnknownCharacterException, MalformedInputException, ConversionBufferFullException { char inputChar; int inputSize; charOff = inOff; byteOff = outOff; while (charOff < inEnd) { if (highHalfZoneCode == 0) { inputChar = input[charOff]; inputSize = 1; } else { inputChar = highHalfZoneCode; inputSize = 0; highHalfZoneCode = 0; } switch (charState) { case G0: l = LBase; v = VBase; t = TBase; if ( isLeadingC(inputChar) ) { // Leading Consonant l = inputChar; charState = G1; break; } if ( isVowel(inputChar) ) { // Vowel v = inputChar; charState = G2; break; } if ( isTrailingC(inputChar) ) { // Trailing Consonant t = inputChar; charState = G3; break; } break; case G1: if ( isLeadingC(inputChar) ) { // Leading Consonant l = composeLL(l, inputChar); break; } if ( isVowel(inputChar) ) { // Vowel v = inputChar; charState = G2; break; } if ( isTrailingC(inputChar) ) { // Trailing Consonant t = inputChar; charState = G3; break; } unicodeToBuffer(composeHangul(), output, outEnd); charState = G0; break; case G2: if ( isLeadingC(inputChar) ) { // Leading Consonant unicodeToBuffer(composeHangul(), output, outEnd); l = inputChar; v = VBase; t = TBase; charState = G1; break; } if ( isVowel(inputChar) ) { // Vowel v = composeVV(l, inputChar); charState = G2; break; } if ( isTrailingC(inputChar) ) { // Trailing Consonant t = inputChar; charState = G3; break; } unicodeToBuffer(composeHangul(), output, outEnd); charState = G0; break; case G3: if ( isTrailingC(inputChar) ) { // Trailing Consonant t = composeTT(t, inputChar); charState = G3; break; } unicodeToBuffer(composeHangul(), output, outEnd); charState = G0; break; } if (charState != G0) charOff++; else { // Is this a high surrogate? if(inputChar >= '\ud800' && inputChar <= '\udbff') { // Is this the last character of the input? if (charOff + inputSize >= inEnd) { highHalfZoneCode = inputChar; charOff += inputSize; break; } // Is there a low surrogate following? inputChar = input[charOff + inputSize]; if (inputChar >= '\udc00' && inputChar <= '\udfff') { // We have a valid surrogate pair. Too bad we don't do // surrogates. Is substitution enabled? if (subMode) { if (subBytes.length == 1) { outputByte[0] = 0x00; outputByte[1] = subBytes[0]; } else { outputByte[0] = subBytes[0]; outputByte[1] = subBytes[1]; } bytesToBuffer(outputByte, output, outEnd); inputSize++; } else { badInputLength = 2; throw new UnknownCharacterException(); } } else { // We have a malformed surrogate pair badInputLength = 1; throw new MalformedInputException(); } } // Is this an unaccompanied low surrogate? else if (inputChar >= '\uDC00' && inputChar <= '\uDFFF') { badInputLength = 1; throw new MalformedInputException(); } else { unicodeToBuffer(inputChar, output, outEnd); } charOff += inputSize; } } return byteOff - outOff; } private char composeHangul() { int lIndex, vIndex, tIndex; lIndex = l - LBase; vIndex = v - VBase; tIndex = t - TBase; return (char)((lIndex * VCount + vIndex) * TCount + tIndex + SBase); } private char composeLL(char l1, char l2) { return l2; } private char composeVV(char v1, char v2) { return v2; } private char composeTT(char t1, char t2) { return t2; } private boolean isLeadingC(char c) { return (c >= LBase && c <= '\u1159'); } private boolean isVowel(char c) { return (c >= VBase && c <= '\u11a2'); } private boolean isTrailingC(char c) { return (c >= TBase && c <= '\u11f9'); } /** * returns the maximum number of bytes needed to convert a char */ public int getMaxBytesPerChar() { return 2; } /** * Return the character set ID */ public String getCharacterEncoding() { return "Cp949"; } /** * private function to add the bytes to the output buffer */ private void bytesToBuffer(byte[] theBytes, byte[] output, int outEnd) throws ConversionBufferFullException, UnknownCharacterException { int spaceNeeded; // ensure sufficient space for the bytes(s) if (theBytes[0] == 0x00) spaceNeeded = 1; else spaceNeeded = 2; if (byteOff + spaceNeeded > outEnd) throw new ConversionBufferFullException(); // move the data into the buffer if (spaceNeeded == 1) output[byteOff++] = theBytes[1]; else { output[byteOff++] = theBytes[0]; output[byteOff++] = theBytes[1]; } } /** * private function to add a unicode character to the output buffer */ private void unicodeToBuffer(char unicode, byte[] output, int outEnd) throws ConversionBufferFullException, UnknownCharacterException { int index; int theBytes; // first we convert the unicode to its byte representation index = index1[((unicode & mask1) >> shift)] + (unicode & mask2); if (index < 15000) theBytes = (int)(index2.charAt(index)); else theBytes = (int)(index2a.charAt(index-15000)); outputByte[0] = (byte)((theBytes & 0x0000ff00)>>8); outputByte[1] = (byte)(theBytes & 0x000000ff); // if the unicode was not mappable - look for the substitution bytes if (outputByte[0] == 0x00 && outputByte[1] == 0x00 && unicode != '\u0000') { if (subMode) { if (subBytes.length == 1) { outputByte[0] = 0x00; outputByte[1] = subBytes[0]; } else { outputByte[0] = subBytes[0]; outputByte[1] = subBytes[1]; } } else { badInputLength = 1; throw new UnknownCharacterException(); } } // now put the bytes in the buffer bytesToBuffer(outputByte, output, outEnd); } }