/** * * Copyright 2004-2005 The Apache Software Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.geronimo.interop.util; public abstract class UTF8 { public static byte[] fromString(String value) { int n = value.length(), u = 0; for (int i = 0; i < n; i++) { int c = value.charAt(i); if (c >= 0x0001 && c <= 0x007F) { u++; } else if (c > 0x07FF) { u += 3; } else { u += 2; } } byte[] bytes = new byte[u]; for (int i = 0, j = 0; i < n; i++) { int c = value.charAt(i); if (c >= 0x0001 && c <= 0x007F) { bytes[j++] = (byte) c; } else if (c > 0x07FF) { bytes[j++] = (byte) (0xE0 | ((c >> 12) & 0x0F)); bytes[j++] = (byte) (0x80 | ((c >> 6) & 0x3F)); bytes[j++] = (byte) (0x80 | (c & 0x3F)); } else { bytes[j++] = (byte) (0xC0 | ((c >> 6) & 0x1F)); bytes[j++] = (byte) (0x80 | (c & 0x3F)); } } return bytes; } /** * * If there is sufficient space in buffer from offset to convert value * * without allocating a new byte array, do so now and return the number * * of bytes written. Otherwise return -1. This method is intended for * * use in optimized string marshalling. */ public static int fromString(String value, byte[] buffer, int offset, int length) { int n = value.length(), j = offset; for (int i = 0; i < n; i++) { if (j + 3 > length) { return -1; } int c = value.charAt(i); if (c >= 0x0001 && c <= 0x007F) { buffer[j++] = (byte) c; } else if (c > 0x07FF) { buffer[j++] = (byte) (0xE0 | ((c >> 12) & 0x0F)); buffer[j++] = (byte) (0x80 | ((c >> 6) & 0x3F)); buffer[j++] = (byte) (0x80 | (c & 0x3F)); } else { buffer[j++] = (byte) (0xC0 | ((c >> 6) & 0x1F)); buffer[j++] = (byte) (0x80 | (c & 0x3F)); } } return j - offset; } public static String toString(byte[] value) { return toString(value, 0, value.length); } public static String toString(byte[] value, int offset, int length) { int n = offset + length, j = 0; char[] chars = new char[length]; // May be more than we need, but not less for (int i = offset; i < n; i++) { int c = (value[i] + 256) & 255; // byte is signed, we need unsigned int c2, c3; switch (c >> 4) { case 0: case 1: case 2: case 3: case 4: case 5: case 6: case 7: // 0xxx xxxx chars[j++] = (char) c; break; case 12: case 13: // 110x xxxx 10xx xxxx if (i + 1 >= n) { badUtf8Data(); } c2 = (value[++i] + 256) & 255; // byte is signed, we need unsigned if ((c2 & 0xC0) != 0x80) { badUtf8Data(); } chars[j++] = (char) (((c & 0x1F) << 6) | (c2 & 0x3F)); break; case 14: // 1110 xxxx 10xx xxxx 10xx xxxx if (i + 2 >= n) { badUtf8Data(); } c2 = (value[++i] + 256) & 255; // byte is signed, we need unsigned c3 = (value[++i] + 256) & 255; // byte is signed, we need unsigned if ((c2 & 0xC0) != 0x80 || (c3 & 0xC0) != 0x80) { badUtf8Data(); } chars[j++] = (char) (((c & 0x0F) << 12) | ((c2 & 0x3F) << 6) | (c3 & 0x3F)); break; default: badUtf8Data(); } } return new String(chars, 0, j); } private static void badUtf8Data() { throw new org.omg.CORBA.MARSHAL("bad UTF-8 data"); } }