/* * @(#)StringCoding.java 1.9 02/04/09 * * Copyright 1990-2008 Sun Microsystems, Inc. All Rights Reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License version * 2 only, as published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License version 2 for more details (a copy is * included at /legal/license.txt). * * You should have received a copy of the GNU General Public License * version 2 along with this work; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA * 02110-1301 USA * * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa * Clara, CA 95054 or visit www.sun.com if you need additional * information or have any questions. */ package java.lang; import java.io.CharConversionException; import java.io.UnsupportedEncodingException; import java.lang.ref.SoftReference; /* *import java.nio.ByteBuffer; *import java.nio.CharBuffer; *import java.nio.BufferOverflowException; *import java.nio.BufferUnderflowException; *import java.nio.charset.Charset; *import java.nio.charset.CharsetDecoder; *import java.nio.charset.CharsetEncoder; *import java.nio.charset.CharacterCodingException; *import java.nio.charset.CoderResult; *import java.nio.charset.CodingErrorAction; *import java.nio.charset.IllegalCharsetNameException; *import java.nio.charset.MalformedInputException; *import java.nio.charset.UnsupportedCharsetException; *import sun.misc.MessageUtils; *import sun.nio.cs.HistoricallyNamedCharset; */ import sun.io.ByteToCharConverter; import sun.io.CharToByteConverter; import sun.io.Converters; import sun.misc.CVM; /** * Utility class for string encoding and decoding. */ class StringCoding { private StringCoding() { } /* The cached coders for each thread */ private static ThreadLocal decoder = new ThreadLocal(); private static ThreadLocal encoder = new ThreadLocal(); private static boolean warnUnsupportedCharset = true; private static Object deref(ThreadLocal tl) { SoftReference sr = (SoftReference)tl.get(); if (sr == null) return null; return sr.get(); } private static void set(ThreadLocal tl, Object ob) { tl.set(new SoftReference(ob)); } // Trim the given byte array to the given length // private static byte[] trim(byte[] ba, int len) { if (len == ba.length) return ba; byte[] tba = new byte[len]; /* IAI - 15 */ CVM.copyByteArray(ba, 0, tba, 0, len); /* IAI - 15 */ return tba; } // Trim the given char array to the given length // private static char[] trim(char[] ca, int len) { if (len == ca.length) return ca; char[] tca = new char[len]; /* IAI - 15 */ CVM.copyCharArray(ca, 0, tca, 0, len); /* IAI - 15 */ return tca; } /* * private static Charset lookupCharset(String csn) { * if (csn.equalsIgnoreCase("PCK")) * return null; * if (Charset.isSupported(csn)) { * try { * return Charset.forName(csn); * } catch (UnsupportedCharsetException x) { * throw new Error(x); * } * } * return null; * } */ private static void warnUnsupportedCharset(String csn) { if (warnUnsupportedCharset) { // Use sun.misc.MessageUtils rather than the Logging API or // System.err since this method may be called during VM // initialization before either is available. //MessageUtils.err("WARNING: Default charset " + csn + // " not supported, using ISO-8859-1 instead"); warnUnsupportedCharset = false; } } // -- Decoding -- // Encapsulates either a ByteToCharConverter or a CharsetDecoder // private static abstract class StringDecoder { private final String requestedCharsetName; protected StringDecoder(String requestedCharsetName) { this.requestedCharsetName = requestedCharsetName; } final String requestedCharsetName() { return requestedCharsetName; } abstract String charsetName(); abstract char[] decode(byte[] ba, int off, int len); } // A string decoder based upon a ByteToCharConverter // private static class ConverterSD extends StringDecoder { private ByteToCharConverter btc; private ConverterSD(ByteToCharConverter btc, String rcn) { super(rcn); this.btc = btc; } String charsetName() { return btc.getCharacterEncoding(); } char[] decode(byte[] ba, int off, int len) { int en = btc.getMaxCharsPerByte() * len; char[] ca = new char[en]; if (len == 0) return ca; btc.reset(); int n = 0; try { n = btc.convert(ba, off, off + len, ca, 0, en); n += btc.flush(ca, btc.nextCharIndex(), en); } catch (CharConversionException x) { // Yes, this is what we've always done n = btc.nextCharIndex(); } return trim(ca, n); } } // A string decoder based upon a CharsetDecoder // /* * private static class CharsetSD * extends StringDecoder * { * private final Charset cs; * private final CharsetDecoder cd; * * private CharsetSD(Charset cs, String rcn) { * super(rcn); * this.cs = cs; * this.cd = cs.newDecoder() * .onMalformedInput(CodingErrorAction.REPLACE) * .onUnmappableCharacter(CodingErrorAction.REPLACE); * } * * String charsetName() { * if (cs instanceof HistoricallyNamedCharset) * return ((HistoricallyNamedCharset)cs).historicalName(); * return cs.name(); * } * * char[] decode(byte[] ba, int off, int len) { * int en = (int)(cd.maxCharsPerByte() * len); * char[] ca = new char[en]; * if (len == 0) * return ca; * cd.reset(); * ByteBuffer bb = ByteBuffer.wrap(ba, off, len); * CharBuffer cb = CharBuffer.wrap(ca); * try { * CoderResult cr = cd.decode(bb, cb, true); * if (!cr.isUnderflow()) * cr.throwException(); * cr = cd.flush(cb); * if (!cr.isUnderflow()) * cr.throwException(); * } catch (CharacterCodingException x) { * // Substitution is always enabled, * // so this shouldn't happen * throw new Error(x); * } * return trim(ca, cb.position()); * } * * } */ static char[] decode(String charsetName, byte[] ba, int off, int len) throws UnsupportedEncodingException { StringDecoder sd = (StringDecoder)deref(decoder); String csn = (charsetName == null) ? "ISO-8859-1" : charsetName; if ((sd == null) || !(csn.equals(sd.requestedCharsetName()) || csn.equals(sd.charsetName()))) { /* * sd = null; * try { * Charset cs = lookupCharset(csn); * if (cs != null) * sd = new CharsetSD(cs, csn); * else * sd = null; * } catch (IllegalCharsetNameException x) { * // FALL THROUGH to ByteToCharConverter, for compatibility * } * if (sd == null) */ sd = new ConverterSD(ByteToCharConverter.getConverter(csn), csn); set(decoder, sd); } return sd.decode(ba, off, len); } static char[] decode(byte[] ba, int off, int len) { String csn = Converters.getDefaultEncodingName(); try { return decode(csn, ba, off, len); } catch (UnsupportedEncodingException x) { Converters.resetDefaultEncodingName(); warnUnsupportedCharset(csn); } try { return decode("ISO-8859-1", ba, off, len); } catch (UnsupportedEncodingException x) { // If this code is hit during VM initialization, MessageUtils is // the only way we will be able to get any kind of error message. //MessageUtils.err("ISO-8859-1 charset not available: " // + x.toString()); // If we can not find ISO-8859-1 (a required encoding) then things // are seriously wrong with the installation. System.exit(1); return null; } } // -- Encoding -- // Encapsulates either a CharToByteConverter or a CharsetEncoder // private static abstract class StringEncoder { private final String requestedCharsetName; protected StringEncoder(String requestedCharsetName) { this.requestedCharsetName = requestedCharsetName; } final String requestedCharsetName() { return requestedCharsetName; } abstract String charsetName(); abstract byte[] encode(char[] cs, int off, int len); } // A string encoder based upon a CharToByteConverter // private static class ConverterSE extends StringEncoder { private CharToByteConverter ctb; private ConverterSE(CharToByteConverter ctb, String rcn) { super(rcn); this.ctb = ctb; } String charsetName() { return ctb.getCharacterEncoding(); } byte[] encode(char[] ca, int off, int len) { int en = ctb.getMaxBytesPerChar() * len; byte[] ba = new byte[en]; if (len == 0) return ba; ctb.reset(); int n; try { n = ctb.convertAny(ca, off, (off + len), ba, 0, en); n += ctb.flushAny(ba, ctb.nextByteIndex(), en); } catch (CharConversionException x) { throw new Error("Converter malfunction: " + ctb.getClass().getName(), x); } return trim(ba, n); } } // A string encoder based upon a CharsetEncoder // /* * private static class CharsetSE * extends StringEncoder * { * private Charset cs; * private CharsetEncoder ce; * * private CharsetSE(Charset cs, String rcn) { * super(rcn); * this.cs = cs; * this.ce = cs.newEncoder() * .onMalformedInput(CodingErrorAction.REPLACE) * .onUnmappableCharacter(CodingErrorAction.REPLACE); * } * * String charsetName() { * if (cs instanceof HistoricallyNamedCharset) * return ((HistoricallyNamedCharset)cs).historicalName(); * return cs.name(); * } * * byte[] encode(char[] ca, int off, int len) { * int en = (int)(ce.maxBytesPerChar() * len); * byte[] ba = new byte[en]; * if (len == 0) * return ba; * * ce.reset(); * ByteBuffer bb = ByteBuffer.wrap(ba); * CharBuffer cb = CharBuffer.wrap(ca, off, len); * try { * CoderResult cr = ce.encode(cb, bb, true); * if (!cr.isUnderflow()) * cr.throwException(); * cr = ce.flush(bb); * if (!cr.isUnderflow()) * cr.throwException(); * } catch (CharacterCodingException x) { * // Substitution is always enabled, * // so this shouldn't happen * throw new Error(x); * } * return trim(ba, bb.position()); * } * * } */ static byte[] encode(String charsetName, char[] ca, int off, int len) throws UnsupportedEncodingException { StringEncoder se = (StringEncoder)deref(encoder); String csn = (charsetName == null) ? "ISO-8859-1" : charsetName; if ((se == null) || !(csn.equals(se.requestedCharsetName()) || csn.equals(se.charsetName()))) { /* * se = null; * try { * Charset cs = lookupCharset(csn); * if (cs != null) * se = new CharsetSE(cs, csn); * } catch (IllegalCharsetNameException x) { * // FALL THROUGH to CharToByteConverter, for compatibility * } * if (se == null) */ se = new ConverterSE(CharToByteConverter.getConverter(csn), csn); set(encoder, se); } return se.encode(ca, off, len); } static byte[] encode(char[] ca, int off, int len) { String csn = Converters.getDefaultEncodingName(); try { return encode(csn, ca, off, len); } catch (UnsupportedEncodingException x) { Converters.resetDefaultEncodingName(); warnUnsupportedCharset(csn); } try { return encode("ISO-8859-1", ca, off, len); } catch (UnsupportedEncodingException x) { // If this code is hit during VM initialization, MessageUtils is // the only way we will be able to get any kind of error message. // MessageUtils.err("ISO-8859-1 charset not available: " // + x.toString()); // If we can not find ISO-8859-1 (a required encoding) then things // are seriously wrong with the installation. System.exit(1); return null; } } }