/*
 * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

package sun.nio.cs.ext;

import java.util.Arrays;
import java.util.Collections;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.nio.charset.*;

/**
 * Static lookup tables that tie Compound Text escape sequences to the
 * charsets used to decode/encode the text that follows them.
 *
 * <p>The tables are built once, in the static initializer, from the set of
 * charsets that {@link #isEncodingSupported} reports as available, and are
 * exposed only through unmodifiable views.
 */
final class CompoundTextSupport {

    /**
     * Hash-map key made of an escape sequence and, for non-standard
     * charsets, the encoding name bytes that follow it.
     *
     * <p>The arrays are stored by reference (no defensive copy); the hash is
     * computed once at construction, so the arrays must not be modified
     * while an instance is used as a map key.
     */
    private static final class ControlSequence {

        final int hash;
        final byte[] escSequence;
        final byte[] encoding;

        /**
         * Creates a key for a standard escape sequence (no encoding name).
         *
         * @throws NullPointerException if {@code escSequence} is null
         */
        ControlSequence(byte[] escSequence) {
            this(escSequence, null);
        }

        /**
         * Creates a key and, as a side effect, raises
         * {@link CompoundTextSupport#MAX_CONTROL_SEQUENCE_LEN} if this
         * sequence is longer than any constructed before it.
         *
         * @param escSequence the escape sequence bytes; must not be null
         * @param encoding    the encoding name bytes, or null for a
         *                    standard sequence
         * @throws NullPointerException if {@code escSequence} is null
         */
        ControlSequence(byte[] escSequence, byte[] encoding) {
            if (escSequence == null) {
                throw new NullPointerException();
            }

            this.escSequence = escSequence;
            this.encoding = encoding;

            int h = weigh(escSequence);
            int length = escSequence.length;
            if (encoding != null) {
                h += weigh(encoding);
                length += 2 /* M L */ + encoding.length + 1 /* 0x02 */;
            }
            this.hash = h;

            if (MAX_CONTROL_SEQUENCE_LEN < length) {
                MAX_CONTROL_SEQUENCE_LEN = length;
            }
        }

        /**
         * Sums the unsigned byte values, each shifted left by its index
         * modulo 4. Kept identical to the historical hash formula.
         */
        private static int weigh(byte[] bytes) {
            int sum = 0;
            for (int i = 0; i < bytes.length; i++) {
                sum += (bytes[i] & 0xff) << (i % 4);
            }
            return sum;
        }

        /**
         * Two control sequences are equal when both their escape sequences
         * and their encoding names (either of which may be null for the
         * latter) have the same contents.
         */
        @Override
        public boolean equals(Object obj) {
            if (this == obj) {
                return true;
            }
            if (!(obj instanceof ControlSequence)) {
                return false;
            }
            ControlSequence rhs = (ControlSequence) obj;
            // Arrays.equals treats two nulls as equal and null vs. non-null
            // as different, which is exactly what 'encoding' needs.
            return Arrays.equals(escSequence, rhs.escSequence)
                && Arrays.equals(encoding, rhs.encoding);
        }

        @Override
        public int hashCode() {
            return hash;
        }

        /**
         * Returns a new sequence whose escape bytes are this sequence's
         * followed by {@code rhs}'s, carrying {@code rhs}'s encoding name.
         *
         * @throws IllegalArgumentException if this sequence already has an
         *         encoding name
         */
        ControlSequence concatenate(ControlSequence rhs) {
            if (encoding != null) {
                throw new IllegalArgumentException
                    ("cannot concatenate to a non-standard charset escape " +
                     "sequence");
            }

            int len = escSequence.length + rhs.escSequence.length;
            byte[] newEscSequence = new byte[len];
            System.arraycopy(escSequence, 0, newEscSequence, 0,
                             escSequence.length);
            System.arraycopy(rhs.escSequence, 0, newEscSequence,
                             escSequence.length, rhs.escSequence.length);
            return new ControlSequence(newEscSequence, rhs.encoding);
        }
    }

    /**
     * Mutable scratch tables used only while the static initializer runs.
     * Each {@code add...} method registers one family of escape sequences,
     * replacing what used to be a copy-pasted stanza per charset.
     */
    private static final class Registry {

        final Map<ControlSequence, String> sequenceToEncoding =
            new HashMap<>(33, 1.0f);
        final Map<ControlSequence, Boolean> highBits =
            new HashMap<>(31, 1.0f);
        final Map<String, ControlSequence> encodingToSequence =
            new HashMap<>(21, 1.0f);
        final List<String> encodings = new ArrayList<>(21);

        /** GL designation of ASCII; the left half of every "full set". */
        final ControlSequence leftAscii;

        /** Registers both ASCII designations, which map to US-ASCII. */
        Registry() {
            leftAscii = // high bit off, leave off
                new ControlSequence(new byte[] { 0x1B, 0x28, 0x42 });
            mapSequence(leftAscii, "US-ASCII", Boolean.FALSE);

            ControlSequence rightAscii = // high bit on, turn off
                new ControlSequence(new byte[] { 0x1B, 0x29, 0x42 });
            mapSequence(rightAscii, "US-ASCII", Boolean.FALSE);
        }

        /** Records the decoder-side mapping and high-bit flag for seq. */
        private void mapSequence(ControlSequence seq, String encoding,
                                 Boolean highBit) {
            sequenceToEncoding.put(seq, encoding);
            highBits.put(seq, highBit);
        }

        /** Records the encoder-side sequence and the preference order. */
        private void publish(String encoding, ControlSequence seq) {
            encodingToSequence.put(encoding, seq);
            encodings.add(encoding);
        }

        /**
         * Registers a charset selected by {@code ESC 0x2D <finalByte>}
         * (high bit on, leave on). The encoder-side sequence is the ASCII
         * left half followed by this right half.
         */
        void addRightHalfSet(String encoding, int finalByte) {
            ControlSequence rightHalf = new ControlSequence(
                new byte[] { 0x1B, 0x2D, (byte) finalByte });
            mapSequence(rightHalf, encoding, Boolean.TRUE);
            publish(encoding, leftAscii.concatenate(rightHalf));
        }

        /** As {@link #addRightHalfSet}, but only if the charset exists. */
        void addRightHalfSetIfSupported(String encoding, int finalByte) {
            if (isEncodingSupported(encoding)) {
                addRightHalfSet(encoding, finalByte);
            }
        }

        /**
         * Registers JIS_X0201, which has four designations: the 0x4A and
         * 0x49 final bytes, each reachable through 0x28 and 0x29.
         */
        void addJisX0201IfSupported() {
            if (!isEncodingSupported("JIS_X0201")) {
                return;
            }
            ControlSequence glLeft = // high bit off, leave off
                new ControlSequence(new byte[] { 0x1B, 0x28, 0x4A });
            ControlSequence glRight = // high bit off, turn on
                new ControlSequence(new byte[] { 0x1B, 0x28, 0x49 });
            ControlSequence grLeft = // high bit on, turn off
                new ControlSequence(new byte[] { 0x1B, 0x29, 0x4A });
            ControlSequence grRight = // high bit on, leave on
                new ControlSequence(new byte[] { 0x1B, 0x29, 0x49 });

            mapSequence(glLeft, "JIS_X0201", Boolean.FALSE);
            mapSequence(glRight, "JIS_X0201", Boolean.TRUE);
            mapSequence(grLeft, "JIS_X0201", Boolean.FALSE);
            mapSequence(grRight, "JIS_X0201", Boolean.TRUE);

            publish("JIS_X0201", glLeft.concatenate(grRight));
        }

        /**
         * Registers a charset selected by {@code ESC 0x24 0x28 <finalByte>}
         * (high bit off, leave off) or {@code ESC 0x24 0x29 <finalByte>}
         * (high bit on, turn off). The encoder always emits the former.
         */
        void addMultiByteSetIfSupported(String encoding, int finalByte) {
            if (!isEncodingSupported(encoding)) {
                return;
            }
            ControlSequence leftHalf = new ControlSequence(
                new byte[] { 0x1B, 0x24, 0x28, (byte) finalByte });
            ControlSequence rightHalf = new ControlSequence(
                new byte[] { 0x1B, 0x24, 0x29, (byte) finalByte });
            mapSequence(leftHalf, encoding, Boolean.FALSE);
            mapSequence(rightHalf, encoding, Boolean.FALSE);
            publish(encoding, leftHalf);
        }

        /**
         * Registers a non-standard charset identified by an encoding name
         * following the {@code ESC 0x25 0x2F 0x32} prefix. No high-bit
         * entry is recorded for these sequences.
         */
        void addNonStandardIfSupported(String encoding, byte[] encodingName) {
            if (!isEncodingSupported(encoding)) {
                return;
            }
            // 0x32 looks wrong. It's copied from the Sun X11 Compound Text
            // support code. It implies that all characters of the charset
            // (Johab, Big5) comprise two octets, which isn't true. Both
            // support the ASCII/KS-Roman resp. ASCII/CNS-Roman characters
            // from 0x21-0x7E with single-byte representations.
            ControlSequence seq = new ControlSequence(
                new byte[] { 0x1b, 0x25, 0x2f, 0x32 }, encodingName);
            sequenceToEncoding.put(seq, encoding);
            publish(encoding, seq);
        }
    }

    /**
     * Length of the longest control sequence constructed so far (escape
     * bytes plus, for non-standard sequences, 2 length bytes, the encoding
     * name and a terminating 0x02).
     */
    static int MAX_CONTROL_SEQUENCE_LEN;

    /**
     * Maps a GL or GR escape sequence to an encoding.
     */
    private static final Map<ControlSequence, String> sequenceToEncodingMap;

    /**
     * Indicates whether a particular encoding wants the high bit turned on
     * or off.
     */
    private static final Map<ControlSequence, Boolean> highBitsMap;

    /**
     * Maps an encoding to an escape sequence. Rather than manage two
     * converters in CharToByteCOMPOUND_TEXT, we output escape sequences which
     * modify both GL and GR if necessary. This makes the output slightly less
     * efficient, but our code much simpler.
     */
    private static final Map<String, ControlSequence> encodingToSequenceMap;

    /**
     * The keys of 'encodingToSequenceMap', sorted in preferential order.
     */
    private static final List<String> encodings;

    static {
        if (!(isEncodingSupported("US-ASCII") &&
              isEncodingSupported("ISO-8859-1")))
        {
            throw new ExceptionInInitializerError
                ("US-ASCII and ISO-8859-1 unsupported");
        }

        Registry registry = new Registry();

        // ISO-8859-1 was verified above, so it is registered unconditionally.
        registry.addRightHalfSet("ISO-8859-1", 0x41);
        registry.addRightHalfSetIfSupported("ISO-8859-2", 0x42);
        registry.addRightHalfSetIfSupported("ISO-8859-3", 0x43);
        registry.addRightHalfSetIfSupported("ISO-8859-4", 0x44);
        registry.addRightHalfSetIfSupported("ISO-8859-5", 0x4C);
        registry.addRightHalfSetIfSupported("ISO-8859-6", 0x47);
        registry.addRightHalfSetIfSupported("ISO-8859-7", 0x46);
        registry.addRightHalfSetIfSupported("ISO-8859-8", 0x48);
        registry.addRightHalfSetIfSupported("ISO-8859-9", 0x4D);

        registry.addJisX0201IfSupported();

        registry.addMultiByteSetIfSupported("X11GB2312", 0x41);
        registry.addMultiByteSetIfSupported("x-JIS0208", 0x42);
        registry.addMultiByteSetIfSupported("X11KSC5601", 0x43);

        // Encodings not listed in Compound Text Encoding spec

        // Esc seq: -b
        registry.addRightHalfSetIfSupported("ISO-8859-15", 0x62);
        // Esc seq: -T
        registry.addRightHalfSetIfSupported("TIS-620", 0x54);

        registry.addMultiByteSetIfSupported("JIS_X0212-1990", 0x44);
        registry.addMultiByteSetIfSupported("X11CNS11643P1", 0x47);
        registry.addMultiByteSetIfSupported("X11CNS11643P2", 0x48);
        registry.addMultiByteSetIfSupported("X11CNS11643P3", 0x49);

        // Esc seq: %/2??SUN-KSC5601.1992-3
        registry.addNonStandardIfSupported("x-Johab", new byte[] {
            0x53, 0x55, 0x4e, 0x2d, 0x4b, 0x53, 0x43, 0x35, 0x36,
            0x30, 0x31, 0x2e, 0x31, 0x39, 0x39, 0x32, 0x2d, 0x33 });

        // Esc seq: %/2??SUN-BIG5-1
        registry.addNonStandardIfSupported("Big5", new byte[] {
            0x53, 0x55, 0x4e, 0x2d, 0x42, 0x49, 0x47, 0x35, 0x2d, 0x31 });

        sequenceToEncodingMap =
            Collections.unmodifiableMap(registry.sequenceToEncoding);
        highBitsMap = Collections.unmodifiableMap(registry.highBits);
        encodingToSequenceMap =
            Collections.unmodifiableMap(registry.encodingToSequence);
        encodings = Collections.unmodifiableList(registry.encodings);
    }

    /**
     * Reports whether {@code encoding} can be used: either the platform
     * lists it as a supported charset, or both a decoder and an encoder can
     * be obtained for it through {@link #getDecoder} / {@link #getEncoder}.
     */
    private static boolean isEncodingSupported(String encoding) {
        try {
            if (Charset.isSupported(encoding)) {
                return true;
            }
        } catch (IllegalArgumentException ignored) {
            // Illegal charset name: fall through to the explicit probe.
        }
        return getDecoder(encoding) != null && getEncoder(encoding) != null;
    }

    /**
     * Resolves a charset by name, first through {@link Charset#forName} and,
     * if that rejects the name, by instantiating the class
     * {@code sun.awt.motif.<enc>} through its no-argument constructor.
     *
     * @return the charset, or null if {@code enc} is null or neither lookup
     *         produced an instance
     */
    private static Charset lookupCharset(String enc) {
        if (enc == null) {
            return null;
        }
        try {
            return Charset.forName(enc);
        } catch (IllegalArgumentException ignored) {
            // Unknown or illegal name: try the legacy class below.
        }
        try {
            Class<?> cls = Class.forName("sun.awt.motif." + enc);
            // getDeclaredConstructor().newInstance() replaces the deprecated
            // Class.newInstance(); a constructor that throws is reported as
            // "charset unavailable" rather than propagating an undeclared
            // exception.
            return (Charset) cls.getDeclaredConstructor().newInstance();
        } catch (ReflectiveOperationException e) {
            return null;
        }
    }


    // For Decoder

    /**
     * Returns a new decoder for the charset registered under the given
     * standard escape sequence, or null if none is registered.
     */
    static CharsetDecoder getStandardDecoder(byte[] escSequence) {
        return getNonStandardDecoder(escSequence, null);
    }

    /**
     * Returns true only if the escape sequence is registered with its
     * high-bit flag set; unknown sequences yield false.
     */
    static boolean getHighBit(byte[] escSequence) {
        Boolean bool = highBitsMap.get(new ControlSequence(escSequence));
        return Boolean.TRUE.equals(bool);
    }

    /**
     * Returns a new decoder for the charset registered under the given
     * escape sequence and encoding name, or null if none is registered.
     */
    static CharsetDecoder getNonStandardDecoder(byte[] escSequence,
                                                byte[] encoding) {
        return getDecoder(sequenceToEncodingMap.get
            (new ControlSequence(escSequence, encoding)));
    }

    /**
     * Returns a new decoder for the named charset, or null if the name is
     * null, the charset cannot be found, or it does not support decoding.
     */
    static CharsetDecoder getDecoder(String enc) {
        Charset cs = lookupCharset(enc);
        if (cs == null) {
            return null;
        }
        try {
            return cs.newDecoder();
        } catch (UnsupportedOperationException e) {
            return null;
        }
    }


    // For Encoder

    /**
     * Returns the escape sequence bytes registered for {@code encoding}, or
     * null if it is not registered. The returned array is the internal one
     * and must not be modified.
     */
    static byte[] getEscapeSequence(String encoding) {
        ControlSequence seq = encodingToSequenceMap.get(encoding);
        return (seq != null) ? seq.escSequence : null;
    }

    /**
     * Returns the encoding name bytes registered for {@code encoding}, or
     * null if it is not registered or is a standard sequence. The returned
     * array is the internal one and must not be modified.
     */
    static byte[] getEncoding(String encoding) {
        ControlSequence seq = encodingToSequenceMap.get(encoding);
        return (seq != null) ? seq.encoding : null;
    }

    /**
     * Returns the registered encoding names, in preferential order, as an
     * unmodifiable list.
     */
    static List<String> getEncodings() {
        return encodings;
    }

    /**
     * Returns a new encoder for the named charset, or null if the name is
     * null, the charset cannot be found, or creating the encoder fails for
     * any reason.
     */
    static CharsetEncoder getEncoder(String enc) {
        Charset cs = lookupCharset(enc);
        if (cs == null) {
            return null;
        }
        try {
            return cs.newEncoder();
        } catch (Throwable e) {
            // Deliberately broad: this is a best-effort probe that is also
            // used during class initialization, so any failure simply means
            // "no encoder available".
            return null;
        }
    }

    // Not an instantiable class
    private CompoundTextSupport() {}
}