/* ***** BEGIN LICENSE BLOCK ***** * Version: MPL 1.1/GPL 2.0/LGPL 2.1 * * The contents of this file are subject to the Mozilla Public License Version * 1.1 (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * http://www.mozilla.org/MPL/ * * Software distributed under the License is distributed on an "AS IS" basis, * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License * for the specific language governing rights and limitations under the * License. * * The Original Code is part of dcm4che, an implementation of DICOM(TM) in * Java(TM), hosted at http://sourceforge.net/projects/dcm4che. * * The Initial Developer of the Original Code is * Gunter Zeilinger, Huetteldorferstr. 24/10, 1150 Vienna/Austria/Europe. * Portions created by the Initial Developer are Copyright (C) 2010 * the Initial Developer. All Rights Reserved. * * Contributor(s): * Gunter Zeilinger <gunterze@gmail.com> * * Alternatively, the contents of this file may be used under the terms of * either the GNU General Public License Version 2 or later (the "GPL"), or * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), * in which case the provisions of the GPL or the LGPL are applicable instead * of those above. If you wish to allow use of your version of this file only * under the terms of either the GPL or the LGPL, and not to allow others to * use your version of this file under the terms of the MPL, indicate your * decision by deleting the provisions above and replace them with the notice * and other provisions required by the GPL or the LGPL. If you do not delete * the provisions above, a recipient may use your version of this file under * the terms of any one of the MPL, the GPL or the LGPL. * * ***** END LICENSE BLOCK ***** */ /* ***** BEGIN LICENSE BLOCK ***** * Version: MPL 1.1/GPL 2.0/LGPL 2.1 * * The contents of this file are subject to the Mozilla Public License Version * 1.1 (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * http://www.mozilla.org/MPL/ * * Software distributed under the License is distributed on an "AS IS" basis, * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License * for the specific language governing rights and limitations under the * License. * * The Original Code is part of dcm4che, an implementation of DICOM(TM) in * Java(TM), hosted at https://github.com/gunterze/dcm4che. * * The Initial Developer of the Original Code is * Agfa Healthcare. * Portions created by the Initial Developer are Copyright (C) 2011 * the Initial Developer. All Rights Reserved. * * Contributor(s): * See @authors listed below * * Alternatively, the contents of this file may be used under the terms of * either the GNU General Public License Version 2 or later (the "GPL"), or * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), * in which case the provisions of the GPL or the LGPL are applicable instead * of those above. If you wish to allow use of your version of this file only * under the terms of either the GPL or the LGPL, and not to allow others to * use your version of this file under the terms of the MPL, indicate your * decision by deleting the provisions above and replace them with the notice * and other provisions required by the GPL or the LGPL. If you do not delete * the provisions above, a recipient may use your version of this file under * the terms of any one of the MPL, the GPL or the LGPL. * * ***** END LICENSE BLOCK ***** */ package org.dcm4che3.data; import java.io.UnsupportedEncodingException; import java.lang.ref.SoftReference; import java.nio.ByteBuffer; import java.nio.CharBuffer; import java.nio.charset.*; import java.util.Arrays; import java.util.StringTokenizer; /** * @author Gunter Zeilinger <gunterze@gmail.com> */ public class SpecificCharacterSet { public static final SpecificCharacterSet ASCII = new SpecificCharacterSet(new Codec[]{Codec.ISO_646}); public static SpecificCharacterSet DEFAULT = ASCII; private static ThreadLocal<SoftReference<Encoder>> cachedEncoder1 = new ThreadLocal<SoftReference<Encoder>>(); private static ThreadLocal<SoftReference<Encoder>> cachedEncoder2 = new ThreadLocal<SoftReference<Encoder>>(); protected final Codec[] codecs; protected final String[] dicomCodes; private enum Codec { ISO_646("US-ASCII", true, 0x2842, 0, 1), ISO_8859_1("ISO-8859-1", true, 0x2842, 0x2d41, 1), ISO_8859_2("ISO-8859-2", true, 0x2842, 0x2d42, 1), ISO_8859_3("ISO-8859-3", true, 0x2842, 0x2d43, 1), ISO_8859_4("ISO-8859-4", true, 0x2842, 0x2d44, 1), ISO_8859_5("ISO-8859-5", true, 0x2842, 0x2d4c, 1), ISO_8859_6("ISO-8859-6", true, 0x2842, 0x2d47, 1), ISO_8859_7("ISO-8859-7", true, 0x2842, 0x2d46, 1), ISO_8859_8("ISO-8859-8", true, 0x2842, 0x2d48, 1), ISO_8859_9("ISO-8859-9", true, 0x2842, 0x2d4d, 1), JIS_X_201("JIS_X0201", true, 0x284a, 0x2949, 1) { @Override public String toText(String s) { return s.replace('\\', '¥'); } }, TIS_620("TIS-620", true, 0x2842, 0x2d54, 1), JIS_X_208("x-JIS0208", false, 0x2442, 0, 1), JIS_X_212("JIS_X0212-1990", false, 0x242844, 0, 2), KS_X_1001("EUC-KR", false, 0x2842, 0x242943, -1), GB2312("GB2312", false, 0x2842, 0x242941, -1), UTF_8("UTF-8", true, 0, 0, -1), GB18030("GB18030", false, 0, 0, -1); private final String charsetName; private final boolean containsASCII; private final int escSeq0; private final int escSeq1; private final int bytesPerChar; Codec(String charsetName, boolean containsASCII, int escSeq0, int escSeq1, int bytesPerChar) { this.charsetName = charsetName; this.containsASCII = containsASCII; this.escSeq0 = escSeq0; this.escSeq1 = escSeq1; this.bytesPerChar = bytesPerChar; } public static Codec forCode(String code) { if (code == null) return ISO_646; switch(last2digits(code)) { case 0: if (code.equals("ISO_IR 100") || code.equals("ISO 2022 IR 100")) return Codec.ISO_8859_1; break; case 1: if (code.equals("ISO_IR 101") || code.equals("ISO 2022 IR 101")) return Codec.ISO_8859_2; break; case 6: if (code.equals("ISO 2022 IR 6")) return Codec.ISO_646; break; case 9: if (code.equals("ISO_IR 109") || code.equals("ISO 2022 IR 109")) return Codec.ISO_8859_3; break; case 10: if (code.equals("ISO_IR 110") || code.equals("ISO 2022 IR 110")) return Codec.ISO_8859_4; break; case 13: if (code.equals("ISO_IR 13") || code.equals("ISO 2022 IR 13")) return Codec.JIS_X_201; break; case 26: if (code.equals("ISO_IR 126") || code.equals("ISO 2022 IR 126")) return Codec.ISO_8859_7; break; case 27: if (code.equals("ISO_IR 127") || code.equals("ISO 2022 IR 127")) return Codec.ISO_8859_6; break; case 30: if (code.equals("GB18030")) return Codec.GB18030; break; case 31: if (code.equals("GBK")) return Codec.GB18030; break; case 38: if (code.equals("ISO_IR 138") || code.equals("ISO 2022 IR 138")) return Codec.ISO_8859_8; break; case 44: if (code.equals("ISO_IR 144") || code.equals("ISO 2022 IR 144")) return Codec.ISO_8859_5; break; case 48: if (code.equals("ISO_IR 148") || code.equals("ISO 2022 IR 148")) return Codec.ISO_8859_9; break; case 49: if (code.equals("ISO 2022 IR 149")) return Codec.KS_X_1001; break; case 58: if (code.equals("ISO 2022 IR 58")) return Codec.GB2312; break; case 59: if (code.equals("ISO 2022 IR 159")) return Codec.JIS_X_212; break; case 66: if (code.equals("ISO_IR 166") || code.equals("ISO 2022 IR 166")) return Codec.TIS_620; break; case 87: if (code.equals("ISO 2022 IR 87")) return Codec.JIS_X_208; break; case 92: if (code.equals("ISO_IR 192")) return Codec.UTF_8; break; } return ISO_646; } private static int last2digits(String code) { int len = code.length(); if (len < 2) return -1; char ch1 = code.charAt(len-1); char ch2 = code.charAt(len-2); return (ch2 & 15) * 10 + (ch1 & 15); } public byte[] encode(String val) { try { return val.getBytes(charsetName); } catch (UnsupportedEncodingException e) { throw new AssertionError(e); } } public String decode(byte[] b, int off, int len) { try { return new String(b, off, len, charsetName); } catch (UnsupportedEncodingException e) { throw new AssertionError(e); } } public boolean containsASCII() { return containsASCII; } public int getEscSeq0() { return escSeq0; } public int getEscSeq1() { return escSeq1; } public int getBytesPerChar() { return bytesPerChar; } public String toText(String s) { return s; } } private static final class Encoder { final Codec codec; final CharsetEncoder encoder; public Encoder(Codec codec) { this.codec = codec; this.encoder = Charset.forName(codec.charsetName).newEncoder(); } public boolean encode(CharBuffer cb, ByteBuffer bb, int escSeq, CodingErrorAction errorAction) { encoder.onMalformedInput(errorAction) .onUnmappableCharacter(errorAction) .reset(); int cbmark = cb.position(); int bbmark = bb.position(); try { escSeq(bb, escSeq); CoderResult cr = encoder.encode(cb, bb, true); if (!cr.isUnderflow()) cr.throwException(); cr = encoder.flush(bb); if (!cr.isUnderflow()) cr.throwException(); } catch (CharacterCodingException x) { cb.position(cbmark); bb.position(bbmark); return false; } return true; } private static void escSeq(ByteBuffer bb, int seq) { if (seq == 0) return; bb.put((byte) 0x1b); int b1 = seq >> 16; if (b1 != 0) bb.put((byte) b1); bb.put((byte) (seq >> 8)); bb.put((byte) seq); } public byte[] replacement() { return encoder.replacement(); } } private static final class ISO2022 extends SpecificCharacterSet { private ISO2022(Codec[] charsetInfos, String... codes) { super(charsetInfos, codes); } @Override public byte[] encode(String val, String delimiters) { int strlen = val.length(); CharBuffer cb = CharBuffer.wrap(val.toCharArray()); Encoder enc1 = encoder(cachedEncoder1, codecs[0]); byte[] buf = new byte[strlen]; ByteBuffer bb = ByteBuffer.wrap(buf); // try to encode whole string value with character set specified // by value1 of (0008,0005) Specific Character Set if (!enc1.encode(cb, bb, 0, CodingErrorAction.REPORT)) { // split whole string value according VR specific delimiters // and try to encode each component separately Encoder[] encs = new Encoder[codecs.length]; encs[0] = enc1; encs[1] = encoder(cachedEncoder2, codecs[1]); StringTokenizer comps = new StringTokenizer(val, delimiters, true); buf = new byte[2 * strlen + 4 * (comps.countTokens() + 1)]; bb = ByteBuffer.wrap(buf); int[] cur = { 0, 0 }; while (comps.hasMoreTokens()) { String comp = comps.nextToken(); if (comp.length() == 1 && delimiters.indexOf(comp.charAt(0)) >= 0) { // if delimiter activateInitialCharacterSet(bb, cur); bb.put((byte) comp.charAt(0)); continue; } cb = CharBuffer.wrap(comp.toCharArray()); encodeComponent(encs, cb, bb, cur); } activateInitialCharacterSet(bb, cur); } return Arrays.copyOf(buf, bb.position()); } private void encodeComponent(Encoder[] encs, CharBuffer cb, ByteBuffer bb, int[] cur) { // try to encode component with current active character of G1 if (codecs[cur[1]].getEscSeq1() != 0 && encs[cur[1]].encode(cb, bb, 0, CodingErrorAction.REPORT)) return; // try to encode component with current active character set of G0, if different to G1 if ((codecs[cur[1]].getEscSeq1() == 0 || codecs[cur[1]].getEscSeq0() != codecs[cur[0]].getEscSeq0()) && encs[cur[0]].encode(cb, bb, 0, CodingErrorAction.REPORT)) return; int next = encs.length; while (--next >= 0) { if (encs[next] == null) encs[next] = new Encoder(codecs[next]); if (codecs[next].getEscSeq1() != 0) { if (encs[next].encode(cb, bb, codecs[next].getEscSeq1(), CodingErrorAction.REPORT)) { cur[1] = next; break; } } else { if (encs[next].encode(cb, bb, codecs[next].getEscSeq0(), CodingErrorAction.REPORT)) { cur[0] = next; break; } } } if (next < 0) { if (cb.length() > 1) { for (int i = 0; i < cb.length(); i++) { encodeComponent(encs, cb.subSequence(i, i + 1), bb, cur); } } else { // character could not be encoded with any of the // specified character sets, encode it with the // current character set of G0, using the default // replacement of the character set decoder // for characters which cannot be encoded bb.put(encs[cur[0]].replacement()); } } } private void activateInitialCharacterSet(ByteBuffer bb, int[] cur) { if (cur[0] != 0) { Encoder.escSeq(bb, codecs[0].getEscSeq0()); cur[0] = 0; } if (cur[1] != 0) { Encoder.escSeq(bb, codecs[0].getEscSeq1()); cur[1] = 0; } } @Override public String decode(byte[] b) { Codec[] codec = { codecs[0], codecs[0] }; int g = 0; int off = 0; int cur = 0; StringBuilder sb = new StringBuilder(b.length); while (cur < b.length) { if (b[cur] == 0x1b) { // ESC if (off < cur) { sb.append(codec[g].decode(b, off, cur - off)); } cur += 3; switch (((b[cur - 2] & 255) << 8) + (b[cur - 1] & 255)) { case 0x2428: if (b[cur++] == 0x44) { codec[0] = Codec.JIS_X_212; } else { // decode invalid ESC sequence as chars sb.append(codec[0].decode(b, cur - 4, 4)); } break; case 0x2429: switch (b[cur++]) { case 0x41: switchCodec(codec, 1, Codec.GB2312); break; case 0x43: switchCodec(codec, 1, Codec.KS_X_1001); break; default: // decode invalid ESC sequence as chars sb.append(codec[0].decode(b, cur - 4, 4)); } break; case 0x2442: codec[0] = Codec.JIS_X_208; break; case 0x2842: switchCodec(codec, 0, Codec.ISO_646); break; case 0x284a: codec[0] = Codec.JIS_X_201; if (codec[1].getEscSeq1() == 0) codec[1] = codec[0]; break; case 0x2949: codec[1] = Codec.JIS_X_201; break; case 0x2d41: switchCodec(codec, 1, Codec.ISO_8859_1); break; case 0x2d42: switchCodec(codec, 1, Codec.ISO_8859_2); break; case 0x2d43: switchCodec(codec, 1, Codec.ISO_8859_3); break; case 0x2d44: switchCodec(codec, 1, Codec.ISO_8859_4); break; case 0x2d46: switchCodec(codec, 1, Codec.ISO_8859_7); break; case 0x2d47: switchCodec(codec, 1, Codec.ISO_8859_6); break; case 0x2d48: switchCodec(codec, 1, Codec.ISO_8859_8); break; case 0x2d4c: switchCodec(codec, 1, Codec.ISO_8859_5); break; case 0x2d4d: switchCodec(codec, 1, Codec.ISO_8859_9); break; case 0x2d54: switchCodec(codec, 1, Codec.TIS_620); break; default: // decode invalid ESC sequence as chars sb.append(codec[0].decode(b, cur - 3, 3)); } off = cur; } else { if (codec[0] != codec[1] && g == (b[cur] < 0 ? 0 : 1)) { if (off < cur) { sb.append(codec[g].decode(b, off, cur - off)); } off = cur; g = 1 - g; } int bytesPerChar = codec[g].getBytesPerChar(); cur += bytesPerChar > 0 ? bytesPerChar : b[cur] < 0 ? 2 : 1; } } if (off < cur) { sb.append(codec[g].decode(b, off, cur - off)); } return sb.toString(); } private void switchCodec(Codec[] codecs, int i, Codec codec) { codecs[i] = codec; if (codecs[0].getEscSeq0() == codecs[1].getEscSeq0()) codecs[0] = codecs[1]; } } public static SpecificCharacterSet getDefaultCharacterSet() { return DEFAULT; } public static void setDefaultCharacterSet(String code) { SpecificCharacterSet cs = code != null ? valueOf(code) : ASCII; if (!cs.containsASCII()) throw new IllegalArgumentException("Default Character Set must contain ASCII - " + code); DEFAULT = cs; } public static SpecificCharacterSet valueOf(String... codes) { if (codes == null || codes.length == 0) return DEFAULT; Codec[] infos = new Codec[codes.length]; for (int i = 0; i < codes.length; i++) infos[i] = Codec.forCode(codes[i]); return codes.length > 1 ? new ISO2022(infos,codes) : new SpecificCharacterSet(infos, codes); } public String[] toCodes () { return dicomCodes; } private static Encoder encoder(ThreadLocal<SoftReference<Encoder>> tl, Codec codec) { SoftReference<Encoder> sr; Encoder enc; if ((sr = tl.get()) == null || (enc = sr.get()) == null || enc.codec != codec) tl.set(new SoftReference<Encoder>(enc = new Encoder(codec))); return enc; } protected SpecificCharacterSet(Codec[] codecs, String... codes) { this.codecs = codecs; this.dicomCodes = codes; } public byte[] encode(String val, String delimiters) { return codecs[0].encode(val); } public String decode(byte[] val) { return codecs[0].decode(val, 0, val.length); } public boolean isUTF8() { return codecs[0].equals(Codec.UTF_8); } public boolean isASCII() { return codecs[0].equals(Codec.ISO_646); } public boolean containsASCII() { return codecs[0].containsASCII(); } public String toText(String s) { return codecs[0].toText(s); } @Override public boolean equals(Object other) { if (other == null) { return false; } if (getClass() != other.getClass()) { return false; } final SpecificCharacterSet othercs = (SpecificCharacterSet) other; return Arrays.equals(this.codecs,othercs.codecs); } @Override public int hashCode() { return Arrays.hashCode(this.codecs); } }