package com.jediterm.terminal; import java.util.Arrays; import com.google.common.base.Ascii; import com.jediterm.terminal.display.CharBuffer; import com.jediterm.terminal.emulator.charset.CharacterSets; public class CharacterUtils { public static final int ESC = Ascii.ESC; public static final int DEL = Ascii.DEL; private CharacterUtils() { } private static final String[] NONPRINTING_NAMES = {"NUL", "SOH", "STX", "ETX", "EOT", "ENQ", "ACK", "BEL", "BS", "TAB", "LF", "VT", "FF", "CR", "S0", "S1", "DLE", "DC1", "DC2", "DC3", "DC4", "NAK", "SYN", "ETB", "CAN", "EM", "SUB", "ESC", "FS", "GS", "RS", "US"}; public static byte[] VT102_RESPONSE = makeCode(ESC, '[', '?', '6', 'c'); public static String getNonControlCharacters(int maxChars, char[] buf, int offset, int charsLength) { int len = maxChars > charsLength ? charsLength : maxChars; final int origLen = len; char tmp; while (len > 0) { tmp = buf[offset++]; if (0x20 <= tmp) { //stop when we reach control chars len--; continue; } offset--; break; } int length = origLen - len; return new String(buf, offset - length, length); } public enum CharacterType { NONPRINTING, PRINTING, NONASCII, NONE } public static CharacterType appendChar(final StringBuilder sb, final CharacterType last, final char c) { if (c <= 0x1F) { sb.append(' '); sb.append(NONPRINTING_NAMES[c]); return CharacterType.NONPRINTING; } else if (c == DEL) { sb.append(" DEL"); return CharacterType.NONPRINTING; } else if (c > 0x1F && c <= 0x7E) { if (last != CharacterType.PRINTING) sb.append(' '); sb.append(c); return CharacterType.PRINTING; } else { sb.append(" 0x").append(Integer.toHexString(c)); return CharacterType.NONASCII; } } public static void appendBuf(final StringBuilder sb, final char[] bs, final int begin, final int length) { CharacterType last = CharacterType.NONPRINTING; final int end = begin + length; for (int i = begin; i < end; i++) { final char c = (char)bs[i]; last = appendChar(sb, last, c); } } public static byte[] makeCode(final int... bytesAsInt) { final byte[] bytes = new byte[bytesAsInt.length]; int i = 0; for (final int byteAsInt : bytesAsInt) { bytes[i] = (byte)byteAsInt; i++; } return bytes; } /** * Computes text length as sum of characters length, treating double-width(full-width) characters as 2, normal-width(half-width) as 1 * (Read http://en.wikipedia.org/wiki/Halfwidth_and_fullwidth_forms) */ public static int getTextLength(char[] buffer, int start, int length) { int result = 0; for (int i = start; i < start + length; i++) { result += isDoubleWidthCharacter(buffer[i]) ? 2 : 1; } return result; } /* * Following code is taken from iTerm2: NSStringITerm.m */ private static final int ambiguous_chars[] = { 0xa1, 0xa4, 0xa7, 0xa8, 0xaa, 0xad, 0xae, 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbc, 0xbd, 0xbe, 0xbf, 0xc6, 0xd0, 0xd7, 0xd8, 0xde, 0xdf, 0xe0, 0xe1, 0xe6, 0xe8, 0xe9, 0xea, 0xec, 0xed, 0xf0, 0xf2, 0xf3, 0xf7, 0xf8, 0xf9, 0xfa, 0xfc, 0xfe, 0x101, 0x111, 0x113, 0x11b, 0x126, 0x127, 0x12b, 0x131, 0x132, 0x133, 0x138, 0x13f, 0x140, 0x141, 0x142, 0x144, 0x148, 0x149, 0x14a, 0x14b, 0x14d, 0x152, 0x153, 0x166, 0x167, 0x16b, 0x1ce, 0x1d0, 0x1d2, 0x1d4, 0x1d6, 0x1d8, 0x1da, 0x1dc, 0x251, 0x261, 0x2c4, 0x2c7, 0x2c9, 0x2ca, 0x2cb, 0x2cd, 0x2d0, 0x2d8, 0x2d9, 0x2da, 0x2db, 0x2dd, 0x2df, 0x3a3, 0x3a4, 0x3a5, 0x3a6, 0x3a7, 0x3a8, 0x3a9, 0x3c3, 0x3c4, 0x3c5, 0x3c6, 0x3c7, 0x3c8, 0x3c9, 0x401, 0x451, 0x2010, 0x2013, 0x2014, 0x2015, 0x2016, 0x2018, 0x2019, 0x201c, 0x201d, 0x2020, 0x2021, 0x2022, 0x2024, 0x2025, 0x2026, 0x2027, 0x2030, 0x2032, 0x2033, 0x2035, 0x203b, 0x203e, 0x2074, 0x207f, 0x2081, 0x2082, 0x2083, 0x2084, 0x20ac, 0x2103, 0x2105, 0x2109, 0x2113, 0x2116, 0x2121, 0x2122, 0x2126, 0x212b, 0x2153, 0x2154, 0x215b, 0x215c, 0x215d, 0x215e, 0x2189, 0x21b8, 0x21b9, 0x21d2, 0x21d4, 0x21e7, 0x2200, 0x2202, 0x2203, 0x2207, 0x2208, 0x220b, 0x220f, 0x2211, 0x2215, 0x221a, 0x221d, 0x221e, 0x221f, 0x2220, 0x2223, 0x2225, 0x2227, 0x2228, 0x2229, 0x222a, 0x222b, 0x222c, 0x222e, 0x2234, 0x2235, 0x2236, 0x2237, 0x223c, 0x223d, 0x2248, 0x224c, 0x2252, 0x2260, 0x2261, 0x2264, 0x2265, 0x2266, 0x2267, 0x226a, 0x226b, 0x226e, 0x226f, 0x2282, 0x2283, 0x2286, 0x2287, 0x2295, 0x2299, 0x22a5, 0x22bf, 0x2312, 0x2592, 0x2593, 0x2594, 0x2595, 0x25a0, 0x25a1, 0x25a3, 0x25a4, 0x25a5, 0x25a6, 0x25a7, 0x25a8, 0x25a9, 0x25b2, 0x25b3, 0x25b6, 0x25b7, 0x25bc, 0x25bd, 0x25c0, 0x25c1, 0x25c6, 0x25c7, 0x25c8, 0x25cb, 0x25ce, 0x25cf, 0x25d0, 0x25d1, 0x25e2, 0x25e3, 0x25e4, 0x25e5, 0x25ef, 0x2605, 0x2606, 0x2609, 0x260e, 0x260f, 0x2614, 0x2615, 0x261c, 0x261e, 0x2640, 0x2642, 0x2660, 0x2661, 0x2663, 0x2664, 0x2665, 0x2667, 0x2668, 0x2669, 0x266a, 0x266c, 0x266d, 0x266f, 0x269e, 0x269f, 0x26be, 0x26bf, 0x26e3, 0x273d, 0x2757, 0x2b55, 0x2b56, 0x2b57, 0x2b58, 0x2b59, 0xfffd // This is not a complete list - there are also several large ranges that // are found in the code. }; private final static int AMB_CHAR_NUMBER = ambiguous_chars.length; public static boolean isDoubleWidthCharacter(char unicode) { boolean ambiguousIsDoubleWidth = false; boolean YES = true; boolean NO = false; if (unicode <= 0xa0 || (unicode > 0x452 && unicode < 0x1100)) { // Quickly cover the common cases. return false; } // This list of fullwidth and wide characters comes from Unicode 6.0: // http://www.unicode.org/Public/6.0.0/ucd/EastAsianWidth.txt if ((unicode >= 0x1100 && unicode <= 0x115f) || (unicode >= 0x11a3 && unicode <= 0x11a7) || (unicode >= 0x11fa && unicode <= 0x11ff) || (unicode >= 0x2329 && unicode <= 0x232a) || (unicode >= 0x2e80 && unicode <= 0x2e99) || (unicode >= 0x2e9b && unicode <= 0x2ef3) || (unicode >= 0x2f00 && unicode <= 0x2fd5) || (unicode >= 0x2ff0 && unicode <= 0x2ffb) || (unicode >= 0x3000 && unicode <= 0x303e) || (unicode >= 0x3041 && unicode <= 0x3096) || (unicode >= 0x3099 && unicode <= 0x30ff) || (unicode >= 0x3105 && unicode <= 0x312d) || (unicode >= 0x3131 && unicode <= 0x318e) || (unicode >= 0x3190 && unicode <= 0x31ba) || (unicode >= 0x31c0 && unicode <= 0x31e3) || (unicode >= 0x31f0 && unicode <= 0x321e) || (unicode >= 0x3220 && unicode <= 0x3247) || (unicode >= 0x3250 && unicode <= 0x32fe) || (unicode >= 0x3300 && unicode <= 0x4dbf) || (unicode >= 0x4e00 && unicode <= 0xa48c) || (unicode >= 0xa490 && unicode <= 0xa4c6) || (unicode >= 0xa960 && unicode <= 0xa97c) || (unicode >= 0xac00 && unicode <= 0xd7a3) || (unicode >= 0xd7b0 && unicode <= 0xd7c6) || (unicode >= 0xd7cb && unicode <= 0xd7fb) || (unicode >= 0xf900 && unicode <= 0xfaff) || (unicode >= 0xfe10 && unicode <= 0xfe19) || (unicode >= 0xfe30 && unicode <= 0xfe52) || (unicode >= 0xfe54 && unicode <= 0xfe66) || (unicode >= 0xfe68 && unicode <= 0xfe6b) || (unicode >= 0xff01 && unicode <= 0xff60) || (unicode >= 0xffe0 && unicode <= 0xffe6) || (unicode >= 0x1b000 && unicode <= 0x1b001) || (unicode >= 0x1f200 && unicode <= 0x1f202) || (unicode >= 0x1f210 && unicode <= 0x1f23a) || (unicode >= 0x1f240 && unicode <= 0x1f248) || (unicode >= 0x1f250 && unicode <= 0x1f251) || (unicode >= 0x20000 && unicode <= 0x2fffd) || (unicode >= 0x30000 && unicode <= 0x3fffd)) { return YES; } // These are the ambiguous-width characters (ibid.) if (ambiguousIsDoubleWidth) { // First check if the character falls in any range of consecutive // ambiguous-width characters before performing the binary search. // This keeps the list from being absurdly large. if ((unicode >= 0x300 && unicode <= 0x36f) || (unicode >= 0x391 && unicode <= 0x3a1) || (unicode >= 0x3b1 && unicode <= 0x3c1) || (unicode >= 0x410 && unicode <= 0x44f) || (unicode >= 0x2160 && unicode <= 0x216b) || (unicode >= 0x2170 && unicode <= 0x2179) || (unicode >= 0x2190 && unicode <= 0x2199) || (unicode >= 0x2460 && unicode <= 0x24e9) || (unicode >= 0x24eb && unicode <= 0x254b) || (unicode >= 0x2550 && unicode <= 0x2573) || (unicode >= 0x2580 && unicode <= 0x258f) || (unicode >= 0x26c4 && unicode <= 0x26cd) || (unicode >= 0x26cf && unicode <= 0x26e1) || (unicode >= 0x26e8 && unicode <= 0x26ff) || (unicode >= 0x2776 && unicode <= 0x277f) || (unicode >= 0x3248 && unicode <= 0x324f) || (unicode >= 0xe000 && unicode <= 0xf8ff) || (unicode >= 0xfe00 && unicode <= 0xfe0f) || (unicode >= 0x1f100 && unicode <= 0x1f10a) || (unicode >= 0x1f110 && unicode <= 0x1f12d) || (unicode >= 0x1f130 && unicode <= 0x1f169) || (unicode >= 0x1f170 && unicode <= 0x1f19a) || (unicode >= 0xe0100 && unicode <= 0xe01ef) || (unicode >= 0xf0000 && unicode <= 0xffffd) || (unicode >= 0x100000 && unicode <= 0x10fffd)) { return YES; } // Now do a binary search of the individual ambiguous width code points // in the array above. int ind = AMB_CHAR_NUMBER / 2; int start = 0; int end = AMB_CHAR_NUMBER; while (start < end) { if (ambiguous_chars[ind] == unicode) { return YES; } else if (ambiguous_chars[ind] < unicode) { start = ind + 1; ind = (start + end) / 2; } else { end = ind; ind = (start + end) / 2; } } // Fall through if not in ambiguous character list. } return NO; } public static CharBuffer heavyDecCompatibleBuffer(CharBuffer buf) { char[] c = Arrays.copyOfRange(buf.getBuf(), 0, buf.getBuf().length); for(int i = 0; i < c.length; i++) { c[i] = CharacterSets.getHeavyDecSpecialChar(c[i]); } return new CharBuffer(c, buf.getStart(), buf.getLength()); } }