// CharacterUtils.java example
//
// Explorer
// domainmath-ide-master
package com.jediterm.terminal;

import java.util.Arrays;

import com.google.common.base.Ascii;
import com.jediterm.terminal.display.CharBuffer;
import com.jediterm.terminal.emulator.charset.CharacterSets;

/**
 * Static helpers for working with terminal character data: rendering raw characters in a
 * human-readable form, building byte sequences, and computing on-screen text width.
 *
 * <p>This class holds no instance state and cannot be instantiated.
 */
public final class CharacterUtils {

  /** Code of the ASCII escape control character. */
  public static final int ESC = Ascii.ESC;
  /** Code of the ASCII delete control character. */
  public static final int DEL = Ascii.DEL;

  private CharacterUtils() {
  }

  /**
   * Mnemonics of the control characters 0x00 to 0x1F, indexed by character code.
   * Codes 0x0E and 0x0F are "SO" (Shift Out) and "SI" (Shift In), spelled with letters.
   */
  private static final String[] NONPRINTING_NAMES = {"NUL", "SOH", "STX", "ETX", "EOT", "ENQ",
    "ACK", "BEL", "BS", "TAB", "LF", "VT", "FF", "CR", "SO", "SI",
    "DLE", "DC1", "DC2", "DC3", "DC4", "NAK", "SYN", "ETB", "CAN",
    "EM", "SUB", "ESC", "FS", "GS", "RS", "US"};

  /**
   * The byte sequence {@code ESC [ ? 6 c}.
   * NOTE(review): presumably the reply a VT102 sends to a device-attributes request; confirm
   * against the emulator code that writes it. The field is intentionally left non-final so
   * existing callers keep compiling; treat the array as read-only.
   */
  public static byte[] VT102_RESPONSE = makeCode(ESC, '[', '?', '6', 'c');

  /**
   * Returns the leading run of non-control characters found in {@code buf} at {@code offset}.
   *
   * <p>At most {@code min(maxChars, charsLength)} characters are examined. The run ends at the
   * first character below 0x20; note that DEL (0x7F) is not below 0x20 and is therefore kept.
   *
   * @param maxChars    upper bound on the number of characters to return
   * @param buf         source characters
   * @param offset      index in {@code buf} where scanning starts
   * @param charsLength number of characters available in {@code buf} from {@code offset}
   * @return the run as a new string; empty if the first character is a control character or
   *         the effective limit is not positive
   */
  public static String getNonControlCharacters(int maxChars, char[] buf, int offset, int charsLength) {
    final int limit = Math.min(maxChars, charsLength);

    int end = offset;
    // Advance until the limit is reached or a control character (< 0x20) is found.
    while (end - offset < limit && buf[end] >= 0x20) {
      end++;
    }

    return new String(buf, offset, end - offset);
  }

  /**
   * Classification of a character as produced by
   * {@link #appendChar(StringBuilder, CharacterType, char)}.
   * {@code NONE} is declared for callers; nothing in this class returns it.
   */
  public enum CharacterType {
    NONPRINTING,
    PRINTING,
    NONASCII, NONE
  }

  /**
   * Appends a readable representation of {@code c} to {@code sb}.
   *
   * <ul>
   *   <li>0x00 to 0x1F: a space followed by the control character's mnemonic;</li>
   *   <li>DEL: {@code " DEL"};</li>
   *   <li>0x20 to 0x7E: the character itself, preceded by a space unless the previous
   *       character was also printing (so runs of printable text stay together);</li>
   *   <li>anything else: {@code " 0x"} followed by the lowercase hexadecimal code.</li>
   * </ul>
   *
   * @param sb   destination builder
   * @param last type of the character appended just before this one
   * @param c    character to describe
   * @return the type of {@code c}, to be passed as {@code last} on the next call
   */
  public static CharacterType appendChar(final StringBuilder sb, final CharacterType last, final char c) {
    if (c <= 0x1F) {
      sb.append(' ').append(NONPRINTING_NAMES[c]);
      return CharacterType.NONPRINTING;
    }
    if (c == DEL) {
      sb.append(" DEL");
      return CharacterType.NONPRINTING;
    }
    if (c <= 0x7E) {
      // c is already known to be above 0x1F here.
      if (last != CharacterType.PRINTING) {
        sb.append(' ');
      }
      sb.append(c);
      return CharacterType.PRINTING;
    }
    sb.append(" 0x").append(Integer.toHexString(c));
    return CharacterType.NONASCII;
  }

  /**
   * Appends a readable representation of {@code length} characters of {@code bs}, starting at
   * {@code begin}, to {@code sb}. Each character is rendered by
   * {@link #appendChar(StringBuilder, CharacterType, char)}; the first one is treated as if it
   * followed a non-printing character.
   *
   * @param sb     destination builder
   * @param bs     source characters
   * @param begin  index of the first character to render
   * @param length number of characters to render
   */
  public static void appendBuf(final StringBuilder sb, final char[] bs, final int begin, final int length) {
    CharacterType last = CharacterType.NONPRINTING;
    final int end = begin + length;
    for (int i = begin; i < end; i++) {
      last = appendChar(sb, last, bs[i]);
    }
  }


  /**
   * Builds a byte array from the given values, narrowing each {@code int} to a {@code byte}
   * (only the low 8 bits are kept).
   *
   * @param bytesAsInt values to convert, in order
   * @return a new array of the same length as the argument list
   */
  public static byte[] makeCode(final int... bytesAsInt) {
    final byte[] bytes = new byte[bytesAsInt.length];
    for (int i = 0; i < bytesAsInt.length; i++) {
      bytes[i] = (byte)bytesAsInt[i];
    }
    return bytes;
  }

  /**
   * Computes text length as sum of characters length, treating double-width(full-width) characters as 2, normal-width(half-width) as 1
   * (Read http://en.wikipedia.org/wiki/Halfwidth_and_fullwidth_forms)
   *
   * <p>The width is evaluated per {@code char}; each half of a surrogate pair therefore
   * counts as 1.
   *
   * @param buffer source characters
   * @param start  index of the first character to measure
   * @param length number of characters to measure
   * @return the summed width
   */
  public static int getTextLength(char[] buffer, int start, int length) {
    final int end = start + length;
    int result = 0;
    for (int i = start; i < end; i++) {
      result += isDoubleWidthCharacter(buffer[i]) ? 2 : 1;
    }
    return result;
  }

   /*
   * Following code is taken from iTerm2: NSStringITerm.m
   */

  /**
   * Individual ambiguous-width code points, sorted in ascending order so that the table can
   * be binary-searched.
   */
  private static final int[] AMBIGUOUS_CHARS = {
    0xa1, 0xa4, 0xa7, 0xa8, 0xaa, 0xad, 0xae, 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb6, 0xb7,
    0xb8, 0xb9, 0xba, 0xbc, 0xbd, 0xbe, 0xbf, 0xc6, 0xd0, 0xd7, 0xd8, 0xde, 0xdf, 0xe0,
    0xe1, 0xe6, 0xe8, 0xe9, 0xea, 0xec, 0xed, 0xf0, 0xf2, 0xf3, 0xf7, 0xf8, 0xf9, 0xfa,
    0xfc, 0xfe, 0x101, 0x111, 0x113, 0x11b, 0x126, 0x127, 0x12b, 0x131, 0x132, 0x133,
    0x138, 0x13f, 0x140, 0x141, 0x142, 0x144, 0x148, 0x149, 0x14a, 0x14b, 0x14d, 0x152,
    0x153, 0x166, 0x167, 0x16b, 0x1ce, 0x1d0, 0x1d2, 0x1d4, 0x1d6, 0x1d8, 0x1da, 0x1dc,
    0x251, 0x261, 0x2c4, 0x2c7, 0x2c9, 0x2ca, 0x2cb, 0x2cd, 0x2d0, 0x2d8, 0x2d9, 0x2da,
    0x2db, 0x2dd, 0x2df, 0x3a3, 0x3a4, 0x3a5, 0x3a6, 0x3a7, 0x3a8, 0x3a9, 0x3c3, 0x3c4,
    0x3c5, 0x3c6, 0x3c7, 0x3c8, 0x3c9, 0x401, 0x451, 0x2010, 0x2013, 0x2014, 0x2015,
    0x2016, 0x2018, 0x2019, 0x201c, 0x201d, 0x2020, 0x2021, 0x2022, 0x2024, 0x2025,
    0x2026, 0x2027, 0x2030, 0x2032, 0x2033, 0x2035, 0x203b, 0x203e, 0x2074, 0x207f,
    0x2081, 0x2082, 0x2083, 0x2084, 0x20ac, 0x2103, 0x2105, 0x2109, 0x2113, 0x2116,
    0x2121, 0x2122, 0x2126, 0x212b, 0x2153, 0x2154, 0x215b, 0x215c, 0x215d, 0x215e,
    0x2189, 0x21b8, 0x21b9, 0x21d2, 0x21d4, 0x21e7, 0x2200, 0x2202, 0x2203, 0x2207,
    0x2208, 0x220b, 0x220f, 0x2211, 0x2215, 0x221a, 0x221d, 0x221e, 0x221f, 0x2220,
    0x2223, 0x2225, 0x2227, 0x2228, 0x2229, 0x222a, 0x222b, 0x222c, 0x222e, 0x2234,
    0x2235, 0x2236, 0x2237, 0x223c, 0x223d, 0x2248, 0x224c, 0x2252, 0x2260, 0x2261,
    0x2264, 0x2265, 0x2266, 0x2267, 0x226a, 0x226b, 0x226e, 0x226f, 0x2282, 0x2283,
    0x2286, 0x2287, 0x2295, 0x2299, 0x22a5, 0x22bf, 0x2312, 0x2592, 0x2593, 0x2594,
    0x2595, 0x25a0, 0x25a1, 0x25a3, 0x25a4, 0x25a5, 0x25a6, 0x25a7, 0x25a8, 0x25a9,
    0x25b2, 0x25b3, 0x25b6, 0x25b7, 0x25bc, 0x25bd, 0x25c0, 0x25c1, 0x25c6, 0x25c7,
    0x25c8, 0x25cb, 0x25ce, 0x25cf, 0x25d0, 0x25d1, 0x25e2, 0x25e3, 0x25e4, 0x25e5,
    0x25ef, 0x2605, 0x2606, 0x2609, 0x260e, 0x260f, 0x2614, 0x2615, 0x261c, 0x261e,
    0x2640, 0x2642, 0x2660, 0x2661, 0x2663, 0x2664, 0x2665, 0x2667, 0x2668, 0x2669,
    0x266a, 0x266c, 0x266d, 0x266f, 0x269e, 0x269f, 0x26be, 0x26bf, 0x26e3, 0x273d,
    0x2757, 0x2b55, 0x2b56, 0x2b57, 0x2b58, 0x2b59, 0xfffd
    // This is not a complete list - there are also several large ranges that
    // are found in the code.
  };

  /**
   * Tells whether a single UTF-16 {@code char} is a wide or full-width character.
   * Ambiguous-width characters are reported as single-width. Because the argument is a
   * {@code char}, supplementary code points cannot be tested through this method; use
   * {@link #isDoubleWidthCharacter(int, boolean)} for those.
   *
   * @param unicode the character to test
   * @return {@code true} if the character is double-width
   */
  public static boolean isDoubleWidthCharacter(char unicode) {
    return isDoubleWidthCharacter(unicode, false);
  }

  /**
   * Tells whether a Unicode code point is double-width.
   *
   * @param codePoint              the code point to test; may be a supplementary code point
   * @param ambiguousIsDoubleWidth whether ambiguous-width characters are to be reported as
   *                               double-width
   * @return {@code true} if the code point is wide or full-width, or if it is ambiguous-width
   *         and {@code ambiguousIsDoubleWidth} is set
   */
  public static boolean isDoubleWidthCharacter(int codePoint, boolean ambiguousIsDoubleWidth) {
    if (codePoint <= 0xa0 ||
        (codePoint > 0x452 && codePoint < 0x1100)) {
      // Quickly cover the common cases.
      return false;
    }

    if (isWideOrFullWidth(codePoint)) {
      return true;
    }

    return ambiguousIsDoubleWidth && isAmbiguousWidth(codePoint);
  }

  /** Checks the code point against the fullwidth and wide ranges. */
  private static boolean isWideOrFullWidth(int codePoint) {
    // This list of fullwidth and wide characters comes from Unicode 6.0:
    // http://www.unicode.org/Public/6.0.0/ucd/EastAsianWidth.txt
    return (codePoint >= 0x1100 && codePoint <= 0x115f) ||
           (codePoint >= 0x11a3 && codePoint <= 0x11a7) ||
           (codePoint >= 0x11fa && codePoint <= 0x11ff) ||
           (codePoint >= 0x2329 && codePoint <= 0x232a) ||
           (codePoint >= 0x2e80 && codePoint <= 0x2e99) ||
           (codePoint >= 0x2e9b && codePoint <= 0x2ef3) ||
           (codePoint >= 0x2f00 && codePoint <= 0x2fd5) ||
           (codePoint >= 0x2ff0 && codePoint <= 0x2ffb) ||
           (codePoint >= 0x3000 && codePoint <= 0x303e) ||
           (codePoint >= 0x3041 && codePoint <= 0x3096) ||
           (codePoint >= 0x3099 && codePoint <= 0x30ff) ||
           (codePoint >= 0x3105 && codePoint <= 0x312d) ||
           (codePoint >= 0x3131 && codePoint <= 0x318e) ||
           (codePoint >= 0x3190 && codePoint <= 0x31ba) ||
           (codePoint >= 0x31c0 && codePoint <= 0x31e3) ||
           (codePoint >= 0x31f0 && codePoint <= 0x321e) ||
           (codePoint >= 0x3220 && codePoint <= 0x3247) ||
           (codePoint >= 0x3250 && codePoint <= 0x32fe) ||
           (codePoint >= 0x3300 && codePoint <= 0x4dbf) ||
           (codePoint >= 0x4e00 && codePoint <= 0xa48c) ||
           (codePoint >= 0xa490 && codePoint <= 0xa4c6) ||
           (codePoint >= 0xa960 && codePoint <= 0xa97c) ||
           (codePoint >= 0xac00 && codePoint <= 0xd7a3) ||
           (codePoint >= 0xd7b0 && codePoint <= 0xd7c6) ||
           (codePoint >= 0xd7cb && codePoint <= 0xd7fb) ||
           (codePoint >= 0xf900 && codePoint <= 0xfaff) ||
           (codePoint >= 0xfe10 && codePoint <= 0xfe19) ||
           (codePoint >= 0xfe30 && codePoint <= 0xfe52) ||
           (codePoint >= 0xfe54 && codePoint <= 0xfe66) ||
           (codePoint >= 0xfe68 && codePoint <= 0xfe6b) ||
           (codePoint >= 0xff01 && codePoint <= 0xff60) ||
           (codePoint >= 0xffe0 && codePoint <= 0xffe6) ||
           (codePoint >= 0x1b000 && codePoint <= 0x1b001) ||
           (codePoint >= 0x1f200 && codePoint <= 0x1f202) ||
           (codePoint >= 0x1f210 && codePoint <= 0x1f23a) ||
           (codePoint >= 0x1f240 && codePoint <= 0x1f248) ||
           (codePoint >= 0x1f250 && codePoint <= 0x1f251) ||
           (codePoint >= 0x20000 && codePoint <= 0x2fffd) ||
           (codePoint >= 0x30000 && codePoint <= 0x3fffd);
  }

  /** Checks the code point against the ambiguous-width ranges and individual code points. */
  private static boolean isAmbiguousWidth(int codePoint) {
    // These are the ambiguous-width characters (ibid.)
    // First check if the character falls in any range of consecutive
    // ambiguous-width characters before performing the binary search.
    // This keeps the list from being absurdly large.
    if ((codePoint >= 0x300 && codePoint <= 0x36f) ||
        (codePoint >= 0x391 && codePoint <= 0x3a1) ||
        (codePoint >= 0x3b1 && codePoint <= 0x3c1) ||
        (codePoint >= 0x410 && codePoint <= 0x44f) ||
        (codePoint >= 0x2160 && codePoint <= 0x216b) ||
        (codePoint >= 0x2170 && codePoint <= 0x2179) ||
        (codePoint >= 0x2190 && codePoint <= 0x2199) ||
        (codePoint >= 0x2460 && codePoint <= 0x24e9) ||
        (codePoint >= 0x24eb && codePoint <= 0x254b) ||
        (codePoint >= 0x2550 && codePoint <= 0x2573) ||
        (codePoint >= 0x2580 && codePoint <= 0x258f) ||
        (codePoint >= 0x26c4 && codePoint <= 0x26cd) ||
        (codePoint >= 0x26cf && codePoint <= 0x26e1) ||
        (codePoint >= 0x26e8 && codePoint <= 0x26ff) ||
        (codePoint >= 0x2776 && codePoint <= 0x277f) ||
        (codePoint >= 0x3248 && codePoint <= 0x324f) ||
        (codePoint >= 0xe000 && codePoint <= 0xf8ff) ||
        (codePoint >= 0xfe00 && codePoint <= 0xfe0f) ||
        (codePoint >= 0x1f100 && codePoint <= 0x1f10a) ||
        (codePoint >= 0x1f110 && codePoint <= 0x1f12d) ||
        (codePoint >= 0x1f130 && codePoint <= 0x1f169) ||
        (codePoint >= 0x1f170 && codePoint <= 0x1f19a) ||
        (codePoint >= 0xe0100 && codePoint <= 0xe01ef) ||
        (codePoint >= 0xf0000 && codePoint <= 0xffffd) ||
        (codePoint >= 0x100000 && codePoint <= 0x10fffd)) {
      return true;
    }

    // Now do a binary search of the individual ambiguous width code points
    // in the array above.
    return Arrays.binarySearch(AMBIGUOUS_CHARS, codePoint) >= 0;
  }

  /**
   * Creates a buffer whose characters have all been passed through
   * {@link CharacterSets#getHeavyDecSpecialChar(char)}.
   *
   * <p>The whole array returned by {@code buf.getBuf()} is copied and converted, not only the
   * {@code start}/{@code length} window, so the window offsets of the result match those of
   * the input. The array returned by {@code buf.getBuf()} is not written to.
   * NOTE(review): the mapping itself is defined in {@code CharacterSets}; presumably it swaps
   * DEC special graphics characters for heavy variants - confirm there.
   *
   * @param buf the buffer to convert
   * @return a new buffer with the same start and length over the converted copy
   */
  public static CharBuffer heavyDecCompatibleBuffer(CharBuffer buf) {
    final char[] source = buf.getBuf();
    final char[] converted = Arrays.copyOf(source, source.length);
    for (int i = 0; i < converted.length; i++) {
      converted[i] = CharacterSets.getHeavyDecSpecialChar(converted[i]);
    }
    return new CharBuffer(converted, buf.getStart(), buf.getLength());
  }

}