package jwbroek.util; public class CharsetUtil { public static boolean matchUTF8(byte[] chars) { return matchUTF8(chars, 0, chars.length); } public static boolean matchUTF8(byte[] chars, int pos, int len) { if (chars != null && pos >=0 && pos <= chars.length-len) for (int i = pos; i < pos+len; i++) { byte b = chars[i]; int n; if ((b & 255) < 0x80) n = 0; // 0bbbbbbb else if ((b & 0xE0) == 0xC0) n = 1; // 110bbbbb else if ((b & 0xF0) == 0xE0) n = 2; // 1110bbbb else if ((b & 0xF8) == 0xF0) n = 3; // 11110bbb else if ((b & 0xFC) == 0xF8) n = 4; // 111110bb else if ((b & 0xFE) == 0xFC) n = 5; // 1111110b else return false; // Does not match any model //System.out.printf("0%x - %d%n", b, n); for (int j = 0; j < n; j++) { // n bytes matching 10bbbbbb follow ? if ((++i == pos+len) || ((chars[i] & 0xC0) != 0x80)) return false; } if (n > 0) return true; } return false; } }