package lejos.charset; public class UTF8Decoder implements CharsetDecoder { private static final int MIN_NON_ASCII = 0x80; private static final int MIN_UTF8_SEQ2 = 0xC0; private static final int MIN_UTF8_SEQ3 = 0xE0; private static final int MIN_UTF8_SEQ4 = 0xF0; private static final int MIN_UTF8_SEQ5 = 0xF8; public int decode(byte[] source, int offset, int limit) { // assert limit > offset; int first = source[offset] & 0xFF; if (first < MIN_NON_ASCII) return first; if (first < MIN_UTF8_SEQ2 || first >= MIN_UTF8_SEQ5) return '?'; int len; if (first < MIN_UTF8_SEQ3) len = 2; else if (first < MIN_UTF8_SEQ4) len = 3; else len = 4; if (len > limit - offset) return '?'; first &= 0x3F >> len; for (int i = 1; i < len; i++) { int b = source[offset + i]; if ((b & 0xC0) != 0x80) return '?'; first = (first << 6) | (b & 0x3F); } return first; } public int estimateByteCount(byte[] source, int offset, int limit) { if (offset >= limit) return 1; int first = source[offset] & 0xFF; if (first < MIN_UTF8_SEQ2 || first >= MIN_UTF8_SEQ5) return 1; int len; if (first < MIN_UTF8_SEQ3) len = 2; else if (first < MIN_UTF8_SEQ4) len = 3; else len = 4; // how many bytes must and can be tested? int maxlen = limit - offset; if (maxlen > len) maxlen = len; if (maxlen > 1 && (source[offset + 1] & 0xC0) != 0x80) return 1; if (maxlen > 2 && (source[offset + 2] & 0xC0) != 0x80) return 2; if (maxlen > 3 && (source[offset + 3] & 0xC0) != 0x80) return 3; // there's not enough in the buffer ... if (len > maxlen) // ... so return conservative estimate return maxlen + 1; return len; } public int getMaxCharLength() { return 4; } }