package com.termux.terminal; import java.io.UnsupportedEncodingException; public class UnicodeInputTest extends TerminalTestCase { public void testIllFormedUtf8SuccessorByteNotConsumed() throws Exception { // The Unicode Standard Version 6.2 – Core Specification (http://www.unicode.org/versions/Unicode6.2.0/ch03.pdf): // "If the converter encounters an ill-formed UTF-8 code unit sequence which starts with a valid first byte, but which does not // continue with valid successor bytes (see Table 3-7), it must not consume the successor bytes as part of the ill-formed // subsequence whenever those successor bytes themselves constitute part of a well-formed UTF-8 code unit subsequence." withTerminalSized(5, 5); mTerminal.append(new byte[]{(byte) 0b11101111, (byte) 'a'}, 2); assertLineIs(0, ((char) TerminalEmulator.UNICODE_REPLACEMENT_CHAR) + "a "); // https://code.google.com/p/chromium/issues/detail?id=212704 byte[] input = new byte[]{ (byte) 0x61, (byte) 0xF1, (byte) 0x80, (byte) 0x80, (byte) 0xe1, (byte) 0x80, (byte) 0xc2, (byte) 0x62, (byte) 0x80, (byte) 0x63, (byte) 0x80, (byte) 0xbf, (byte) 0x64 }; withTerminalSized(10, 2); mTerminal.append(input, input.length); assertLinesAre("a\uFFFD\uFFFD\uFFFDb\uFFFDc\uFFFD\uFFFDd", " "); // Surrogate pairs. withTerminalSized(5, 2); input = new byte[]{ (byte) 0xed, (byte) 0xa0, (byte) 0x80, (byte) 0xed, (byte) 0xad, (byte) 0xbf, (byte) 0xed, (byte) 0xae, (byte) 0x80, (byte) 0xed, (byte) 0xbf, (byte) 0xbf }; mTerminal.append(input, input.length); assertLinesAre("\uFFFD\uFFFD\uFFFD\uFFFD ", " "); // https://bugzilla.mozilla.org/show_bug.cgi?id=746900: "with this patch 0xe0 0x80 is decoded as two U+FFFDs, // but 0xe0 0xa0 is decoded as a single U+FFFD, and this is correct according to the "Best Practices", but IE // and Chrome (Version 22.0.1229.94) decode both of them as two U+FFFDs. Opera 12.11 decodes both of them as // one U+FFFD". withTerminalSized(5, 2); input = new byte[]{(byte) 0xe0, (byte) 0xa0, ' '}; mTerminal.append(input, input.length); assertLinesAre("\uFFFD ", " "); // withTerminalSized(5, 2); // input = new byte[]{(byte) 0xe0, (byte) 0x80, 'a'}; // mTerminal.append(input, input.length); // assertLinesAre("\uFFFD\uFFFDa ", " "); } public void testUnassignedCodePoint() throws UnsupportedEncodingException { withTerminalSized(3, 3); // UTF-8 for U+C2541, an unassigned code point: byte[] b = new byte[]{(byte) 0xf3, (byte) 0x82, (byte) 0x95, (byte) 0x81}; mTerminal.append(b, b.length); enterString("Y"); assertEquals(1, Character.charCount(TerminalEmulator.UNICODE_REPLACEMENT_CHAR)); assertLineStartsWith(0, TerminalEmulator.UNICODE_REPLACEMENT_CHAR, (int) 'Y', ' '); } public void testStuff() { withTerminalSized(80, 24); byte[] b = new byte[]{(byte) 0xf3, (byte) 0x82, (byte) 0x95, (byte) 0x81, (byte) 0x61, (byte) 0x38, (byte) 0xe7, (byte) 0x8f, (byte) 0xae, (byte) 0xc2, (byte) 0x9f, (byte) 0xe8, (byte) 0xa0, (byte) 0x9f, (byte) 0xe8, (byte) 0x8c, (byte) 0xa4, (byte) 0xed, (byte) 0x93, (byte) 0x89, (byte) 0xef, (byte) 0xbf, (byte) 0xbd, (byte) 0x42, (byte) 0xc2, (byte) 0x9b, (byte) 0xe6, (byte) 0x87, (byte) 0x89, (byte) 0x5a}; mTerminal.append(b, b.length); } public void testSimpleCombining() throws Exception { withTerminalSized(3, 2).enterString(" a\u0302 ").assertLinesAre(" a\u0302 ", " "); } public void testCombiningCharacterInFirstColumn() throws Exception { withTerminalSized(5, 3).enterString("test\r\nhi\r\n").assertLinesAre("test ", "hi ", " "); // U+0302 is COMBINING CIRCUMFLEX ACCENT. Test case from mosh (http://mosh.mit.edu/). withTerminalSized(5, 5).enterString("test\r\nabc\r\n\u0302\r\ndef\r\n"); assertLinesAre("test ", "abc ", " \u0302 ", "def ", " "); } public void testCombiningCharacterInLastColumn() throws Exception { withTerminalSized(3, 2).enterString(" a\u0302").assertLinesAre(" a\u0302", " "); withTerminalSized(3, 2).enterString(" à̲").assertLinesAre(" à̲", " "); withTerminalSized(3, 2).enterString("Aà̲F").assertLinesAre("Aà̲F", " "); } public void testWideCharacterInLastColumn() throws Exception { withTerminalSized(3, 2).enterString(" 枝\u0302").assertLinesAre(" ", "枝\u0302 "); withTerminalSized(3, 2).enterString(" 枝").assertLinesAre(" 枝", " ").assertCursorAt(0, 2); enterString("a").assertLinesAre(" 枝", "a "); } public void testWideCharacterDeletion() throws Exception { // CSI Ps D Cursor Backward Ps Times withTerminalSized(3, 2).enterString("枝\033[Da").assertLinesAre(" a ", " "); withTerminalSized(3, 2).enterString("枝\033[2Da").assertLinesAre("a ", " "); withTerminalSized(3, 2).enterString("枝\033[2D枝").assertLinesAre("枝 ", " "); withTerminalSized(3, 2).enterString("枝\033[1D枝").assertLinesAre(" 枝", " "); withTerminalSized(5, 2).enterString(" 枝 \033[Da").assertLinesAre(" 枝a ", " "); withTerminalSized(5, 2).enterString("a \033[D\u0302").assertLinesAre("a\u0302 ", " "); withTerminalSized(5, 2).enterString("枝 \033[D\u0302").assertLinesAre("枝\u0302 ", " "); enterString("Z").assertLinesAre("枝\u0302Z ", " "); enterString("\033[D ").assertLinesAre("枝\u0302 ", " "); // Go back two columns, standing at the second half of the wide character: enterString("\033[2DU").assertLinesAre(" U ", " "); } public void testWideCharOverwriting() { withTerminalSized(3, 2).enterString("abc\033[3D枝").assertLinesAre("枝c", " "); } public void testOverlongUtf8Encoding() throws Exception { // U+0020 should be encoded as 0x20, 0xc0 0xa0 is an overlong encoding // so should be replaced with the replacement char U+FFFD. withTerminalSized(5, 5).mTerminal.append(new byte[]{(byte) 0xc0, (byte) 0xa0, 'Y'}, 3); assertLineIs(0, "\uFFFDY "); } public void testWideCharacterWithoutWrapping() throws Exception { // With wraparound disabled. The behaviour when a wide character is output with cursor in // the last column when autowrap is disabled is not obvious, but we expect the wide // character to be ignored here. withTerminalSized(3, 3).enterString("\033[?7l").enterString("枝枝枝").assertLinesAre("枝 ", " ", " "); enterString("a枝").assertLinesAre("枝a", " ", " "); } }