package com.fasterxml.jackson.core.json; import com.fasterxml.jackson.core.*; import com.fasterxml.jackson.core.io.SerializedString; import com.fasterxml.jackson.test.BaseTest; import java.io.*; import java.util.Random; /** * Set of basic unit tests for verifying that the basic parser * functionality works as expected. */ public class TestUtf8Parser extends BaseTest { final static String[] UTF8_2BYTE_STRINGS = new String[] { /* This may look funny, but UTF8 scanner has fairly * elaborate decoding machinery, and it is indeed * necessary to try out various combinations... */ "b", "A\u00D8", "abc", "c3p0", "12345", "......", "Long\u00FAer", "Latin1-fully-\u00BE-develop\u00A8d", "Some very long name, ridiculously long actually to see that buffer expansion works: \u00BF?" }; final static String[] UTF8_3BYTE_STRINGS = new String[] { "\uC823?", "A\u400F", "1\u1234?", "Ab123\u4034", "Even-longer:\uC023" }; public void testEmptyName() throws Exception { final String DOC = "{ \"\" : \"\" }"; JsonParser jp = createParserUsingStream(DOC, "UTF-8"); assertToken(JsonToken.START_OBJECT, jp.nextToken()); assertToken(JsonToken.FIELD_NAME, jp.nextToken()); assertEquals("", jp.getCurrentName()); assertToken(JsonToken.VALUE_STRING, jp.nextToken()); assertEquals("", jp.getText()); assertToken(JsonToken.END_OBJECT, jp.nextToken()); jp.close(); } public void testUtf8Name2Bytes() throws Exception { final String[] NAMES = UTF8_2BYTE_STRINGS; for (int i = 0; i < NAMES.length; ++i) { String NAME = NAMES[i]; String DOC = "{ \""+NAME+"\" : 0 }"; JsonParser jp = createParserUsingStream(DOC, "UTF-8"); assertToken(JsonToken.START_OBJECT, jp.nextToken()); assertToken(JsonToken.FIELD_NAME, jp.nextToken()); assertEquals(NAME, jp.getCurrentName()); assertToken(JsonToken.VALUE_NUMBER_INT, jp.nextToken()); // should retain name during value entry, too assertEquals(NAME, jp.getCurrentName()); assertToken(JsonToken.END_OBJECT, jp.nextToken()); jp.close(); } } public void testUtf8Name3Bytes() throws Exception { final String[] NAMES = UTF8_3BYTE_STRINGS; for (int i = 0; i < NAMES.length; ++i) { String NAME = NAMES[i]; String DOC = "{ \""+NAME+"\" : true }"; JsonParser jp = createParserUsingStream(DOC, "UTF-8"); assertToken(JsonToken.START_OBJECT, jp.nextToken()); assertToken(JsonToken.FIELD_NAME, jp.nextToken()); assertEquals(NAME, jp.getCurrentName()); assertToken(JsonToken.VALUE_TRUE, jp.nextToken()); assertEquals(NAME, jp.getCurrentName()); assertToken(JsonToken.END_OBJECT, jp.nextToken()); jp.close(); } } // How about tests for Surrogate-Pairs? public void testUtf8StringTrivial() throws Exception { String[] VALUES = UTF8_2BYTE_STRINGS; for (int i = 0; i < VALUES.length; ++i) { String VALUE = VALUES[i]; String DOC = "[ \""+VALUE+"\" ]"; JsonParser jp = createParserUsingStream(DOC, "UTF-8"); assertToken(JsonToken.START_ARRAY, jp.nextToken()); assertToken(JsonToken.VALUE_STRING, jp.nextToken()); String act = getAndVerifyText(jp); if (act.length() != VALUE.length()) { fail("Failed for value #"+(i+1)+"/"+VALUES.length+": length was "+act.length()+", should be "+VALUE.length()); } assertEquals(VALUE, act); assertToken(JsonToken.END_ARRAY, jp.nextToken()); jp.close(); } VALUES = UTF8_3BYTE_STRINGS; for (int i = 0; i < VALUES.length; ++i) { String VALUE = VALUES[i]; String DOC = "[ \""+VALUE+"\" ]"; JsonParser jp = createParserUsingStream(DOC, "UTF-8"); assertToken(JsonToken.START_ARRAY, jp.nextToken()); assertToken(JsonToken.VALUE_STRING, jp.nextToken()); assertEquals(VALUE, getAndVerifyText(jp)); assertToken(JsonToken.END_ARRAY, jp.nextToken()); jp.close(); } } public void testUtf8StringValue() throws Exception { Random r = new Random(13); //int LEN = 72000; int LEN = 720; StringBuilder sb = new StringBuilder(LEN + 20); while (sb.length() < LEN) { int c; if (r.nextBoolean()) { // ascii c = 32 + (r.nextInt() & 0x3F); if (c == '"' || c == '\\') { c = ' '; } } else if (r.nextBoolean()) { // 2-byte c = 160 + (r.nextInt() & 0x3FF); } else if (r.nextBoolean()) { // 3-byte (non-surrogate) c = 8000 + (r.nextInt() & 0x7FFF); } else { // surrogates (2 chars) int value = r.nextInt() & 0x3FFFF; // 20-bit, ~ 1 million sb.append((char) (0xD800 + (value >> 10))); c = (0xDC00 + (value & 0x3FF)); } sb.append((char) c); } ByteArrayOutputStream bout = new ByteArrayOutputStream(LEN); OutputStreamWriter out = new OutputStreamWriter(bout, "UTF-8"); out.write("[\""); String VALUE = sb.toString(); out.write(VALUE); out.write("\"]"); out.close(); byte[] data = bout.toByteArray(); JsonParser jp = new JsonFactory().createParser(new ByteArrayInputStream(data)); assertToken(JsonToken.START_ARRAY, jp.nextToken()); assertToken(JsonToken.VALUE_STRING, jp.nextToken()); String act = jp.getText(); assertEquals(VALUE.length(), act.length()); assertEquals(VALUE, act); jp.close(); } // [JACKSON-889] public void testNextFieldName() throws IOException { JsonFactory f = new JsonFactory(); SerializedString id = new SerializedString("id"); ByteArrayOutputStream os = new ByteArrayOutputStream(); os.write('{'); for (int i = 0; i < 3994; i++) { os.write(' '); } os.write("\"id\":2".getBytes("UTF-8")); os.write('}'); JsonParser parser = f.createParser(new ByteArrayInputStream(os.toByteArray())); assertEquals(parser.nextToken(), JsonToken.START_OBJECT); assertTrue(parser.nextFieldName(id)); assertEquals(parser.nextToken(), JsonToken.VALUE_NUMBER_INT); assertEquals(parser.nextToken(), JsonToken.END_OBJECT); parser.close(); } }