/* * Copyright (c) 2004, PostgreSQL Global Development Group * See the LICENSE file in the project root for more information. */ package org.postgresql.test.jdbc2; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; import org.postgresql.core.Encoding; import org.postgresql.test.TestUtil; import org.junit.After; import org.junit.Before; import org.junit.Test; import java.io.IOException; import java.sql.Connection; import java.sql.PreparedStatement; import java.sql.ResultSet; import java.sql.Statement; import java.util.Arrays; /* * Test case for various encoding problems. * * Ensure that we can do a round-trip of all server-supported unicode values without trashing them, * and that bad encodings are detected. */ public class DatabaseEncodingTest { private static final int STEP = 100; private Connection con; // Set up the fixture for this testcase: a connection to a database with // a table for this test. @Before public void setUp() throws Exception { con = TestUtil.openDB(); TestUtil.createTable(con, "testdbencoding", "unicode_ordinal integer primary key not null, unicode_string varchar(" + STEP + ")"); // disabling auto commit makes the test run faster // by not committing each insert individually. con.setAutoCommit(false); } // Tear down the fixture for this test case. @After public void tearDown() throws Exception { con.setAutoCommit(true); TestUtil.dropTable(con, "testdbencoding"); TestUtil.closeDB(con); } private static String dumpString(String s) { StringBuffer sb = new StringBuffer(s.length() * 6); for (int i = 0; i < s.length(); ++i) { sb.append("\\u"); char c = s.charAt(i); sb.append(Integer.toHexString((c >> 12) & 15)); sb.append(Integer.toHexString((c >> 8) & 15)); sb.append(Integer.toHexString((c >> 4) & 15)); sb.append(Integer.toHexString(c & 15)); } return sb.toString(); } @Test public void testEncoding() throws Exception { // Check that we have a UTF8 server encoding, or we must skip this test. Statement stmt = con.createStatement(); ResultSet rs = stmt.executeQuery("SELECT getdatabaseencoding()"); assertTrue(rs.next()); String dbEncoding = rs.getString(1); if (!dbEncoding.equals("UTF8")) { System.err.println( "DatabaseEncodingTest: Skipping UTF8 database tests as test database encoding is " + dbEncoding); rs.close(); return; // not a UTF8 database. } rs.close(); boolean testHighUnicode = true; // Create data. // NB: we avoid d800-dfff as those are reserved for surrogates in UTF-16 PreparedStatement insert = con.prepareStatement( "INSERT INTO testdbencoding(unicode_ordinal, unicode_string) VALUES (?,?)"); for (int i = 1; i < 0xd800; i += STEP) { int count = (i + STEP) > 0xd800 ? 0xd800 - i : STEP; char[] testChars = new char[count]; for (int j = 0; j < count; ++j) { testChars[j] = (char) (i + j); } String testString = new String(testChars); insert.setInt(1, i); insert.setString(2, testString); assertEquals(1, insert.executeUpdate()); } for (int i = 0xe000; i < 0x10000; i += STEP) { int count = (i + STEP) > 0x10000 ? 0x10000 - i : STEP; char[] testChars = new char[count]; for (int j = 0; j < count; ++j) { testChars[j] = (char) (i + j); } String testString = new String(testChars); insert.setInt(1, i); insert.setString(2, testString); assertEquals(1, insert.executeUpdate()); } if (testHighUnicode) { for (int i = 0x10000; i < 0x110000; i += STEP) { int count = (i + STEP) > 0x110000 ? 0x110000 - i : STEP; char[] testChars = new char[count * 2]; for (int j = 0; j < count; ++j) { testChars[j * 2] = (char) (0xd800 + ((i + j - 0x10000) >> 10)); testChars[j * 2 + 1] = (char) (0xdc00 + ((i + j - 0x10000) & 0x3ff)); } String testString = new String(testChars); insert.setInt(1, i); insert.setString(2, testString); // System.err.println("Inserting: " + dumpString(testString)); assertEquals(1, insert.executeUpdate()); } } con.commit(); // Check data. stmt.setFetchSize(1); rs = stmt.executeQuery( "SELECT unicode_ordinal, unicode_string FROM testdbencoding ORDER BY unicode_ordinal"); for (int i = 1; i < 0xd800; i += STEP) { assertTrue(rs.next()); assertEquals(i, rs.getInt(1)); int count = (i + STEP) > 0xd800 ? 0xd800 - i : STEP; char[] testChars = new char[count]; for (int j = 0; j < count; ++j) { testChars[j] = (char) (i + j); } String testString = new String(testChars); assertEquals("Test string: " + dumpString(testString), dumpString(testString), dumpString(rs.getString(2))); } for (int i = 0xe000; i < 0x10000; i += STEP) { assertTrue(rs.next()); assertEquals(i, rs.getInt(1)); int count = (i + STEP) > 0x10000 ? 0x10000 - i : STEP; char[] testChars = new char[count]; for (int j = 0; j < count; ++j) { testChars[j] = (char) (i + j); } String testString = new String(testChars); assertEquals("Test string: " + dumpString(testString), dumpString(testString), dumpString(rs.getString(2))); } if (testHighUnicode) { for (int i = 0x10000; i < 0x110000; i += STEP) { assertTrue(rs.next()); assertEquals(i, rs.getInt(1)); int count = (i + STEP) > 0x110000 ? 0x110000 - i : STEP; char[] testChars = new char[count * 2]; for (int j = 0; j < count; ++j) { testChars[j * 2] = (char) (0xd800 + ((i + j - 0x10000) >> 10)); testChars[j * 2 + 1] = (char) (0xdc00 + ((i + j - 0x10000) & 0x3ff)); } String testString = new String(testChars); assertEquals("Test string: " + dumpString(testString), dumpString(testString), dumpString(rs.getString(2))); } } } @Test public void testUTF8Decode() throws Exception { // Tests for our custom UTF-8 decoder. Encoding utf8Encoding = Encoding.getJVMEncoding("UTF-8"); for (int ch = 0; ch < 0x110000; ++ch) { if (ch >= 0xd800 && ch < 0xe000) { continue; // Surrogate range. } String testString; if (ch >= 0x10000) { testString = new String(new char[]{(char) (0xd800 + ((ch - 0x10000) >> 10)), (char) (0xdc00 + ((ch - 0x10000) & 0x3ff))}); } else { testString = new String(new char[]{(char) ch}); } byte[] jvmEncoding = testString.getBytes("UTF-8"); String jvmDecoding = new String(jvmEncoding, 0, jvmEncoding.length, "UTF-8"); String ourDecoding = utf8Encoding.decode(jvmEncoding, 0, jvmEncoding.length); assertEquals("Test string: " + dumpString(testString), dumpString(testString), dumpString(jvmDecoding)); assertEquals("Test string: " + dumpString(testString), dumpString(testString), dumpString(ourDecoding)); } } @Test public void testBadUTF8Decode() throws Exception { Encoding utf8Encoding = Encoding.getJVMEncoding("UTF-8"); byte[][] badSequences = new byte[][]{ // One-byte illegal sequences {(byte) 0x80}, // First byte may not be 10xxxxxx // Two-byte illegal sequences {(byte) 0xc0, (byte) 0x00}, // Second byte must be 10xxxxxx {(byte) 0xc0, (byte) 0x80}, // Can't represent a value < 0x80 // Three-byte illegal sequences {(byte) 0xe0, (byte) 0x00}, // Second byte must be 10xxxxxx {(byte) 0xe0, (byte) 0x80, (byte) 0x00}, // Third byte must be 10xxxxxx {(byte) 0xe0, (byte) 0x80, (byte) 0x80}, // Can't represent a value < 0x800 {(byte) 0xed, (byte) 0xa0, (byte) 0x80}, // Not allowed to encode the range d800..dfff // Four-byte illegal sequences {(byte) 0xf0, (byte) 0x00}, // Second byte must be 10xxxxxx {(byte) 0xf0, (byte) 0x80, (byte) 0x00}, // Third byte must be 10xxxxxx {(byte) 0xf0, (byte) 0x80, (byte) 0x80, (byte) 0x00}, // Fourth byte must be 10xxxxxx {(byte) 0xf0, (byte) 0x80, (byte) 0x80, (byte) 0x80}, // Can't represent a value < 0x10000 // Five-byte illegal sequences {(byte) 0xf8}, // Can't have a five-byte sequence. // Six-byte illegal sequences {(byte) 0xfc}, // Can't have a six-byte sequence. // Seven-byte illegal sequences {(byte) 0xfe}, // Can't have a seven-byte sequence. // Eigth-byte illegal sequences {(byte) 0xff}, // Can't have an eight-byte sequence. }; byte[] paddedSequence = new byte[32]; for (int i = 0; i < badSequences.length; ++i) { byte[] sequence = badSequences[i]; try { String str = utf8Encoding.decode(sequence, 0, sequence.length); fail("Expected an IOException on sequence " + i + ", but decoded to <" + str + ">"); } catch (IOException ioe) { // Expected exception. } // Try it with padding. Arrays.fill(paddedSequence, (byte) 0); System.arraycopy(sequence, 0, paddedSequence, 0, sequence.length); try { String str = utf8Encoding.decode(paddedSequence, 0, paddedSequence.length); fail("Expected an IOException on sequence " + i + ", but decoded to <" + str + ">"); } catch (IOException ioe) { // Expected exception. } } } @Test public void testTruncatedUTF8Decode() throws Exception { Encoding utf8Encoding = Encoding.getJVMEncoding("UTF-8"); byte[][] shortSequences = new byte[][]{{(byte) 0xc0}, // Second byte must be present {(byte) 0xe0}, // Second byte must be present {(byte) 0xe0, (byte) 0x80}, // Third byte must be present {(byte) 0xf0}, // Second byte must be present {(byte) 0xf0, (byte) 0x80}, // Third byte must be present {(byte) 0xf0, (byte) 0x80, (byte) 0x80}, // Fourth byte must be present }; byte[] paddedSequence = new byte[32]; for (int i = 0; i < shortSequences.length; ++i) { byte[] sequence = shortSequences[i]; try { String str = utf8Encoding.decode(sequence, 0, sequence.length); fail("Expected an IOException on sequence " + i + ", but decoded to <" + str + ">"); } catch (IOException ioe) { // Expected exception. } // Try it with padding and a truncated length. Arrays.fill(paddedSequence, (byte) 0); System.arraycopy(sequence, 0, paddedSequence, 0, sequence.length); try { String str = utf8Encoding.decode(paddedSequence, 0, sequence.length); fail("Expected an IOException on sequence " + i + ", but decoded to <" + str + ">"); } catch (IOException ioe) { // Expected exception. } } } }