package com.fasterxml.jackson.core.sym;

import java.io.IOException;

/**
 * Regression tests that pin bucket counts, sizes and collision statistics of
 * the char-based and byte-based symbol tables when they are filled with
 * generated field names.
 */
public class TestSymbolTables extends com.fasterxml.jackson.test.BaseTest
{
    // Eleven 3-character snippets that all hash to 0xFFFF under the default
    // JDK String.hashCode() calculation. They can be concatenated into longer
    // colliding sequences, e.g. 11 x 11 x 11 (1331) 9-character names.
    final static String[] CHAR_COLLISION_SNIPPETS_31 = {
        "@~}", "@\u007f^", "A_}", "A`^", "Aa?", "B@}", "BA^", "BB?", "C!}", "C\"^", "C#?"
    };

    // Every ordered triple of the snippets above, concatenated.
    final static String[] CHAR_COLLISIONS;
    static {
        final String[] parts = CHAR_COLLISION_SNIPPETS_31;
        final String[] combined = new String[parts.length * parts.length * parts.length];
        int next = 0;
        // Outermost loop varies slowest, so the first snippet is the most
        // significant "digit" of the resulting index.
        for (String head : parts) {
            for (String middle : parts) {
                for (String tail : parts) {
                    combined[next++] = head + middle + tail;
                }
            }
        }
        CHAR_COLLISIONS = combined;
    }

    // Currently disabled test, kept for reference.
    /*
    public void testCharBasedCollisions()
    {
        CharsToNameCanonicalizer sym = CharsToNameCanonicalizer.createRoot(0);

        // first, verify that we'd get a few collisions...
        try {
            int firstHash = 0;
            for (String str : CHAR_COLLISIONS) {
                int hash = sym.calcHash(str);
                if (firstHash == 0) {
                    firstHash = hash;
                } else {
                    assertEquals(firstHash, hash);
                }
                sym.findSymbol(str.toCharArray(), 0, str.length(), hash);
            }
            fail("Should have thrown exception");
        } catch (IllegalStateException e) {
            verifyException(e, "exceeds maximum");
            // should fail right after addition:
            assertEquals(CharsToNameCanonicalizer.MAX_COLL_CHAIN_LENGTH+1, sym.maxCollisionLength());
            assertEquals(CharsToNameCanonicalizer.MAX_COLL_CHAIN_LENGTH+1, sym.collisionCount());
            // one "non-colliding" entry (head of collision chain), thus:
            assertEquals(CharsToNameCanonicalizer.MAX_COLL_CHAIN_LENGTH+2, sym.size());
        }
    }
    */

    // Verifies that hashing stays stable, with respect to collisions, when
    // synthetic field names are added through the character-based table.
    public void testSyntheticWithChars()
    {
        // A fixed seed keeps the resulting statistics reproducible
        CharsToNameCanonicalizer table = CharsToNameCanonicalizer.createRoot(1);
        final int nameCount = 6000;
        for (int index = 0; index < nameCount; ++index) {
            String name = fieldNameFor(index);
            char[] chars = name.toCharArray();
            int hash = table.calcHash(name);
            table.findSymbol(chars, 0, chars.length, hash);
        }

        assertEquals(8192, table.bucketCount());
        assertEquals(nameCount, table.size());

        //System.out.printf("Char stuff: collisions %d, max-coll %d\n", symbols.collisionCount(), symbols.maxCollisionLength());

        // Collision counts were far too high before shuffling was added:
        // a multiplier of 31 gave 3567 (!) and 33 gave 2747. Shuffling helps
        // quite a lot, giving:
        assertEquals(1401, table.collisionCount());
        // Chain length improved too; the first attempts produced about 30
        assertEquals(4, table.maxCollisionLength());
    }

    // Verifies that hashing stays stable, with respect to collisions, when
    // synthetic field names are added through the byte-based (UTF-8) table.
    public void testSyntheticWithBytes() throws IOException
    {
        // A fixed seed keeps the resulting statistics reproducible
        BytesToNameCanonicalizer root = BytesToNameCanonicalizer.createRoot(33333);
        BytesToNameCanonicalizer table = root.makeChild(true, true);
        final int nameCount = 6000;
        for (int index = 0; index < nameCount; ++index) {
            String name = fieldNameFor(index);
            byte[] encoded = name.getBytes("UTF-8");
            int[] quads = BytesToNameCanonicalizer.calcQuads(encoded);
            table.addName(name, quads, quads.length);
        }

        assertEquals(nameCount, table.size());
        assertEquals(8192, table.bucketCount());

        //System.out.printf("Byte stuff: collisions %d, max-coll %d\n", symbols.collisionCount(), symbols.maxCollisionLength());

        // Pinned collision count for the byte-based table; still quite a few
        assertEquals(1686, table.collisionCount());
        // ...but the collision chains do not get super long:
        assertEquals(9, table.maxCollisionLength());
    }
}