package test; import java.io.*; import com.fasterxml.aalto.in.*; public final class TestNameHashing { ByteBasedPNameTable mTable = null; int mCharCount = 0; protected TestNameHashing() { } protected void test(String[] args) throws Exception { if (args.length != 1) { System.err.println("Usage: java "+getClass().getName()+" <file>"); System.exit(1); } test2(args); int wordCount = mTable.size(); double avgLen = (double) mCharCount / (double) wordCount; // Let's check memory usage too: Runtime rt = Runtime.getRuntime(); long freeMin = rt.freeMemory(); System.out.println("DEBUG: Free1: "+freeMin+", total: "+rt.totalMemory()+", max: "+rt.maxMemory()); try { Thread.sleep(400L); } catch (InterruptedException ie) { } Thread.yield(); System.gc(); Thread.yield(); try { Thread.sleep(400L); } catch (InterruptedException ie) { } Thread.yield(); System.gc(); Thread.yield(); freeMin = rt.freeMemory(); System.out.println("DEBUG: Free2: "+freeMin+", total: "+rt.totalMemory()+", max: "+rt.maxMemory()); mTable.nuke(); mTable = null; try { Thread.sleep(400L); } catch (InterruptedException ie) { } Thread.yield(); System.gc(); Thread.yield(); try { Thread.sleep(400L); } catch (InterruptedException ie) { } Thread.yield(); System.gc(); Thread.yield(); long freeMax = rt.freeMemory(); System.out.println("DEBUG: Free3: "+freeMax+", total: "+rt.totalMemory()+", max: "+rt.maxMemory()); long tableSize = freeMax - freeMin; double avgSize = tableSize / (double) wordCount; System.out.println("Memory used by table: "+tableSize+" -> "+avgSize+" bytes per word ("+avgLen+" chars/word)"); } protected void test2(String[] args) throws Exception { mTable = new ByteBasedPNameTable(64); InputStream in = new FileInputStream(args[0]); BufferedReader br = new BufferedReader(new InputStreamReader(in)); System.out.println("Ok, starting to read in names: "); String word; mCharCount = 0; while ((word = br.readLine()) != null) { if (tryToFind(mTable, word) == null) { addSymbol(mTable, word); //System.out.print("+'"+word+"' "); //System.out.print('+'); mCharCount += word.length(); } else { System.out.print('.'); } } System.out.println("."); System.out.println("Done! Table: "+mTable.toString()); //System.out.println(" -> "+mTable.toDebugString()); in.close(); } PName tryToFind(ByteBasedPNameTable table, String word) { int[] quads = calcQuads(word); int hash = ByteBasedPNameTable.calcHash(quads, quads.length); if (quads.length < 3) { return table.findSymbol(hash, quads[0], (quads.length < 2) ? 0 : quads[1]); } return table.findSymbol(hash, quads, quads.length); } PName addSymbol(ByteBasedPNameTable table, String word) { int[] quads = calcQuads(word); int colonIx = word.indexOf(':'); int hash = ByteBasedPNameTable.calcHash(quads, quads.length); if (quads.length < 3) { return table.addSymbol(hash, word, colonIx, quads[0], (quads.length < 2) ? 0 : quads[1]); } return table.addSymbol(hash, word, colonIx, quads, quads.length); } int[] calcQuads(String word) { byte[] wordBytes; try { wordBytes = word.getBytes("UTF-8"); } catch (java.io.UnsupportedEncodingException ex) { throw new Error("Internal error: "+ex); // should never occur } int blen = wordBytes.length; int[] result = new int[(blen + 3) / 4]; for (int i = 0; i < blen; ++i) { int x = wordBytes[i] & 0xFF; if (++i < blen) { x = (x << 8) | (wordBytes[i] & 0xFF); if (++i < blen) { x = (x << 8) | (wordBytes[i] & 0xFF); if (++i < blen) { x = (x << 8) | (wordBytes[i] & 0xFF); } } } result[i / 4] = x; } return result; } public static void main(String[] args) throws Exception { new TestNameHashing().test(args); } }