/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hashtable;

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.util.HashMap;
import java.util.Map;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

/**
 * Micro-benchmark that loads a set of block ids (from a file or from a
 * random generator) and measures put/get times and hash distribution for
 * several hash table and hash function implementations.
 */
public class HashTableBenchmark {
  private static final Log LOG = LogFactory.getLog(HashTableBenchmark.class);

  private int capacity = 64 * 1024 * 1024;
  private int maxsize = 90000000;
  private Long[] ids;       // ids as boxed longs (linkedElements == false)
  private LongInfo[] idsLI; // ids wrapped in LongInfo (linkedElements == true)
  private int divider = 100000000;
  private int NUM_NODES = 0;
  private int hash_mask = capacity - 1;
  private String blockFile;
  private boolean linkedElements = false;
  private RandomGen rg;

  public HashTableBenchmark(String filename, int which, int capacity,
      int count, boolean linkedElements) {
    this.capacity = capacity;
    this.hash_mask = capacity - 1;
    this.maxsize = count;
    this.blockFile = filename;
    this.linkedElements = linkedElements;

    switch (which) {
    case 0:
      readBlockFile();
      break;
    default:
      rg = new RandomGen(which);
      generateRandom();
    }
  }

  /** Summarizes bucket occupancy as a (count -> number of buckets) map. */
  private String getHistogram(int[] entries) {
    Map<Integer, Integer> hist = new HashMap<Integer, Integer>();
    for (int i = 0; i < entries.length; i++) {
      Integer count = hist.get(entries[i]);
      if (count == null) {
        hist.put(entries[i], 1);
      } else {
        hist.put(entries[i], count + 1);
      }
    }
    return "HISTOGRAM: entriesLen: " + entries.length + " -- "
        + hist.toString();
  }

  // //////////////// READ + RANDOM GENERATORS

  /** Reads one block id per line from blockFile into the id array. */
  private void readBlockFile() {
    try {
      LOG.info("----> READ BLOCK FILE : START");
      initArray();
      BufferedReader br = new BufferedReader(new InputStreamReader(
          new FileInputStream(blockFile)));
      String strLine;
      NUM_NODES = 0;
      while ((strLine = br.readLine()) != null) {
        if (NUM_NODES % divider == 0)
          LOG.info("Processed : " + NUM_NODES);
        updateArray(NUM_NODES, Long.parseLong(strLine));
        NUM_NODES++;
      }
      br.close();
      LOG.info("----> READ BLOCK FILE : DONE: Read " + NUM_NODES
          + " block ids");
    } catch (Exception e) {
      LOG.error("Failed to read block file " + blockFile, e);
    }
  }

  /** Fills the id array with maxsize pseudo-random ids from RandomGen. */
  private void generateRandom() {
    initArray();
    long start, stop;
    LOG.info("---------->GENERATING RANDOM IDS ---------->");
    start = System.currentTimeMillis();
    for (int i = 0; i < maxsize; i++) {
      updateArray(i, rg.next());
    }
    NUM_NODES = maxsize;
    stop = System.currentTimeMillis();
    LOG.info("---------->GENERATING RANDOM IDS DONE -- TIME: "
        + ((stop - start) / 1000.0) + " GENERATED: " + NUM_NODES + " ids");
  }

  // //////////////////////////////////////////////////////////

  private void initArray() {
    if (linkedElements) {
      idsLI = new LongInfo[maxsize];
    } else {
      ids = new Long[maxsize];
    }
  }

  private void updateArray(int i, long id) {
    if (linkedElements) {
      idsLI[i] = new LongInfo(id);
    } else {
      ids[i] = Long.valueOf(id); // avoids the deprecated Long(long) constructor
    }
  }
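  /**
   * Hypothetical entry point, included only as a usage sketch: the original
   * benchmark defines no main method, and the argument layout and defaults
   * below are assumptions, not an actual CLI of this package.
   */
  public static void main(String[] args) {
    // args: <blockFile> <which> <capacity> <count> <linkedElements>
    // which == 0 reads ids from blockFile; any other value seeds RandomGen.
    String file = args.length > 0 ? args[0] : "";
    int which = args.length > 1 ? Integer.parseInt(args[1]) : 1;
    int capacity = args.length > 2 ? Integer.parseInt(args[2])
        : 64 * 1024 * 1024;
    int count = args.length > 3 ? Integer.parseInt(args[3]) : 1000000;
    boolean linked = args.length > 4 && Boolean.parseBoolean(args[4]);

    HashTableBenchmark bench =
        new HashTableBenchmark(file, which, capacity, count, linked);
    if (linked) {
      // Only the LongInfo array is populated when linkedElements is true.
      bench.testLightweightSetHashing(0);
    } else {
      bench.testHashFunctions();
      bench.testMultiHashing(2); // double-hash collision resolution
    }
  }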
  // //////////////////////////////////////////////////////////

  /**
   * Benchmarks put/get over the open-addressing tables: mode 0 = linear
   * probing, 1 = quadratic probing, 2 = double hashing, 3 = cuckoo hashing.
   */
  public void testMultiHashing(int mode) {
    LOG.info("+++++++++++++++++++++++++++++++++++++++++++++++++++++");
    LOG.info("-------------------->MULTIHASHING------------------->");
    long start, stop;

    THashSet c;
    if (mode == 0) {
      c = new QuadHash(capacity, 0);
      LOG.info("LINEAR COLLISION RESOLUTION");
    } else if (mode == 1) {
      c = new QuadHash(capacity, 1);
      LOG.info("QUAD COLLISION RESOLUTION");
    } else if (mode == 2) {
      c = new DoubleHash(capacity);
      LOG.info("DOUBLE HASH COLLISION RESOLUTION");
    } else if (mode == 3) {
      c = new CuckooHash(capacity);
      LOG.info("CUCKOO HASH COLLISION RESOLUTION");
    } else {
      throw new IllegalArgumentException("Unknown collision mode: " + mode);
    }

    start = System.currentTimeMillis();
    for (int i = 0; i < NUM_NODES; i++) {
      c.put(ids[i]);
    }
    stop = System.currentTimeMillis();
    LOG.info("--------------->MULTIHASHING PUT DONE--------------->");
    LOG.info(" TIME: " + ((stop - start) / 1000.0));
    LOG.info(" FAILED : " + c.getFailed());

    start = System.currentTimeMillis();
    int present = 0;
    for (int i = 0; i < NUM_NODES; i++) {
      Long getElem = c.get(ids[i]);
      if (getElem != null && getElem.equals(ids[i])) {
        present++;
      }
    }
    stop = System.currentTimeMillis();
    LOG.info("--------------->MULTIHASHING GET DONE--------------->");
    LOG.info(" TIME: " + ((stop - start) / 1000.0));
    LOG.info(" NOT PRESENT: " + (NUM_NODES - present));
  }

  /**
   * Benchmarks put/get over the LightWeightGSet variants: mode 0 uses a
   * single hash function, any other mode the double-hash variant.
   */
  public void testLightweightSetHashing(int mode) {
    LOG.info("+++++++++++++++++++++++++++++++++++++++++++++++++++++");
    LOG.info("------------------>LIGHTWEIGHTGSET------------------>");
    long start, stop;

    LightWeightSet c;
    if (mode == 0) {
      c = new LightWeightGSet(capacity);
      LOG.info("SET VERSION: ONE HASH");
    } else {
      c = new LightWeightGSetMulti(capacity);
      LOG.info("SET VERSION: DOUBLE HASH");
    }

    start = System.currentTimeMillis();
    for (int i = 0; i < NUM_NODES; i++) {
      c.put(idsLI[i]);
    }
    stop = System.currentTimeMillis();
    LOG.info("------------->LIGHTWEIGHTGSET PUT DONE-------------->");
    LOG.info(" TIME: " + ((stop - start) / 1000.0));

    start = System.currentTimeMillis();
    int present = 0;
    LongInfo tempi = new LongInfo();
    for (int i = 0; i < NUM_NODES; i++) {
      tempi.setData(idsLI[i].data);
      LongInfo getElem = c.get(tempi);
      if (getElem != null && getElem.equals(tempi)) {
        present++;
      }
    }
    stop = System.currentTimeMillis();
    LOG.info("------------->LIGHTWEIGHTGSET GET DONE-------------->");
    LOG.info(" TIME: " + ((stop - start) / 1000.0));
    LOG.info(" NOT PRESENT: " + (NUM_NODES - present));
  }

  /**
   * Measures how evenly each hash function in Hashes spreads the ids over
   * the table, reporting a bucket-occupancy histogram per function.
   */
  public void testHashFunctions() {
    long start, stop;
    int[] map;
    LOG.info("+++++++++++++++++++++++++++++++++++++++++++++++++++++");
    for (int hash = 0; hash < 7; hash++) {
      LOG.info("------------------>" + Hashes.getHashDesc(hash)
          + "------------------>");
      map = new int[capacity];
      start = System.currentTimeMillis();
      for (int i = 0; i < NUM_NODES; i++) {
        map[Hashes.getHash(ids[i], hash) & hash_mask]++;
      }
      stop = System.currentTimeMillis();
      LOG.info("TIME: " + ((stop - start) / 1000.0));
      LOG.info("HIST: " + getHistogram(map));
    }

    // Double hashing: fall back to a second hash function only when the
    // first slot is already occupied.
    LOG.info("================> Double Hashing ================>");
    map = new int[capacity];
    start = System.currentTimeMillis();
    for (int i = 0; i < NUM_NODES; i++) {
      int hash1 = Hashes.getHash32ShiftMul((int) ids[i].longValue())
          & hash_mask;
      if (map[hash1] == 0) {
        map[hash1]++;
      } else {
        map[Hashes.getHash6432shift(ids[i]) & hash_mask]++;
      }
    }
    stop = System.currentTimeMillis();
    LOG.info("TIME: " + ((stop - start) / 1000.0));
    LOG.info("HIST: " + getHistogram(map));
  }
}
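// A sketch of the element type the LightWeightGSet test above assumes. The
// real LongInfo is defined elsewhere in org.apache.hadoop.hashtable, so this
// stays in comment form to avoid clashing with it; the shape below (a mutable
// long payload, value-based equals/hashCode, plus whatever linkage
// LightWeightSet requires) is inferred from the usage above, not taken from
// the actual source.
//
//   class LongInfo {
//     long data;
//     LongInfo() { }
//     LongInfo(long data) { this.data = data; }
//     void setData(long data) { this.data = data; }
//     @Override public boolean equals(Object o) {
//       return o instanceof LongInfo && ((LongInfo) o).data == data;
//     }
//     @Override public int hashCode() {
//       return (int) (data ^ (data >>> 32)); // Long.hashCode-style mix
//     }
//   }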