/*********************************************************************************************************************** * * Copyright (C) 2010-2013 by the Stratosphere project (http://stratosphere.eu) * * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the * specific language governing permissions and limitations under the License. * **********************************************************************************************************************/ package eu.stratosphere.pact.runtime.hash; import static org.junit.Assert.assertArrayEquals; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; import java.util.ArrayList; import java.util.List; import java.util.Random; import org.junit.Test; import eu.stratosphere.api.common.typeutils.TypeComparator; import eu.stratosphere.api.common.typeutils.TypePairComparator; import eu.stratosphere.api.common.typeutils.TypeSerializer; import eu.stratosphere.core.memory.MemorySegment; import eu.stratosphere.pact.runtime.test.util.UniformStringPairGenerator; import eu.stratosphere.pact.runtime.test.util.types.IntList; import eu.stratosphere.pact.runtime.test.util.types.IntListComparator; import eu.stratosphere.pact.runtime.test.util.types.IntListPairComparator; import eu.stratosphere.pact.runtime.test.util.types.IntListSerializer; import eu.stratosphere.pact.runtime.test.util.types.IntPair; import eu.stratosphere.pact.runtime.test.util.types.IntPairComparator; import eu.stratosphere.pact.runtime.test.util.types.IntPairListPairComparator; import eu.stratosphere.pact.runtime.test.util.types.IntPairPairComparator; import eu.stratosphere.pact.runtime.test.util.types.IntPairSerializer; import eu.stratosphere.pact.runtime.test.util.types.StringPair; import eu.stratosphere.pact.runtime.test.util.types.StringPairComparator; import eu.stratosphere.pact.runtime.test.util.types.StringPairPairComparator; import eu.stratosphere.pact.runtime.test.util.types.StringPairSerializer; import eu.stratosphere.util.MutableObjectIterator; public class MemoryHashTableTest { private static final long RANDOM_SEED = 76518743207143L; private static final int KEY_VALUE_DIFF = 1021; private static final int PAGE_SIZE = 16 * 1024; private final Random rnd = new Random(RANDOM_SEED); private final TypeSerializer<IntPair> serializer = new IntPairSerializer(); private final TypeComparator<IntPair> comparator = new IntPairComparator(); private final TypePairComparator<IntPair, IntPair> pairComparator = new IntPairPairComparator(); private static final int MAX_LIST_SIZE = 8; private final TypeSerializer<IntList> serializerV = new IntListSerializer(); private final TypeComparator<IntList> comparatorV = new IntListComparator(); private final TypePairComparator<IntList, IntList> pairComparatorV = new IntListPairComparator(); private final TypePairComparator<IntPair, IntList> pairComparatorPL =new IntPairListPairComparator(); private final int SIZE = 80; //FIXME 75 triggers serialization bug in testVariableLengthBuildAndRetrieve private final int NUM_PAIRS = 100000; private final int NUM_LISTS = 100000; private final TypeSerializer<StringPair> serializerS = new StringPairSerializer(); private final TypeComparator<StringPair> comparatorS = new StringPairComparator(); private final TypePairComparator<StringPair, StringPair> pairComparatorS = new StringPairPairComparator(); public void testDifferentProbers() { final int NUM_MEM_PAGES = 32 * NUM_PAIRS / PAGE_SIZE; AbstractMutableHashTable<IntPair> table = new CompactingHashTable<IntPair>(serializer, comparator, getMemory(NUM_MEM_PAGES, PAGE_SIZE)); AbstractHashTableProber<IntPair, IntPair> prober1 = table.getProber(comparator, pairComparator); AbstractHashTableProber<IntPair, IntPair> prober2 = table.getProber(comparator, pairComparator); assertFalse(prober1 == prober2); } @Test public void testBuildAndRetrieve() { try { final int NUM_MEM_PAGES = 32 * NUM_PAIRS / PAGE_SIZE; final IntPair[] pairs = getRandomizedIntPairs(NUM_PAIRS, rnd); AbstractMutableHashTable<IntPair> table = new CompactingHashTable<IntPair>(serializer, comparator, getMemory(NUM_MEM_PAGES, PAGE_SIZE)); table.open(); for (int i = 0; i < NUM_PAIRS; i++) { table.insert(pairs[i]); } AbstractHashTableProber<IntPair, IntPair> prober = table.getProber(comparator, pairComparator); IntPair target = new IntPair(); for (int i = 0; i < NUM_PAIRS; i++) { assertTrue(prober.getMatchFor(pairs[i], target)); assertEquals(pairs[i].getValue(), target.getValue()); } table.close(); assertEquals("Memory lost", NUM_MEM_PAGES, table.getFreeMemory().size()); } catch (Exception e) { e.printStackTrace(); fail("Error: " + e.getMessage()); } } @Test public void testEntryIterator() { try { final int NUM_MEM_PAGES = SIZE * NUM_LISTS / PAGE_SIZE; final IntList[] lists = getRandomizedIntLists(NUM_LISTS, rnd); AbstractMutableHashTable<IntList> table = new CompactingHashTable<IntList>(serializerV, comparatorV, getMemory(NUM_MEM_PAGES, PAGE_SIZE)); table.open(); int result = 0; for (int i = 0; i < NUM_LISTS; i++) { table.insert(lists[i]); result += lists[i].getKey(); } MutableObjectIterator<IntList> iter = table.getEntryIterator(); IntList target = new IntList(); int sum = 0; while((target = iter.next(target)) != null) { sum += target.getKey(); } table.close(); assertTrue(sum == result); assertEquals("Memory lost", NUM_MEM_PAGES, table.getFreeMemory().size()); } catch (Exception e) { e.printStackTrace(); fail("Error: " + e.getMessage()); } } @Test public void testMultipleProbers() { try { final int NUM_MEM_PAGES = SIZE * NUM_LISTS / PAGE_SIZE; final IntList[] lists = getRandomizedIntLists(NUM_LISTS, rnd); final IntPair[] pairs = getRandomizedIntPairs(NUM_LISTS, rnd); AbstractMutableHashTable<IntList> table = new CompactingHashTable<IntList>(serializerV, comparatorV, getMemory(NUM_MEM_PAGES, PAGE_SIZE)); table.open(); for (int i = 0; i < NUM_LISTS; i++) { table.insert(lists[i]); } AbstractHashTableProber<IntList, IntList> listProber = table.getProber(comparatorV, pairComparatorV); AbstractHashTableProber<IntPair, IntList> pairProber = table.getProber(comparator, pairComparatorPL); IntList target = new IntList(); for (int i = 0; i < NUM_LISTS; i++) { assertTrue(pairProber.getMatchFor(pairs[i], target)); assertTrue(listProber.getMatchFor(lists[i], target)); assertArrayEquals(lists[i].getValue(), target.getValue()); } } catch (Exception e) { e.printStackTrace(); fail("Error: " + e.getMessage()); } } @Test public void testVariableLengthBuildAndRetrieve() { try { final int NUM_MEM_PAGES = SIZE * NUM_LISTS / PAGE_SIZE; final IntList[] lists = getRandomizedIntLists(NUM_LISTS, rnd); AbstractMutableHashTable<IntList> table = new CompactingHashTable<IntList>(serializerV, comparatorV, getMemory(NUM_MEM_PAGES, PAGE_SIZE)); table.open(); for (int i = 0; i < NUM_LISTS; i++) { try { table.insert(lists[i]); } catch (Exception e) { //System.out.println("index: " + i + " "); throw e; } } AbstractHashTableProber<IntList, IntList> prober = table.getProber(comparatorV, pairComparatorV); IntList target = new IntList(); for (int i = 0; i < NUM_LISTS; i++) { assertTrue(prober.getMatchFor(lists[i], target)); assertArrayEquals(lists[i].getValue(), target.getValue()); } final IntList[] overwriteLists = getRandomizedIntLists(NUM_LISTS, rnd); // test replacing IntList tempHolder = new IntList(); for (int i = 0; i < NUM_LISTS; i++) { table.insertOrReplaceRecord(overwriteLists[i], tempHolder); } for (int i = 0; i < NUM_LISTS; i++) { assertTrue(prober.getMatchFor(overwriteLists[i], target)); assertArrayEquals(overwriteLists[i].getValue(), target.getValue()); } table.close(); assertEquals("Memory lost", NUM_MEM_PAGES, table.getFreeMemory().size()); } catch (Exception e) { e.printStackTrace(); fail("Error: " + e.getMessage()); } } @Test public void testVariableLengthBuildAndRetrieveMajorityUpdated() { try { final int NUM_MEM_PAGES = SIZE * NUM_LISTS / PAGE_SIZE; final IntList[] lists = getRandomizedIntLists(NUM_LISTS, rnd); AbstractMutableHashTable<IntList> table = new CompactingHashTable<IntList>(serializerV, comparatorV, getMemory(NUM_MEM_PAGES, PAGE_SIZE)); table.open(); for (int i = 0; i < NUM_LISTS; i++) { table.insert(lists[i]); } AbstractHashTableProber<IntList, IntList> prober = table.getProber(comparatorV, pairComparatorV); IntList target = new IntList(); for (int i = 0; i < NUM_LISTS; i++) { assertTrue(prober.getMatchFor(lists[i], target)); assertArrayEquals(lists[i].getValue(), target.getValue()); } final IntList[] overwriteLists = getRandomizedIntLists(NUM_LISTS, rnd); // test replacing IntList tempHolder = new IntList(); for (int i = 0; i < NUM_LISTS; i++) { if( i % 100 != 0) { table.insertOrReplaceRecord(overwriteLists[i], tempHolder); lists[i] = overwriteLists[i]; } } for (int i = 0; i < NUM_LISTS; i++) { assertTrue(prober.getMatchFor(lists[i], target)); assertArrayEquals(lists[i].getValue(), target.getValue()); } table.close(); assertEquals("Memory lost", NUM_MEM_PAGES, table.getFreeMemory().size()); } catch (Exception e) { e.printStackTrace(); fail("Error: " + e.getMessage()); } } @Test public void testVariableLengthBuildAndRetrieveMinorityUpdated() { try { final int NUM_LISTS = 20000; final int NUM_MEM_PAGES = SIZE * NUM_LISTS / PAGE_SIZE; final int STEP_SIZE = 100; final IntList[] lists = getRandomizedIntLists(NUM_LISTS, rnd); AbstractMutableHashTable<IntList> table = new CompactingHashTable<IntList>(serializerV, comparatorV, getMemory(NUM_MEM_PAGES, PAGE_SIZE)); table.open(); for (int i = 0; i < NUM_LISTS; i++) { table.insert(lists[i]); } AbstractHashTableProber<IntList, IntList> prober = table.getProber(comparatorV, pairComparatorV); IntList target = new IntList(); for (int i = 0; i < NUM_LISTS; i++) { assertTrue(prober.getMatchFor(lists[i], target)); assertArrayEquals(lists[i].getValue(), target.getValue()); } final IntList[] overwriteLists = getRandomizedIntLists(NUM_LISTS/STEP_SIZE, rnd); // test replacing IntList tempHolder = new IntList(); for (int i = 0; i < NUM_LISTS; i += STEP_SIZE) { overwriteLists[i/STEP_SIZE].setKey(overwriteLists[i/STEP_SIZE].getKey()*STEP_SIZE); table.insertOrReplaceRecord(overwriteLists[i/STEP_SIZE], tempHolder); lists[i] = overwriteLists[i/STEP_SIZE]; } for (int i = 0; i < NUM_LISTS; i++) { assertTrue(prober.getMatchFor(lists[i], target)); assertArrayEquals(lists[i].getValue(), target.getValue()); } table.close(); assertEquals("Memory lost", NUM_MEM_PAGES, table.getFreeMemory().size()); } catch (Exception e) { e.printStackTrace(); fail("Error: " + e.getMessage()); } } @Test public void testVariableLengthStringBuildAndRetrieve() { try { final int NUM_MEM_PAGES = 40 * NUM_PAIRS / PAGE_SIZE; MutableObjectIterator<StringPair> buildInput = new UniformStringPairGenerator(NUM_PAIRS, 1, false); MutableObjectIterator<StringPair> probeTester = new UniformStringPairGenerator(NUM_PAIRS, 1, false); MutableObjectIterator<StringPair> updater = new UniformStringPairGenerator(NUM_PAIRS, 1, false); MutableObjectIterator<StringPair> updateTester = new UniformStringPairGenerator(NUM_PAIRS, 1, false); //long start = 0L; //long end = 0L; //long first = System.currentTimeMillis(); //System.out.println("Creating and filling CompactingHashMap..."); //start = System.currentTimeMillis(); AbstractMutableHashTable<StringPair> table = new CompactingHashTable<StringPair>(serializerS, comparatorS, getMemory(NUM_MEM_PAGES, PAGE_SIZE)); table.open(); StringPair target = new StringPair(); while(buildInput.next(target) != null) { table.insert(target); } //end = System.currentTimeMillis(); //System.out.println("HashMap ready. Time: " + (end-start) + " ms"); //System.out.println("Starting first probing run..."); //start = System.currentTimeMillis(); AbstractHashTableProber<StringPair, StringPair> prober = table.getProber(comparatorS, pairComparatorS); StringPair temp = new StringPair(); while(probeTester.next(target) != null) { assertTrue(prober.getMatchFor(target, temp)); assertEquals(temp.getValue(), target.getValue()); } //end = System.currentTimeMillis(); //System.out.println("Probing done. Time: " + (end-start) + " ms"); //System.out.println("Starting update..."); //start = System.currentTimeMillis(); while(updater.next(target) != null) { target.setValue(target.getValue()); table.insertOrReplaceRecord(target, temp); } //end = System.currentTimeMillis(); //System.out.println("Update done. Time: " + (end-start) + " ms"); //System.out.println("Starting second probing run..."); //start = System.currentTimeMillis(); while (updateTester.next(target) != null) { assertTrue(prober.getMatchFor(target, temp)); assertEquals(target.getValue(), temp.getValue()); } //end = System.currentTimeMillis(); //System.out.println("Probing done. Time: " + (end-start) + " ms"); table.close(); //end = System.currentTimeMillis(); //System.out.println("Overall time: " + (end-first) + " ms"); assertEquals("Memory lost", NUM_MEM_PAGES, table.getFreeMemory().size()); } catch (Exception e) { e.printStackTrace(); fail("Error: " + e.getMessage()); } } private static IntPair[] getRandomizedIntPairs(int num, Random rnd) { IntPair[] pairs = new IntPair[num]; // create all the pairs, dense for (int i = 0; i < num; i++) { pairs[i] = new IntPair(i, i + KEY_VALUE_DIFF); } // randomly swap them for (int i = 0; i < 2 * num; i++) { int pos1 = rnd.nextInt(num); int pos2 = rnd.nextInt(num); IntPair tmp = pairs[pos1]; pairs[pos1] = pairs[pos2]; pairs[pos2] = tmp; } return pairs; } private static IntList[] getRandomizedIntLists(int num, Random rnd) { IntList[] lists = new IntList[num]; for (int i = 0; i < num; i++) { int[] value = new int[rnd.nextInt(MAX_LIST_SIZE)+1]; //int[] value = new int[MAX_LIST_SIZE-1]; for (int j = 0; j < value.length; j++) { value[j] = -rnd.nextInt(Integer.MAX_VALUE); } lists[i] = new IntList(i, value); } return lists; } private static List<MemorySegment> getMemory(int numPages, int pageSize) { List<MemorySegment> memory = new ArrayList<MemorySegment>(); for (int i = 0; i < numPages; i++) { memory.add(new MemorySegment(new byte[pageSize])); } return memory; } }