/* * * * Copyright (c) 2011-2015 EPFL DATA Laboratory * * Copyright (c) 2014-2015 The Squall Collaboration (see NOTICE) * * * * All rights reserved. * * * * Licensed under the Apache License, Version 2.0 (the "License"); * * you may not use this file except in compliance with the License. * * You may obtain a copy of the License at * * * * http://www.apache.org/licenses/LICENSE-2.0 * * * * Unless required by applicable law or agreed to in writing, software * * distributed under the License is distributed on an "AS IS" BASIS, * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * * See the License for the specific language governing permissions and * * limitations under the License. * */ package ch.epfl.data.squall.thetajoin.matrix_assignment; import java.io.Serializable; import java.util.ArrayList; import java.util.Arrays; import java.util.Comparator; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Random; import org.apache.log4j.Logger; /** * Apply the idea of equal-size matrix partitioning of the paper "Scalable and Adaptive Online Joins" for hypercube.<br/> * http://wiki.epfl.ch/bigdata2015-hypercubejoins/hcp-equal-size <br/> * Basically, it divides the join hypercube into equal-size regions by solving the two equations:<br/> * rd[0] * ... * rd[k-1] = r <br/> * sizes[0] / rd[0] = sizes[1] / rd[1] = ... = sizes[k] / rd[k] <br/> * * @author Tam * @param <KeyType> */ public class CubeNAssignmentEqui<KeyType> implements Serializable, HyperCubeAssignment<KeyType> { private static final long serialVersionUID = 1L; private static Logger LOG = Logger.getLogger(CubeNAssignmentEqui.class); private Random rand; private int[] _rd; private final int _r; private long[] sizes; private Map<String, Integer> regionIDsMap; private Comparator<Assignment> _comparator = new CombineCost(); // Only for testing small join cube, as the cache is equal to the size of join cube (=sizes[0] * ... * sizes[k-1]) private boolean useRegionMapCache = false; public CubeNAssignmentEqui(long[] sizes, int r, long randomSeed) { rand = randomSeed == -1 ? new Random() : new Random(randomSeed); this.sizes = sizes; this._rd = new int[sizes.length]; this._r = r; compute(); if (useRegionMapCache) createRegionMap(); } public CubeNAssignmentEqui(long[] sizes, int r, long randomSeed, Comparator<Assignment> comparator) { rand = randomSeed == -1 ? new Random() : new Random(randomSeed); this.sizes = sizes; this._rd = new int[sizes.length]; this._r = r; this._comparator = comparator; compute(); if (useRegionMapCache) createRegionMap(); } private void compute() { long compare = _r; for (long size : sizes) { compare = compare / size; } // If #joiners larger than the size of join matrix itself, each cell is a partition if (compare >= 1) { for (int i = 0; i < _rd.length; i++) { _rd[i] = (int) sizes[i]; } return; } // Solve the two equivalent partitioning equations double[] RdExactValues = new double[_rd.length]; for (int i = 0; i < _rd.length; i++) { double rootNOfRatios = Math.pow(_r, 1.0 / _rd.length); for (int j = 0; j < _rd.length; j++){ double ratio = (double) sizes[i] / sizes[j]; rootNOfRatios *= Math.pow(ratio, 1.0 / _rd.length); } RdExactValues[i] = rootNOfRatios; } // Round up the real values less than 1 (e.g. 0.78 -> 1) by reducing the other maximal values while (Utilities.existsLess(RdExactValues, 1.0)) { for (int i = 0; i < RdExactValues.length; i++){ if (RdExactValues[i] < 1) { int j = Utilities.indexOfMax(RdExactValues); RdExactValues[j] *= RdExactValues[i]; RdExactValues[i] = 1; } } } // Adjust the real values to integer values int[][] possibleValuesOfRd = new int[_rd.length][2]; for (int i = 0; i < _rd.length; i++) { possibleValuesOfRd[i][0] = (int) Math.floor(RdExactValues[i]); possibleValuesOfRd[i][1] = (int) Math.ceil(RdExactValues[i]); } // We find the best partition as hypercubes int[] rd = new int[_rd.length]; Arrays.fill(rd, 0); ArrangementIterator binaryGenerator = new ArrangementIterator(Arrays.asList(0,1), rd.length); int count = 0; while (binaryGenerator.hasNext()) { List<Integer> combination = binaryGenerator.next(); for (int dim = 0; dim < rd.length; dim++) { rd[dim] = possibleValuesOfRd[dim][combination.get(dim)]; } int r = Utilities.multiply(rd); if (r < _r * 0.5 || r > _r) continue; // Only tolerate from 100% to 50% of the machines (e.g 1999 -> 1000) if (count == 0) { Utilities.copy(rd, _rd); } else { // If new assignment is better than the best assignment so far if (_comparator.compare(new Assignment(sizes, rd), new Assignment(sizes, _rd)) > 0){ Utilities.copy(rd, _rd); } } count++; } assert count > 0: "Not found any assignment satisfying the equations"; } private void createRegionMap() { regionIDsMap = new HashMap<String, Integer>(); CellIterator gen = new CellIterator(_rd); while (gen.hasNext()) { List<Integer> cellIndex = gen.next(); mapRegionID(cellIndex); } } private int mapRegionID(List<Integer> regionIndex) { assert _rd.length == regionIndex.size(); // Look up at cache first if (useRegionMapCache) { assert regionIDsMap.containsKey(getMappingIndexes(regionIndex)); return regionIDsMap.get(getMappingIndexes(regionIndex)); } // Compute if not found in cache int regionID = 0; for (int i = regionIndex.size() - 1; i >= 0; i--) { int dimAmount = regionIndex.get(i); for (int dim = _rd.length - 1; dim > i; dim--) { dimAmount *= _rd[dim]; } regionID += dimAmount; } return regionID; } @Override public List<Integer> getRegionIDs(Dimension dim) { final List<Integer> regionIDs = new ArrayList<Integer>(); if (dim.val() >= 0 && dim.val() < sizes.length) { final int randomIndex = rand.nextInt(_rd[dim.val()]); CellIterator gen = new CellIterator(_rd, dim.val(), randomIndex); while (gen.hasNext()) { List<Integer> cellIndex = gen.next(); int regionID = mapRegionID(cellIndex); regionIDs.add(regionID); } assert regionIDs.size() == Utilities.multiply(_rd) / _rd[dim.val()]; } else { LOG.info("ERROR not a possible index assignment."); } return regionIDs; } @Override public List<Integer> getRegionIDs(Dimension dim, KeyType key) { throw new RuntimeException("This method is content-insenstive"); } @Override public String toString() { return getMappingDimensions(); } @Override public String getMappingDimensions() { StringBuilder sb = new StringBuilder(); String prefix = ""; for (int r : _rd) { sb.append(prefix); prefix = "-"; sb.append(r); } return sb.toString(); } public static String getMappingIndexes(List<Integer> regionIndex) { StringBuilder sb = new StringBuilder(); String prefix = ""; for (Integer r : regionIndex) { sb.append(prefix); prefix = "-"; sb.append(r); } return sb.toString(); } @Override public int getNumberOfRegions(Dimension dim) { if (dim.val() >= 0 && dim.val() < _rd.length) { return _rd[dim.val()]; } else { throw new RuntimeException("Dimension is invalid"); } } /** * The actual number of regions with the best prime factorization. */ public int getNumberOfRegions(){ return Utilities.multiply(_rd); } public static void main(String[] args) { // testcase1(); // testcase2(); // testcase3(); testcase4(); } public static void testcase1(){ List<CubeNAssignmentEqui> tests = Arrays.asList( new CubeNAssignmentEqui(Utilities.arrayOf(13, 7), 1, -1), new CubeNAssignmentEqui(Utilities.arrayOf(4, 4, 4), 8, -1), new CubeNAssignmentEqui(Utilities.arrayOf(4, 4, 4, 4), 16, -1), new CubeNAssignmentEqui(Utilities.arrayOf(8, 4, 10, 7), 1000, -1), new CubeNAssignmentEqui(Utilities.arrayOf(10, 10, 10, 10), 1021, -1) ); for (CubeNAssignmentEqui test : tests) { LOG.info("Input: " + Arrays.toString(test.sizes) + ", " + test._r); LOG.info("#Reducers each dimension: " + test.toString()); for (int i = 0; i < 3; i++) { LOG.info("Get Regions of dimension 1: " + test.getRegionIDs(Dimension.d(0)).toString()); } } } public static void testcase2(){ List<CubeNAssignmentEqui> tests = Arrays.asList( new CubeNAssignmentEqui(Utilities.arrayOf(10000, 1000,100,100,100,100,100,100), 1000, -1) ); for (CubeNAssignmentEqui test : tests) { LOG.info("Input: " + Arrays.toString(test.sizes) + ", " + test._r); LOG.info("#Reducers each dimension: " + test.toString()); for (int i = 0; i < 3; i++) { LOG.info("Get Regions of dimension 1: " + test.getRegionIDs(Dimension.d(0)).toString()); } } } public static void testcase3(){ List<CubeNAssignmentEqui> tests = Arrays.asList( new CubeNAssignmentEqui(Utilities.arrayOf(10000, 10000, 100,100,100,100,100,100), 1000, -1) ); for (CubeNAssignmentEqui test : tests) { LOG.info("Input: " + Arrays.toString(test.sizes) + ", " + test._r); LOG.info("#Reducers each dimension: " + test.toString()); for (int i = 0; i < 3; i++) { LOG.info("Get Regions of dimension 1: " + test.getRegionIDs(Dimension.d(0)).toString()); } } } public static void testcase4(){ List<CubeNAssignmentEqui> tests = Arrays.asList( new CubeNAssignmentEqui(Utilities.arrayOf(10000, 10000, 10000, 10000, 10000, 10000, 100, 100), 1000, -1) ); for (CubeNAssignmentEqui test : tests) { LOG.info("Input: " + Arrays.toString(test.sizes) + ", " + test._r); LOG.info("#Reducers each dimension: " + test.toString()); for (int i = 0; i < 3; i++) { LOG.info("Get Regions of dimension 1: " + test.getRegionIDs(Dimension.d(0)).toString()); } } } }