/*
 *
 *
 * Copyright (c) 2011-2015 EPFL DATA Laboratory
 * Copyright (c) 2014-2015 The Squall Collaboration (see NOTICE)
 *
 * All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package ch.epfl.data.squall.thetajoin.matrix_assignment;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Random;

import org.apache.log4j.Logger;

/**
 * Basically, it divides the join hypercube into equal-size regions by brute-force. <br/>
 * That is, consider all divisions on each dimension such that rd[0] * ... * rd[k-1] = r <br/>
 * http://wiki.epfl.ch/bigdata2015-hypercubejoins/hcpartition
 *
 * @author Tam
 * @param <KeyType> type of the join key; this assignment never inspects it
 *            (see {@link #getRegionIDs(Dimension, Object)})
 */
public class CubeNAssignmentBruteForce<KeyType> implements Serializable, HyperCubeAssignment<KeyType> {

    // Not referenced anywhere inside this class; kept because it is public.
    public static long timeout = 1L;

    private static final long serialVersionUID = 1L;
    private static final Logger LOG = Logger.getLogger(CubeNAssignmentBruteForce.class);

    /** Source of the random slice index chosen in {@link #getRegionIDs(Dimension)}. */
    private final Random rand;

    /** Number of divisions per dimension; filled in by {@link #compute()}. */
    private final int[] _rd;

    /** Number of reducers (joiners) requested. */
    private final int _r;

    /** Size of each dimension of the join hypercube. */
    private final long[] sizes;

    /** Region index (as produced by {@link #getMappingIndexes(List)}) to region ID. */
    private Map<String, Integer> regionIDsMap;

    /** Decides whether a candidate assignment beats the best one found so far. */
    private final Comparator<Assignment> _comparator;

    // Only for testing small join cubes: the cache holds one entry per cell
    // enumerated by CellIterator over _rd.
    private boolean useRegionMapCache = false;

    /**
     * Creates an assignment that ranks candidates with a default {@link CombineCost}.
     *
     * @param sizes      size of each dimension of the join hypercube
     * @param r          number of reducers
     * @param randomSeed seed for the random generator; {@code -1} means unseeded
     */
    public CubeNAssignmentBruteForce(long[] sizes, int r, long randomSeed) {
        this(sizes, r, randomSeed, new CombineCost());
    }

    /**
     * Creates an assignment that ranks candidates with the given comparator.
     *
     * @param sizes      size of each dimension of the join hypercube
     * @param r          number of reducers
     * @param randomSeed seed for the random generator; {@code -1} means unseeded
     * @param comparator a candidate replaces the current best when
     *                   {@code compare(candidate, best) > 0}
     */
    public CubeNAssignmentBruteForce(long[] sizes, int r, long randomSeed,
            Comparator<Assignment> comparator) {
        this.rand = randomSeed == -1 ? new Random() : new Random(randomSeed);
        this.sizes = sizes;
        this._rd = new int[sizes.length];
        this._r = r;
        this._comparator = comparator;
        compute();
        if (useRegionMapCache) {
            createRegionMap();
        }
    }

    /**
     * Fills {@code _rd} with the number of divisions per dimension.
     * <p>
     * If the reducers are at least as many as the cells of the join cube, every
     * cell becomes its own region. Otherwise every arrangement of subsets of the
     * prime factors of {@code _r} over the dimensions is tried, and among those
     * whose product equals {@code _r} the one preferred by the comparator is kept.
     */
    private void compute() {
        // Repeated integer division equals floor(_r / (sizes[0] * ... * sizes[k-1]))
        // without having to form the (possibly overflowing) product.
        long compare = _r;
        for (long size : sizes) {
            compare = compare / size;
        }

        // If #joiners larger than the size of join matrix itself, each cell is
        // a partition
        if (compare >= 1) {
            for (int i = 0; i < _rd.length; i++) {
                _rd[i] = (int) sizes[i];
            }
            return;
        }

        // We find the best partition as hypercubes
        final int[] rd = new int[_rd.length];

        // Find the prime factors of _r. findBestR(_r, 0.5) could be used here
        // instead to trade unused machines for a better factorization.
        final int r = _r;
        final List<Integer> primeFactors = Utilities.primeFactors(r);

        // Get the Power Set, and iterate over it...
        final List<List<Integer>> powerSet =
                new ArrayList<List<Integer>>(Utilities.powerSet(primeFactors));
        final SetArrangementIterator generator = new SetArrangementIterator(powerSet, rd.length);

        boolean haveBest = false;
        while (generator.hasNext()) {
            final List<List<Integer>> combination = generator.next();
            for (int dim = 0; dim < rd.length; dim++) {
                rd[dim] = Utilities.multiply(combination.get(dim));
            }

            // Only arrangements that use exactly r regions are candidates.
            if (Utilities.multiply(rd) != r) {
                continue;
            }

            // The first candidate is taken as is; later ones must beat the best so far.
            if (!haveBest
                    || _comparator.compare(new Assignment(sizes, rd), new Assignment(sizes, _rd)) > 0) {
                Utilities.copy(rd, _rd);
            }
            haveBest = true;
        }
    }

    /**
     * Looks for a reducer count below {@code r} that has more prime factors.
     * Currently unused; see the note in {@link #compute()}.
     *
     * @param r        the requested number of reducers
     * @param tolerate fraction of reducers that may be left unused, in [0.5, 1]
     * @return the largest value in {@code (r * (1 - tolerate), r]} with the most
     *         prime factors as reported by {@code Utilities.primeFactors}
     */
    private int findBestR(int r, double tolerate) {
        assert tolerate <= 1 && tolerate >= 0.5;

        int bestR = r;
        List<Integer> bestPrimes = Utilities.primeFactors(r);
        for (int i = r - 1; i > r * (1 - tolerate) && i > 0; i--) {
            final List<Integer> primeFactors = Utilities.primeFactors(i);
            // Strictly more factors required, so ties keep the larger candidate.
            if (primeFactors.size() > bestPrimes.size()) {
                bestR = i;
                bestPrimes = primeFactors;
            }
        }
        return bestR;
    }

    /**
     * Builds the region-ID cache by enumerating every cell of the {@code _rd}
     * grid and storing its computed ID under its index string.
     */
    private void createRegionMap() {
        regionIDsMap = new HashMap<String, Integer>();
        final CellIterator gen = new CellIterator(_rd);
        while (gen.hasNext()) {
            final List<Integer> cellIndex = gen.next();
            // Compute directly: going through mapRegionID() here would consult
            // the very cache that is still being filled.
            regionIDsMap.put(getMappingIndexes(cellIndex), computeRegionID(cellIndex));
        }
    }

    /**
     * Returns the region ID for a region index, using the cache when it is
     * enabled and holds the entry, and computing it otherwise.
     *
     * @param regionIndex one coordinate per dimension
     * @return the region ID
     */
    private int mapRegionID(List<Integer> regionIndex) {
        assert _rd.length == regionIndex.size();

        // Look up at cache first
        if (useRegionMapCache && regionIDsMap != null) {
            final Integer cached = regionIDsMap.get(getMappingIndexes(regionIndex));
            if (cached != null) {
                return cached;
            }
        }

        // Compute if not found in cache
        return computeRegionID(regionIndex);
    }

    /**
     * Linearises a region index over the {@code _rd} grid, with the last
     * dimension varying fastest:
     * {@code id = sum_i regionIndex[i] * (_rd[i+1] * ... * _rd[k-1])}.
     */
    private int computeRegionID(List<Integer> regionIndex) {
        // Horner form of the sum above; one multiplication per dimension.
        int regionID = 0;
        for (int i = 0; i < regionIndex.size(); i++) {
            regionID = regionID * _rd[i] + regionIndex.get(i);
        }
        return regionID;
    }

    /**
     * Picks a random index along {@code dim} and returns the IDs of the regions
     * enumerated by {@code CellIterator} for that dimension and index.
     *
     * @param dim the dimension the tuple belongs to
     * @return the region IDs; empty (and an error is logged) if {@code dim} is
     *         out of range
     */
    @Override
    public List<Integer> getRegionIDs(Dimension dim) {
        final List<Integer> regionIDs = new ArrayList<Integer>();
        final int d = dim.val();

        if (d < 0 || d >= sizes.length) {
            LOG.error("ERROR not a possible index assignment.");
            return regionIDs;
        }

        final int randomIndex = rand.nextInt(_rd[d]);
        // Presumably iterates the cells whose coordinate on d is randomIndex;
        // the assert below is consistent with that.
        final CellIterator gen = new CellIterator(_rd, d, randomIndex);
        while (gen.hasNext()) {
            regionIDs.add(mapRegionID(gen.next()));
        }
        assert regionIDs.size() == Utilities.multiply(_rd) / _rd[d];

        return regionIDs;
    }

    /**
     * Not supported: this assignment does not look at tuple content.
     *
     * @throws UnsupportedOperationException always
     */
    @Override
    public List<Integer> getRegionIDs(Dimension dim, KeyType key) {
        throw new UnsupportedOperationException("This method is content-insenstive");
    }

    @Override
    public String toString() {
        return getMappingDimensions();
    }

    /**
     * @return the divisions per dimension joined by {@code '-'}, e.g. {@code "2-2-2"}
     */
    @Override
    public String getMappingDimensions() {
        final StringBuilder sb = new StringBuilder();
        String prefix = "";
        for (int r : _rd) {
            sb.append(prefix).append(r);
            prefix = "-";
        }
        return sb.toString();
    }

    /**
     * @param regionIndex one coordinate per dimension
     * @return the coordinates joined by {@code '-'}
     */
    public static String getMappingIndexes(List<Integer> regionIndex) {
        final StringBuilder sb = new StringBuilder();
        String prefix = "";
        for (Integer r : regionIndex) {
            sb.append(prefix).append(r);
            prefix = "-";
        }
        return sb.toString();
    }

    /**
     * @param dim the dimension to query
     * @return the number of divisions along {@code dim}
     * @throws RuntimeException if {@code dim} is out of range
     */
    @Override
    public int getNumberOfRegions(Dimension dim) {
        if (dim.val() >= 0 && dim.val() < _rd.length) {
            return _rd[dim.val()];
        } else {
            throw new RuntimeException("Dimension is invalid");
        }
    }

    /**
     * The actual number of regions with the best prime factorization.
     */
    public int getNumberOfRegions() {
        return Utilities.multiply(_rd);
    }

    /** Small manual smoke test that logs the assignment chosen for a few cubes. */
    public static void main(String[] args) {
        List<CubeNAssignmentBruteForce<Object>> tests = Arrays.asList(
                new CubeNAssignmentBruteForce<Object>(Utilities.arrayOf(13, 7), 1, -1),
                new CubeNAssignmentBruteForce<Object>(Utilities.arrayOf(4, 4, 4), 8, -1),
                new CubeNAssignmentBruteForce<Object>(Utilities.arrayOf(4, 4, 4, 4), 16, -1),
                new CubeNAssignmentBruteForce<Object>(Utilities.arrayOf(8, 4, 10, 7), 1000, -1),
                new CubeNAssignmentBruteForce<Object>(Utilities.arrayOf(10, 10, 10, 10), 1021, -1));

        for (CubeNAssignmentBruteForce<Object> test : tests) {
            LOG.info("Input: " + Arrays.toString(test.sizes));
            LOG.info("#Reducers each dimension: " + test.toString());
            for (int i = 0; i < 3; i++) {
                LOG.info("Get Regions of dimension 1: " + test.getRegionIDs(Dimension.d(0)).toString());
            }
        }
    }
}