/*
 * Copyright (c) 2011-2015 EPFL DATA Laboratory
 * Copyright (c) 2014-2015 The Squall Collaboration (see NOTICE)
 *
 * All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package ch.epfl.data.squall.thetajoin.matrix_assignment;

import ch.epfl.data.squall.storm_components.hash_hypercube.HashHyperCubeGrouping.EmitterDesc;
import ch.epfl.data.squall.types.Type;

import org.apache.log4j.Logger;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

/**
 * Brute-force assignment of reducers to the cells of a hash hypercube.
 *
 * <p>Given a number of reducers and the set of join columns, this class
 * enumerates every factorization of the reducer count across the cube
 * dimensions and keeps the partition that minimizes the total replicated
 * workload (ties broken by the smaller maximum dimension size). Tuples are
 * then routed to regions by hashing their column values into the chosen
 * per-dimension sizes.
 */
public class HashHyperCubeAssignmentBruteForce implements Serializable,
		HashHyperCubeAssignment {

	private static final Logger LOG = Logger
			.getLogger(HashHyperCubeAssignmentBruteForce.class);

	// Total number of reducers (joiner tasks) to distribute over the cube.
	int reducers;
	// Relations feeding the join; each carries its column names and cardinality.
	List<EmitterDesc> emitters;
	// One entry per hypercube dimension (join column).
	List<ColumnDesc> columns;
	// dimensions[i] = number of partitions along dimension i; product == reducers.
	int[] dimensions;
	// Maps a serialized cell index (see mapRegionKey) to a dense region id.
	Map<String, Integer> regionIDsMap;

	public HashHyperCubeAssignmentBruteForce(int reducers,
			List<ColumnDesc> columns, List<EmitterDesc> emitters) {
		this.reducers = reducers;
		this.columns = columns;
		this.emitters = emitters;
		compute();
		createRegionMap();
	}

	/**
	 * Chooses the best cube partition and stores it in {@code dimensions}.
	 *
	 * NOTE(review): the loop bounds are {@code i = reducers; i <= reducers},
	 * so exactly one candidate (r == reducers) is evaluated. It looks like a
	 * range search was intended at some point — confirm before widening.
	 */
	private void compute() {
		for (int i = reducers; i <= reducers; i++) {
			int[] best = compute(i);

			// First candidate seen becomes the provisional best.
			if (dimensions == null) {
				dimensions = new int[best.length];
				Utilities.copy(best, dimensions);
			}

			// If the new assignment is better than the best so far, keep it.
			if (compareWorkloads(best, dimensions) < 0) {
				Utilities.copy(best, dimensions);
			}
		}
	}

	/**
	 * Brute-forces the best factorization of {@code r} across the dimensions.
	 *
	 * Enumerates assignments of prime-factor subsets to dimensions, keeps
	 * only those whose product is exactly {@code r}, and returns the one with
	 * the minimum workload.
	 *
	 * @param r number of reducers to factor over the cube
	 * @return per-dimension partition counts (all zeros if no valid
	 *         factorization was found — callers should be aware)
	 */
	private int[] compute(int r) {
		int[] partition = new int[columns.size()];

		// Find the prime factors of r.
		final List<Integer> primeFactors = Utilities.primeFactors(r);

		// Get the power set of the prime factors, and iterate over it...
		List<List<Integer>> powerSet = new ArrayList<List<Integer>>(
				Utilities.powerSet(primeFactors));

		SetArrangementIterator generator = new SetArrangementIterator(powerSet,
				partition.length);

		int count = 0;
		int[] rd = new int[columns.size()];
		while (generator.hasNext()) {
			List<List<Integer>> combination = generator.next();
			for (int dim = 0; dim < rd.length; dim++) {
				rd[dim] = Utilities.multiply(combination.get(dim));
			}

			// Only factorizations whose product is exactly r are valid.
			if (Utilities.multiply(rd) != r)
				continue;

			if (count == 0) {
				Utilities.copy(rd, partition);
			} else if (compareWorkloads(rd, partition) < 0) {
				// New assignment is better than the best assignment so far.
				Utilities.copy(rd, partition);
			}
			count++;
		}

		return partition;
	}

	/**
	 * Compares two partitions: primarily by total workload, then (on a tie)
	 * by the smaller maximum dimension size.
	 *
	 * @return negative if {@code p1} is better, positive if {@code p2} is
	 *         better, zero if equivalent
	 */
	int compareWorkloads(int[] p1, int[] p2) {
		long workload1 = getWorkload(p1);
		long workload2 = getWorkload(p2);

		if (workload1 != workload2) {
			return workload1 < workload2 ? -1 : 1;
		}
		// Tie: choose the partition with the lower maximum dimension size.
		// Integer.compare avoids the (theoretical) subtraction overflow.
		return Integer.compare(getMaxDimension(p1), getMaxDimension(p2));
	}

	/**
	 * Total workload of a partition: each emitter's cardinality divided by
	 * the product of the partition sizes over the dimensions it hashes on
	 * (i.e. how much the emitter's tuples are spread, ignoring replication).
	 */
	public long getWorkload(int[] partition) {
		long workload = 0;
		for (EmitterDesc emitter : emitters) {
			Set<String> emitterColumns = new HashSet<String>(
					Arrays.asList(emitter.columnNames));

			int replicate = 1;
			for (int i = 0; i < partition.length; i++) {
				if (emitterColumns.contains(columns.get(i).name)) {
					replicate *= partition[i];
				}
			}
			workload += emitter.cardinality / replicate;
		}
		return workload;
	}

	/** Largest per-dimension partition count in {@code partition}. */
	public int getMaxDimension(int[] partition) {
		int max = partition[0];
		for (int i = 1; i < partition.length; i++) {
			max = Math.max(max, partition[i]);
		}
		return max;
	}

	/**
	 * Returns the ids of every region a tuple must be sent to, given the
	 * column values it carries. Columns present in {@code c} pin their
	 * dimension to a hashed index; the tuple is replicated along all
	 * remaining (unpinned) dimensions.
	 *
	 * @param c map from column name to the tuple's value for that column
	 */
	@Override
	public List<Integer> getRegionIDs(Map<String, String> c) {
		List<Integer> regions = new ArrayList<Integer>();

		int[] fixedDim = new int[c.size()];
		int[] fixedIndex = new int[c.size()];

		int index = 0;
		for (int i = 0; i < columns.size(); i++) {
			if (c.containsKey(columns.get(i).name)) {
				// Calculate the hash value. floorMod (not Math.abs(...) % d)
				// guarantees a result in [0, dimensions[i]) even when
				// hashCode() == Integer.MIN_VALUE, where Math.abs overflows
				// and stays negative.
				String value = c.get(columns.get(i).name);
				int hashValue = Math.floorMod(value.hashCode(), dimensions[i]);

				fixedDim[index] = i;
				fixedIndex[index] = hashValue;
				index++;
			}
		}

		// Enumerate every cell compatible with the pinned dimensions.
		CellIterator gen = new CellIterator(dimensions, fixedDim, fixedIndex);
		while (gen.hasNext()) {
			List<Integer> cellIndex = gen.next();
			int regionID = regionIDsMap.get(mapRegionKey(cellIndex));
			regions.add(regionID);
		}

		return regions;
	}

	/** Assigns a dense id (0..reducers-1) to every cell of the cube. */
	private void createRegionMap() {
		regionIDsMap = new HashMap<String, Integer>();
		CellIterator gen = new CellIterator(dimensions);
		int i = 0;
		while (gen.hasNext()) {
			List<Integer> cellIndex = gen.next();
			regionIDsMap.put(mapRegionKey(cellIndex), i++);
		}
	}

	/** Serializes a cell index as a space-separated key for regionIDsMap. */
	private String mapRegionKey(List<Integer> cellIndex) {
		StringBuilder key = new StringBuilder("");
		for (Integer index : cellIndex) {
			key.append(" " + index);
		}
		return key.toString();
	}

	@Override
	public int getNumberOfRegions(String column) {
		// Not supported by this assignment strategy.
		throw new RuntimeException("Dimension is invalid");
	}

	/** Returns the chosen partition as a dash-separated string, e.g. "2-3-2". */
	@Override
	public String getMappingDimensions() {
		LOG.info(columns);
		StringBuilder sb = new StringBuilder();
		String prefix = "";
		for (int r : dimensions) {
			sb.append(prefix);
			prefix = "-";
			sb.append(r);
		}
		return sb.toString();
	}

	/**
	 * Description of one hypercube dimension: the column's name, type,
	 * estimated size, and (optionally) a pre-assigned dimension index.
	 */
	public static class ColumnDesc implements Serializable {
		public String name;
		public Type type;
		public int dimension;
		public long size;

		public ColumnDesc(String name, Type type, long size) {
			this(name, type, size, -1);
		}

		public ColumnDesc(String name, Type type, long size, int dimension) {
			this.name = name;
			this.type = type;
			this.size = size;
			this.dimension = dimension;
		}

		public String toString() {
			return name + " " + type + " " + dimension + " " + size;
		}
	}
}