/* * Licensed to Elasticsearch under one or more contributor * license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright * ownership. Elasticsearch licenses this file to you under * the Apache License, Version 2.0 (the "License"); you may * not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ package org.elasticsearch.search.aggregations.bucket; import org.elasticsearch.test.ESTestCase; import static org.hamcrest.Matchers.greaterThanOrEqualTo; public class BucketUtilsTests extends ESTestCase { public void testBadInput() { IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> BucketUtils.suggestShardSideQueueSize(0, 10)); assertEquals(e.getMessage(), "size must be positive, got 0"); e = expectThrows(IllegalArgumentException.class, () -> BucketUtils.suggestShardSideQueueSize(10, 0)); assertEquals(e.getMessage(), "number of shards must be positive, got 0"); } public void testOptimizesSingleShard() { for (int iter = 0; iter < 10; ++iter) { final int size = randomIntBetween(1, Integer.MAX_VALUE); assertEquals(size, BucketUtils.suggestShardSideQueueSize( size, 1)); } } public void testOverFlow() { for (int iter = 0; iter < 10; ++iter) { final int size = Integer.MAX_VALUE - randomInt(10); final int numberOfShards = randomIntBetween(1, 10); final int shardSize = BucketUtils.suggestShardSideQueueSize( size, numberOfShards); assertThat(shardSize, greaterThanOrEqualTo(shardSize)); } } public void testShardSizeIsGreaterThanGlobalSize() { for (int iter = 0; iter < 10; ++iter) { final int size = randomIntBetween(1, Integer.MAX_VALUE); final int numberOfShards = randomIntBetween(1, 10); final int shardSize = BucketUtils.suggestShardSideQueueSize( size, numberOfShards); assertThat(shardSize, greaterThanOrEqualTo(size)); } } /*// You may use the code below to evaluate the impact of the BucketUtils.suggestShardSideQueueSize // heuristic public static void main(String[] args) { final int numberOfUniqueTerms = 10000; final int totalNumberOfTerms = 1000000; final int numberOfShards = 10; final double skew = 2; // parameter of the zipf distribution final int size = 100; double totalWeight = 0; for (int rank = 1; rank <= numberOfUniqueTerms; ++rank) { totalWeight += weight(rank, skew); } int[] terms = new int[totalNumberOfTerms]; int len = 0; final int[] actualTopFreqs = new int[size]; for (int rank = 1; len < totalNumberOfTerms; ++rank) { int freq = (int) (weight(rank, skew) / totalWeight * totalNumberOfTerms); freq = Math.max(freq, 1); Arrays.fill(terms, len, Math.min(len + freq, totalNumberOfTerms), rank - 1); len += freq; if (rank <= size) { actualTopFreqs[rank-1] = freq; } } final int maxTerm = terms[terms.length - 1] + 1; // shuffle terms Random r = new Random(0); for (int i = terms.length - 1; i > 0; --i) { final int swapWith = r.nextInt(i); int tmp = terms[i]; terms[i] = terms[swapWith]; terms[swapWith] = tmp; } // distribute into shards like routing would int[][] shards = new int[numberOfShards][]; int upTo = 0; for (int i = 0; i < numberOfShards; ++i) { shards[i] = Arrays.copyOfRange(terms, upTo, upTo + (terms.length - upTo) / (numberOfShards - i)); upTo += shards[i].length; } final int[][] topShards = new int[numberOfShards][]; final int shardSize = BucketUtils.suggestShardSideQueueSize(size, numberOfShards); for (int shard = 0; shard < numberOfShards; ++shard) { final int[] data = shards[shard]; final int[] freqs = new int[maxTerm]; for (int d : data) { freqs[d]++; } int[] termIds = new int[maxTerm]; for (int i = 0; i < maxTerm; ++i) { termIds[i] = i; } new InPlaceMergeSorter() { @Override protected void swap(int i, int j) { int tmp = termIds[i]; termIds[i] = termIds[j]; termIds[j] = tmp; tmp = freqs[i]; freqs[i] = freqs[j]; freqs[j] = tmp; } @Override protected int compare(int i, int j) { return freqs[j] - freqs[i]; } }.sort(0, maxTerm); Arrays.fill(freqs, shardSize, freqs.length, 0); new InPlaceMergeSorter() { @Override protected void swap(int i, int j) { int tmp = termIds[i]; termIds[i] = termIds[j]; termIds[j] = tmp; tmp = freqs[i]; freqs[i] = freqs[j]; freqs[j] = tmp; } @Override protected int compare(int i, int j) { return termIds[i] - termIds[j]; } }.sort(0, maxTerm); topShards[shard] = freqs; } final int[] computedTopFreqs = new int[size]; for (int[] freqs : topShards) { for (int i = 0; i < size; ++i) { computedTopFreqs[i] += freqs[i]; } } int numErrors = 0; int totalFreq = 0; for (int i = 0; i < size; ++i) { numErrors += Math.abs(computedTopFreqs[i] - actualTopFreqs[i]); totalFreq += actualTopFreqs[i]; } System.out.println("Number of unique terms: " + maxTerm); System.out.println("Global freqs of top terms: " + Arrays.toString(actualTopFreqs)); System.out.println("Computed freqs of top terms: " + Arrays.toString(computedTopFreqs)); System.out.println("Number of errors: " + numErrors + "/" + totalFreq); } private static double weight(int rank, double skew) { return 1d / Math.pow(rank, skew); }*/ }