/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.solr.common.cloud; import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrInputDocument; import org.apache.solr.common.params.SolrParams; import org.apache.solr.common.util.Hash; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.List; // // user!uniqueid // app!user!uniqueid // user/4!uniqueid // app/2!user/4!uniqueid // public class CompositeIdRouter extends HashBasedRouter { public static final String NAME = "compositeId"; public static final String SEPARATOR = "!"; // separator used to optionally specify number of bits to allocate toward first part. public static final int bitsSeparator = '/'; private int bits = 16; @Override public int sliceHash(String id, SolrInputDocument doc, SolrParams params, DocCollection collection) { String shardFieldName = getRouteField(collection); if (shardFieldName != null && doc != null) { Object o = doc.getFieldValue(shardFieldName); if (o == null) throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "No value for :" + shardFieldName + ". Unable to identify shard"); id = o.toString(); } if (id.indexOf(SEPARATOR) < 0) { return Hash.murmurhash3_x86_32(id, 0, id.length(), 0); } return new KeyParser(id).getHash(); } /** * Get Range for a given CompositeId based route key * * @param routeKey to return Range for * @return Range for given routeKey */ public Range keyHashRange(String routeKey) { if (routeKey.indexOf(SEPARATOR) < 0) { int hash = sliceHash(routeKey, null, null, null); return new Range(hash, hash); } return new KeyParser(routeKey).getRange(); } @Override public Collection<Slice> getSearchSlicesSingle(String shardKey, SolrParams params, DocCollection collection) { if (shardKey == null) { // search across whole collection // TODO: this may need modification in the future when shard splitting could cause an overlap return collection.getActiveSlices(); } String id = shardKey; if (shardKey.indexOf(SEPARATOR) < 0) { // shardKey is a simple id, so don't do a range return Collections.singletonList(hashToSlice(Hash.murmurhash3_x86_32(id, 0, id.length(), 0), collection)); } Range completeRange = new KeyParser(id).getRange(); List<Slice> targetSlices = new ArrayList<>(1); for (Slice slice : collection.getActiveSlices()) { Range range = slice.getRange(); if (range != null && range.overlaps(completeRange)) { targetSlices.add(slice); } } return targetSlices; } public List<Range> partitionRangeByKey(String key, Range range) { List<Range> result = new ArrayList<>(3); Range keyRange = keyHashRange(key); if (!keyRange.overlaps(range)) { throw new IllegalArgumentException("Key range does not overlap given range"); } if (keyRange.equals(range)) { return Collections.singletonList(keyRange); } else if (keyRange.isSubsetOf(range)) { result.add(new Range(range.min, keyRange.min - 1)); result.add(keyRange); result.add((new Range(keyRange.max + 1, range.max))); } else if (range.includes(keyRange.max)) { result.add(new Range(range.min, keyRange.max)); result.add(new Range(keyRange.max + 1, range.max)); } else { result.add(new Range(range.min, keyRange.min - 1)); result.add(new Range(keyRange.min, range.max)); } return result; } @Override public List<Range> partitionRange(int partitions, Range range) { int min = range.min; int max = range.max; assert max >= min; if (partitions == 0) return Collections.EMPTY_LIST; long rangeSize = (long) max - (long) min; long rangeStep = Math.max(1, rangeSize / partitions); List<Range> ranges = new ArrayList<>(partitions); long start = min; long end = start; // keep track of the idealized target to avoid accumulating rounding errors long targetStart = min; long targetEnd = targetStart; // Round to avoid splitting hash domains across ranges if such rounding is not significant. // With default bits==16, one would need to create more than 4000 shards before this // becomes false by default. int mask = 0x0000ffff; boolean round = rangeStep >= (1 << bits) * 16; while (end < max) { targetEnd = targetStart + rangeStep; end = targetEnd; if (round && ((end & mask) != mask)) { // round up or down? int increment = 1 << bits; // 0x00010000 long roundDown = (end | mask) - increment; long roundUp = (end | mask) + increment; if (end - roundDown < roundUp - end && roundDown > start) { end = roundDown; } else { end = roundUp; } } // make last range always end exactly on MAX_VALUE if (ranges.size() == partitions - 1) { end = max; } ranges.add(new Range((int) start, (int) end)); start = end + 1L; targetStart = targetEnd + 1L; } return ranges; } /** * Helper class to calculate parts, masks etc for an id. */ static class KeyParser { String key; int[] numBits; int[] hashes; int[] masks; boolean triLevel; int pieces; public KeyParser(final String key) { this.key = key; List<String> partsList = new ArrayList<>(3); int firstSeparatorPos = key.indexOf(SEPARATOR); if (-1 == firstSeparatorPos) { partsList.add(key); } else { partsList.add(key.substring(0, firstSeparatorPos)); int lastPos = key.length() - 1; // Don't make any more parts if the first separator is the last char if (firstSeparatorPos < lastPos) { int secondSeparatorPos = key.indexOf(SEPARATOR, firstSeparatorPos + 1); if (-1 == secondSeparatorPos) { partsList.add(key.substring(firstSeparatorPos + 1)); } else if (secondSeparatorPos == lastPos) { // Don't make any more parts if the key has exactly two separators and // they're the last two chars - back-compatibility with the behavior of // String.split() - see SOLR-6257. if (firstSeparatorPos < secondSeparatorPos - 1) { partsList.add(key.substring(firstSeparatorPos + 1, secondSeparatorPos)); } } else { // The second separator is not the last char partsList.add(key.substring(firstSeparatorPos + 1, secondSeparatorPos)); partsList.add(key.substring(secondSeparatorPos + 1)); } // Ignore any further separators beyond the first two } } pieces = partsList.size(); String[] parts = partsList.toArray(new String[pieces]); numBits = new int[2]; if (key.endsWith("!") && pieces < 3) pieces++; hashes = new int[pieces]; if (pieces == 3) { numBits[0] = 8; numBits[1] = 8; triLevel = true; } else { numBits[0] = 16; triLevel = false; } for (int i = 0; i < pieces; i++) { if (i < pieces - 1) { int commaIdx = parts[i].indexOf(bitsSeparator); if (commaIdx > 0) { numBits[i] = getNumBits(parts[i], commaIdx); parts[i] = parts[i].substring(0, commaIdx); } } //Last component of an ID that ends with a '!' if(i >= parts.length) hashes[i] = Hash.murmurhash3_x86_32("", 0, "".length(), 0); else hashes[i] = Hash.murmurhash3_x86_32(parts[i], 0, parts[i].length(), 0); } masks = getMasks(); } Range getRange() { int lowerBound; int upperBound; if (triLevel) { lowerBound = hashes[0] & masks[0] | hashes[1] & masks[1]; upperBound = lowerBound | masks[2]; } else { lowerBound = hashes[0] & masks[0]; upperBound = lowerBound | masks[1]; } // If the upper bits are 0xF0000000, the range we want to cover is // 0xF0000000 0xFfffffff if ((masks[0] == 0 && !triLevel) || (masks[0] == 0 && masks[1] == 0 && triLevel)) { // no bits used from first part of key.. the code above will produce 0x000000000->0xffffffff // which only works on unsigned space, but we're using signed space. lowerBound = Integer.MIN_VALUE; upperBound = Integer.MAX_VALUE; } Range r = new Range(lowerBound, upperBound); return r; } /** * Get bit masks for routing based on routing level */ private int[] getMasks() { int[] masks; if (triLevel) masks = getBitMasks(numBits[0], numBits[1]); else masks = getBitMasks(numBits[0]); return masks; } private int[] getBitMasks(int firstBits, int secondBits) { // java can't shift 32 bits int[] masks = new int[3]; masks[0] = firstBits == 0 ? 0 : (-1 << (32 - firstBits)); masks[1] = (firstBits + secondBits) == 0 ? 0 : (-1 << (32 - firstBits - secondBits)); masks[1] = masks[0] ^ masks[1]; masks[2] = (firstBits + secondBits) == 32 ? 0 : ~(masks[0] | masks[1]); return masks; } private int getNumBits(String firstPart, int commaIdx) { int v = 0; for (int idx = commaIdx + 1; idx < firstPart.length(); idx++) { char ch = firstPart.charAt(idx); if (ch < '0' || ch > '9') return -1; v = v * 10 + (ch - '0'); } return v > 32 ? -1 : v; } private int[] getBitMasks(int firstBits) { // java can't shift 32 bits int[] masks; masks = new int[2]; masks[0] = firstBits == 0 ? 0 : (-1 << (32 - firstBits)); masks[1] = firstBits == 32 ? 0 : (-1 >>> firstBits); return masks; } int getHash() { int result = hashes[0] & masks[0]; for (int i = 1; i < pieces; i++) result = result | (hashes[i] & masks[i]); return result; } } }