/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.solr.search.facet; import java.io.IOException; import java.util.ArrayList; import java.util.List; import org.apache.solr.util.hll.HLL; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.FixedBitSet; import org.apache.solr.common.util.Hash; import org.apache.solr.common.util.SimpleOrderedMap; import org.apache.solr.schema.SchemaField; abstract class UniqueSlotAcc extends SlotAcc { HLLAgg.HLLFactory factory; SchemaField field; FixedBitSet[] arr; int currentDocBase; int[] counts; // populated with the cardinality once int nTerms; public UniqueSlotAcc(FacetContext fcontext, SchemaField field, int numSlots, HLLAgg.HLLFactory factory) throws IOException { super(fcontext); this.factory = factory; arr = new FixedBitSet[numSlots]; this.field = field; } @Override public void reset() throws IOException { counts = null; for (FixedBitSet bits : arr) { if (bits == null) continue; bits.clear(0, bits.length()); } } @Override public void setNextReader(LeafReaderContext readerContext) throws IOException { currentDocBase = readerContext.docBase; } @Override public Object getValue(int slot) throws IOException { if (fcontext.isShard()) { return getShardValue(slot); } if (counts != null) { // will only be pre-populated if this was used for sorting. return counts[slot]; } FixedBitSet bs = arr[slot]; return bs==null ? 0 : bs.cardinality(); } private Object getShardHLL(int slot) throws IOException { FixedBitSet ords = arr[slot]; if (ords == null) return HLLAgg.NO_VALUES; HLL hll = factory.getHLL(); long maxOrd = ords.length(); Hash.LongPair hashResult = new Hash.LongPair(); for(int ord=-1; ++ord < maxOrd;) { ord = ords.nextSetBit(ord); if (ord == DocIdSetIterator.NO_MORE_DOCS) break; BytesRef val = lookupOrd(ord); // way to avoid recomputing hash across slots? Prob not worth space Hash.murmurhash3_x64_128(val.bytes, val.offset, val.length, 0, hashResult); // idea: if the set is small enough, just send the hashes? We can add at the top // level or even just do a hash table at the top level. hll.addRaw(hashResult.val1); } SimpleOrderedMap map = new SimpleOrderedMap(); map.add("hll", hll.toBytes()); return map; } private Object getShardValue(int slot) throws IOException { if (factory != null) return getShardHLL(slot); FixedBitSet ords = arr[slot]; int unique; if (counts != null) { unique = counts[slot]; } else { unique = ords==null ? 0 : ords.cardinality(); } SimpleOrderedMap map = new SimpleOrderedMap(); map.add("unique", unique); map.add("nTerms", nTerms); int maxExplicit=100; // TODO: make configurable // TODO: share values across buckets if (unique > 0) { List lst = new ArrayList( Math.min(unique, maxExplicit) ); long maxOrd = ords.length(); if (ords != null && ords.length() > 0) { for (int ord=0; lst.size() < maxExplicit;) { ord = ords.nextSetBit(ord); if (ord == DocIdSetIterator.NO_MORE_DOCS) break; BytesRef val = lookupOrd(ord); Object o = field.getType().toObject(field, val); lst.add(o); if (++ord >= maxOrd) break; } } map.add("vals", lst); } return map; } protected abstract BytesRef lookupOrd(int ord) throws IOException; // we only calculate all the counts when sorting by count public void calcCounts() { counts = new int[arr.length]; for (int i=0; i<arr.length; i++) { FixedBitSet bs = arr[i]; counts[i] = bs == null ? 0 : bs.cardinality(); } } @Override public int compare(int slotA, int slotB) { if (counts == null) { // TODO: a more efficient way to do this? prepareSort? calcCounts(); } return counts[slotA] - counts[slotB]; } @Override public void resize(Resizer resizer) { arr = resizer.resize(arr, null); } }