/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.search.facet;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NumericDocValues;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.schema.SchemaField;
public class UniqueAgg extends StrAggValueSource {
/**
 * Name of this aggregation, passed to the superclass constructor. Its value matches the key
 * under which a shard reports its distinct-value count.
 */
public static final String UNIQUE = "unique";
// internal constants used for aggregating values from multiple shards
/** Its value matches the key under which a shard lists its explicit distinct values. */
static final String VALS = "vals";
/**
 * Creates a "unique" aggregation.
 *
 * @param field forwarded to the superclass constructor together with {@code UNIQUE};
 *     presumably the field name later returned by {@code getArg()} (verify against the superclass)
 */
public UniqueAgg(String field) {
super(UNIQUE, field);
}
/**
 * Selects the slot accumulator implementation for the schema field named by {@code getArg()}.
 *
 * <ul>
 *   <li>field is multi-valued (or its type reports {@code multiValuedFieldCache()}):
 *       {@code UniqueMultiDvSlotAcc} when it has docValues, otherwise
 *       {@code UniqueMultivaluedSlotAcc};</li>
 *   <li>single-valued with a non-null number type: {@code NumericAcc};</li>
 *   <li>anything else: {@code UniqueSinglevaluedSlotAcc}.</li>
 * </ul>
 *
 * @param fcontext facet context supplying the searcher/schema
 * @param numDocs not used by this implementation
 * @param numSlots number of slots handed to the created accumulator
 * @return the accumulator matching the field's characteristics
 * @throws IOException declared by the contract; may be raised while creating the accumulator
 */
@Override
public SlotAcc createSlotAcc(FacetContext fcontext, int numDocs, int numSlots) throws IOException {
  final SchemaField field = fcontext.qcontext.searcher().getSchema().getField(getArg());

  final boolean treatAsMultiValued = field.multiValued() || field.getType().multiValuedFieldCache();
  if (treatAsMultiValued) {
    if (field.hasDocValues()) {
      return new UniqueMultiDvSlotAcc(fcontext, field, numSlots, null);
    }
    return new UniqueMultivaluedSlotAcc(fcontext, field, numSlots, null);
  }

  // Single-valued field: numeric types get the primitive long-set based accumulator.
  final boolean numeric = field.getType().getNumberType() != null;
  if (numeric) {
    return new NumericAcc(fcontext, getArg(), numSlots);
  }
  return new UniqueSinglevaluedSlotAcc(fcontext, field, numSlots, null);
}
/**
 * Creates the merger that combines the per-shard results of this aggregation.
 *
 * @param prototype not used by this implementation
 * @return a new {@code Merger}
 */
@Override
public FacetMerger createFacetMerger(Object prototype) {
return new Merger();
}
/**
 * Merges per-shard "unique" results into a single (possibly estimated) distinct count.
 *
 * <p>Each shard result is a map holding a {@code "unique"} count and, optionally, a
 * {@code "vals"} list of explicit values. Explicitly listed values are unioned exactly; counts
 * reported without a matching list are folded in by estimation (see {@link #getLong()}).
 */
private static class Merger extends FacetSortableMerger {
  /** Cached merged result; negative means "not computed yet". */
  long answer = -1;
  /** Sum of the "unique" counts reported by all shards. */
  long sumUnique;
  /** Union of all explicitly listed values; stays null until a shard lists values. */
  Set<Object> values;
  /** Total number of explicitly listed values across shards (before de-duplication). */
  long sumAdded;
  /** Sum over shards of (unique count - number of listed values). */
  long shardsMissingSum;
  /** Maximum over shards of (unique count - number of listed values). */
  long shardsMissingMax;

  /**
   * Accumulates one shard's result.
   *
   * @param facetResult the shard's result map, read via its "unique" and "vals" entries
   * @param mcontext not used by this implementation
   */
  @Override
  public void merge(Object facetResult, Context mcontext) {
    final SimpleOrderedMap<?> shardResult = (SimpleOrderedMap<?>) facetResult;
    final long shardUnique = ((Number) shardResult.get("unique")).longValue();
    sumUnique += shardUnique;

    final List<?> listed = (List<?>) shardResult.get("vals");
    final int listedCount = (listed == null) ? 0 : listed.size();
    if (listed != null) {
      if (values == null) {
        values = new HashSet<>(listedCount * 4);
      }
      values.addAll(listed);
      sumAdded += listedCount;
    }

    // Portion of this shard's distinct count that was not backed by explicit values.
    final long unlisted = shardUnique - listedCount;
    shardsMissingSum += unlisted;
    shardsMissingMax = Math.max(shardsMissingMax, unlisted);
    // TODO: somehow get & use the count in the bucket?
  }

  /** Returns the merged count, computing it on first use and caching any non-negative result. */
  private long getLong() {
    if (answer < 0) {
      answer = computeMergedCount();
    }
    return answer;
  }

  /**
   * Computes the merged count: the size of the union of listed values plus the unlisted counts
   * scaled by the fraction of listed values that turned out to be distinct. When no values
   * were listed at all, the sum of unlisted counts is returned unchanged.
   */
  private long computeMergedCount() {
    final int distinctListed = (values == null) ? 0 : values.size();
    if (distinctListed == 0) {
      // either a real "0", or no values returned from shards
      return shardsMissingSum;
    }
    final double factor = ((double) distinctListed) / sumAdded; // what fraction of listed values were unique
    final long estimate = (long) (shardsMissingSum * factor);
    return distinctListed + estimate;
  }

  @Override
  public Object getMergedResult() {
    return getLong();
  }

  /** Orders mergers by merged count; {@code direction} is not consulted here. */
  @Override
  public int compareTo(FacetSortableMerger other, FacetRequest.SortDirection direction) {
    final long mine = getLong();
    final long theirs = ((Merger) other).getLong();
    return Long.compare(mine, theirs);
  }
}
/**
 * Minimal open-addressing (linear probing) hash set of primitive {@code long} values.
 *
 * <p>A table slot holding {@code 0} is empty, so the value {@code 0} is never stored in
 * {@link #vals}; whether it was added is tracked separately in {@link #zeroCount}.
 */
static class LongSet {
  static final float LOAD_FACTOR = 0.7f;
  /** Largest power-of-two table length that fits in a Java array. */
  private static final int MAX_CAPACITY = 1 << 30;

  /** Hash table; its length is always a power of two, and 0 marks an empty slot. */
  long[] vals;
  /** Number of non-zero values stored in {@link #vals}. */
  int cardinality;
  /** {@code vals.length - 1}, used to wrap probe positions. */
  int mask;
  /** Number of stored values at which the table is grown before the next insert. */
  int threshold;
  int zeroCount; // 1 if a 0 was collected

  /**
   * Creates an empty set.
   *
   * @param sz requested initial table size; rounded up to the next power of two (minimum 1,
   *     maximum 2^30) because probing relies on {@code length - 1} being a contiguous bit mask
   */
  LongSet(int sz) {
    int capacity = roundUpToPowerOfTwo(sz);
    vals = new long[capacity];
    mask = capacity - 1;
    threshold = (int) (capacity * LOAD_FACTOR);
  }

  /** Returns the smallest power of two that is >= sz, clamped to [1, MAX_CAPACITY]. */
  private static int roundUpToPowerOfTwo(int sz) {
    if (sz <= 1) {
      return 1;
    }
    if (sz >= MAX_CAPACITY) {
      return MAX_CAPACITY;
    }
    int floor = Integer.highestOneBit(sz);
    return floor == sz ? sz : floor << 1;
  }

  /** Adds {@code val} to the set; adding a value that is already present is a no-op. */
  void add(long val) {
    if (val == 0) {
      zeroCount = 1;
      return;
    }
    if (cardinality >= threshold) {
      rehash();
    }
    // For floats: exponent bits start at bit 23 for single precision,
    // and bit 52 for double precision.
    // Many values will only have significant bits just to the right of that,
    // and the leftmost bits will all be zero.
    // For now, let's just settle to get first 8 significant mantissa bits of double or float in the lowest bits of our hash
    // The upper bits of our hash will be irrelevant.
    int h = (int) (val + (val >>> 44) + (val >>> 15));
    // Growing before the table fills (threshold < length) guarantees an empty slot exists,
    // so this probe loop always terminates.
    for (int slot = h & mask; ; slot = (slot + 1) & mask) {
      long v = vals[slot];
      if (v == 0) {
        vals[slot] = val;
        cardinality++;
        break;
      } else if (v == val) {
        // val is already in the set
        break;
      }
    }
  }

  /**
   * Doubles the table and re-inserts every stored value.
   *
   * @throws IllegalStateException if the table is already at its maximum length
   */
  private void rehash() {
    long[] oldVals = vals;
    if (oldVals.length >= MAX_CAPACITY) {
      // Doubling would overflow int and surface as a confusing NegativeArraySizeException.
      throw new IllegalStateException("LongSet cannot grow beyond " + MAX_CAPACITY + " slots");
    }
    int newCapacity = oldVals.length << 1;
    vals = new long[newCapacity];
    mask = newCapacity - 1;
    threshold = (int) (newCapacity * LOAD_FACTOR);
    cardinality = 0;
    for (long val : oldVals) {
      if (val != 0) {
        add(val);
      }
    }
  }

  /** Returns the number of distinct values added, including 0 if it was added. */
  int cardinality() {
    return cardinality + zeroCount;
  }
}
/**
 * Slot accumulator that tracks, per slot, the set of distinct {@code long} values read from a
 * field's numeric doc values.
 */
static class NumericAcc extends SlotAcc {
  /** Schema field whose values are collected. */
  SchemaField sf;
  /** One lazily created set per slot; a null entry means nothing was collected for that slot. */
  LongSet[] sets;
  /** Doc values of the current segment, replaced in {@link #setNextReader}. */
  NumericDocValues values;

  /**
   * @param fcontext facet context; its searcher's schema is used to resolve {@code field}
   * @param field name of the field to collect
   * @param numSlots initial number of slots
   */
  public NumericAcc(FacetContext fcontext, String field, int numSlots) throws IOException {
    super(fcontext);
    sf = fcontext.searcher.getSchema().getField(field);
    sets = new LongSet[numSlots];
  }

  /** Drops all collected values while keeping the current number of slots. */
  @Override
  public void reset() {
    final int slotCount = sets.length;
    sets = new LongSet[slotCount];
  }

  @Override
  public void resize(Resizer resizer) {
    sets = resizer.resize(sets, null);
  }

  /** Switches to the numeric doc values of the given segment. */
  @Override
  public void setNextReader(LeafReaderContext readerContext) throws IOException {
    values = DocValues.getNumeric(readerContext.reader(), sf.getName());
  }

  /** Adds the value of {@code doc}, if it has one, to the set belonging to {@code slot}. */
  @Override
  public void collect(int doc, int slot) throws IOException {
    int positionedAt = values.docID();
    if (positionedAt < doc) {
      positionedAt = values.advance(doc);
    }
    if (positionedAt > doc) {
      // missing
      return;
    }

    final long docValue = values.longValue();
    LongSet slotSet = sets[slot];
    if (slotSet == null) {
      slotSet = new LongSet(16);
      sets[slot] = slotSet;
    }
    // TODO: could handle 0s at this level too
    slotSet.add(docValue);
  }

  /**
   * Returns the shard-level map (see {@link #getShardValue}) when running as a shard request,
   * otherwise the slot's distinct count.
   */
  @Override
  public Object getValue(int slot) throws IOException {
    if (!fcontext.isShard()) {
      return getCardinality(slot);
    }
    return getShardValue(slot);
  }

  /** Distinct count for the slot; 0 when nothing was collected for it. */
  private int getCardinality(int slot) {
    final LongSet slotSet = sets[slot];
    if (slotSet == null) {
      return 0;
    }
    return slotSet.cardinality();
  }

  /**
   * Builds the per-shard result for a slot: a map with the distinct count under "unique" and,
   * when that count does not exceed 100, the distinct values themselves under "vals".
   */
  public Object getShardValue(int slot) throws IOException {
    final LongSet slotSet = sets[slot];
    final int unique = getCardinality(slot);

    final SimpleOrderedMap<Object> shardResult = new SimpleOrderedMap<>();
    shardResult.add("unique", unique);

    final int maxExplicit = 100;
    // TODO: make configurable
    // TODO: share values across buckets
    if (unique > maxExplicit) {
      return shardResult;
    }

    final List<Object> listed = new ArrayList<>(Math.min(unique, maxExplicit));
    if (slotSet != null) {
      if (slotSet.zeroCount > 0) {
        // NOTE(review): the literal boxes to Integer while the other values box to Long;
        // kept as-is so that every shard reports zero with the same type.
        listed.add(0);
      }
      for (long stored : slotSet.vals) {
        if (stored == 0) {
          continue; // empty table slot
        }
        listed.add(stored);
      }
    }
    shardResult.add("vals", listed);
    return shardResult;
  }

  /** Orders slots by distinct count. */
  @Override
  public int compare(int slotA, int slotB) {
    final int countA = getCardinality(slotA);
    final int countB = getCardinality(slotB);
    return countA - countB;
  }
}
}