/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.search.facet;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.queries.function.FunctionValues;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.search.DocIterator;
import org.apache.solr.search.DocSet;
import org.apache.solr.search.SolrIndexSearcher;
import java.io.Closeable;
import java.io.IOException;
import java.lang.reflect.Array;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
/**
 * Accumulates statistics separated by a slot number.
 * There is a separate statistic per slot. The slot is usually an ordinal into a set of values, e.g. tracking a count
 * frequency <em>per term</em>.
 * Sometimes there doesn't need to be a slot distinction, in which case there is just one nominal slot.
 */
public abstract class SlotAcc implements Closeable {
  /** Name under which {@link #setValues} adds this accumulator's value; nothing is added while it is null. */
  String key; // todo...
  protected final FacetContext fcontext;

  public SlotAcc(FacetContext fcontext) {
    this.fcontext = fcontext;
  }

  /** Called by {@link #collect(DocSet, int)} each time collection moves to a new segment. No-op by default. */
  public void setNextReader(LeafReaderContext readerContext) throws IOException {}

  /**
   * Accumulates a single document into a slot.
   *
   * @param doc  document id; {@link #collect(DocSet, int)} passes ids relative to the current segment
   * @param slot slot to accumulate into
   */
  public abstract void collect(int doc, int slot) throws IOException;

  /**
   * Collects every document of {@code docs} into {@code slot}, translating each top-level document id
   * into a segment-relative id and calling {@link #setNextReader} whenever a new segment is entered.
   * The segment cursor only ever moves forward, so ids are expected to arrive in ascending order.
   *
   * @param docs documents to collect
   * @param slot slot to accumulate into
   * @return the number of documents collected
   * @throws RuntimeException if a document id lies beyond the last segment of the index reader
   */
  public int collect(DocSet docs, int slot) throws IOException {
    int count = 0;
    SolrIndexSearcher searcher = fcontext.searcher;
    final List<LeafReaderContext> leaves = searcher.getIndexReader().leaves();
    final Iterator<LeafReaderContext> ctxIt = leaves.iterator();
    LeafReaderContext ctx = null;
    int segBase = 0;
    int adjustedMax = 0; // first top-level doc id that is NOT in the current segment
    for (DocIterator docsIt = docs.iterator(); docsIt.hasNext();) {
      final int doc = docsIt.nextDoc();
      if (doc >= adjustedMax) {
        // Advance to the segment containing this doc, skipping segments with no matches.
        do {
          if (!ctxIt.hasNext()) {
            // should be impossible; Iterator.next() never returns null, so exhaustion is checked explicitly
            throw new RuntimeException("INTERNAL FACET ERROR");
          }
          ctx = ctxIt.next();
          segBase = ctx.docBase;
          adjustedMax = segBase + ctx.reader().maxDoc();
        } while (doc >= adjustedMax);
        assert doc >= ctx.docBase;
        setNextReader(ctx);
      }
      count++;
      collect(doc - segBase, slot); // per-seg collectors
    }
    return count;
  }

  /** Orders two slots by their accumulated statistic, with the usual comparator sign convention. */
  public abstract int compare(int slotA, int slotB);

  /** Returns the value accumulated for the given slot. */
  public abstract Object getValue(int slotNum) throws IOException;

  /**
   * Adds this accumulator's value for {@code slotNum} to {@code bucket} under {@link #key}.
   * Nothing is added when the key is null or the value is null.
   */
  public void setValues(SimpleOrderedMap<Object> bucket, int slotNum) throws IOException {
    if (key == null) return;
    Object val = getValue(slotNum);
    if (val != null) {
      bucket.add(key, val);
    }
  }

  /** Returns every slot to its initial state. */
  public abstract void reset() throws IOException;

  /** Re-maps the per-slot state using the supplied {@link Resizer}. */
  public abstract void resize(Resizer resizer);

  @Override
  public void close() throws IOException {}

  /**
   * Describes a re-mapping of slots: the size of the new slot space and, for each old slot,
   * its new position. The helper methods copy an old per-slot array into a freshly sized one.
   */
  public static abstract class Resizer {
    /** Returns the number of slots after resizing. */
    public abstract int getNewSize();

    /** Returns the new slot for {@code oldSlot}; a negative result means the old slot is dropped. */
    public abstract int getNewSlot(int oldSlot);

    /**
     * Copies {@code old} into a new array of {@link #getNewSize()} elements.
     * Elements equal to {@code defaultValue} are not copied; unmapped positions hold {@code defaultValue}.
     */
    public double[] resize(double[] old, double defaultValue) {
      double[] values = new double[getNewSize()];
      if (defaultValue != 0) {
        Arrays.fill(values, defaultValue);
      }
      for (int i = 0; i < old.length; i++) {
        double val = old[i];
        if (val != defaultValue) {
          int newSlot = getNewSlot(i);
          if (newSlot >= 0) {
            values[newSlot] = val;
          }
        }
      }
      return values;
    }

    /**
     * Copies {@code old} into a new array of {@link #getNewSize()} elements.
     * Elements equal to {@code defaultValue} are not copied; unmapped positions hold {@code defaultValue}.
     */
    public int[] resize(int[] old, int defaultValue) {
      int[] values = new int[getNewSize()];
      if (defaultValue != 0) {
        Arrays.fill(values, defaultValue);
      }
      for (int i = 0; i < old.length; i++) {
        int val = old[i];
        if (val != defaultValue) {
          int newSlot = getNewSlot(i);
          if (newSlot >= 0) {
            values[newSlot] = val;
          }
        }
      }
      return values;
    }

    /**
     * Copies {@code old} into a new array of {@link #getNewSize()} elements that has the same
     * runtime component type as {@code old}. Elements that are the same reference as
     * {@code defaultValue} (identity, not {@code equals}) are not copied.
     */
    public <T> T[] resize(T[] old, T defaultValue) {
      // The new array is created with old's own component type, so the cast cannot fail.
      @SuppressWarnings("unchecked")
      T[] values = (T[]) Array.newInstance(old.getClass().getComponentType(), getNewSize());
      if (defaultValue != null) {
        Arrays.fill(values, defaultValue);
      }
      for (int i = 0; i < old.length; i++) {
        T val = old[i];
        if (val != defaultValue) {
          int newSlot = getNewSlot(i);
          if (newSlot >= 0) {
            values[newSlot] = val;
          }
        }
      }
      return values;
    }
  } // end class Resizer
}
// TODO: we should really have a decoupled value provider.
// That would improve reuse and also avoid looking up the same value more than once when several stats need it.
/**
 * Base for accumulators that read their per-document input from a {@link ValueSource}.
 * The segment-level {@link FunctionValues} are looked up again each time a new segment is entered.
 */
abstract class FuncSlotAcc extends SlotAcc {
  /** Source the per-segment values are obtained from. */
  protected final ValueSource valueSource;
  /** Values for the current segment; assigned by {@link #setNextReader}. */
  protected FunctionValues values;

  /**
   * @param values   value source to read document values from
   * @param fcontext facet context handed to the superclass
   * @param numSlots not used by this class itself
   */
  public FuncSlotAcc(ValueSource values, FacetContext fcontext, int numSlots) {
    super(fcontext);
    valueSource = values;
  }

  /** Resolves the function values of the segment that is about to be collected. */
  @Override
  public void setNextReader(LeafReaderContext readerContext) throws IOException {
    this.values = this.valueSource.getValues(this.fcontext.qcontext, readerContext);
  }
}
// have a version that counts the number of times a Slot has been hit? (for avg... what else?)
// TODO: make more sense to have func as the base class rather than double?
// double-slot-func -> func-slot -> slot -> acc
// double-slot-func -> double-slot -> slot -> acc
/**
 * Function-based accumulator that keeps one {@code double} per slot in {@link #result}.
 * Slots start at, and are reset to, {@link #initialValue}.
 */
abstract class DoubleFuncSlotAcc extends FuncSlotAcc {
  double[] result; // TODO: use DoubleArray
  double initialValue;

  /** Creates an accumulator whose slots start at 0. */
  public DoubleFuncSlotAcc(ValueSource values, FacetContext fcontext, int numSlots) {
    this(values, fcontext, numSlots, 0);
  }

  /**
   * @param values       value source to read document values from
   * @param fcontext     facet context
   * @param numSlots     number of slots to allocate
   * @param initialValue value every slot starts with
   */
  public DoubleFuncSlotAcc(ValueSource values, FacetContext fcontext, int numSlots, double initialValue) {
    super(values, fcontext, numSlots);
    this.initialValue = initialValue;
    result = new double[numSlots];
    if (initialValue != 0) {
      // Fill directly instead of calling the overridable reset(): a subclass override would run
      // before that subclass's own fields have been initialized.
      Arrays.fill(result, initialValue);
    }
  }

  @Override
  public int compare(int slotA, int slotB) {
    return Double.compare(result[slotA], result[slotB]);
  }

  @Override
  public Object getValue(int slot) {
    return result[slot];
  }

  @Override
  public void reset() {
    Arrays.fill(result, initialValue);
  }

  @Override
  public void resize(Resizer resizer) {
    result = resizer.resize(result, initialValue);
  }
}
/**
 * Accumulator that keeps one {@code int} per slot in {@link #result}.
 * Slots start at, and are reset to, {@link #initialValue}.
 */
abstract class IntSlotAcc extends SlotAcc {
  int[] result; // use LongArray32
  int initialValue;

  /**
   * @param fcontext     facet context
   * @param numSlots     number of slots to allocate
   * @param initialValue value every slot starts with
   */
  public IntSlotAcc(FacetContext fcontext, int numSlots, int initialValue) {
    super(fcontext);
    this.initialValue = initialValue;
    result = new int[numSlots];
    if (initialValue != 0) {
      // Fill directly instead of calling the overridable reset(): a subclass override would run
      // before that subclass's own fields have been initialized.
      Arrays.fill(result, initialValue);
    }
  }

  @Override
  public int compare(int slotA, int slotB) {
    return Integer.compare(result[slotA], result[slotB]);
  }

  @Override
  public Object getValue(int slot) {
    return result[slot];
  }

  @Override
  public void reset() {
    Arrays.fill(result, initialValue);
  }

  @Override
  public void resize(Resizer resizer) {
    result = resizer.resize(result, initialValue);
  }
}
class SumSlotAcc extends DoubleFuncSlotAcc {
public SumSlotAcc(ValueSource values, FacetContext fcontext, int numSlots) {
super(values, fcontext, numSlots);
}
public void collect(int doc, int slotNum) throws IOException {
double val = values.doubleVal(doc); // todo: worth trying to share this value across multiple stats that need it?
result[slotNum] += val;
}
}
/** Accumulates, per slot, the sum of the squared function value of every collected document. */
class SumsqSlotAcc extends DoubleFuncSlotAcc {
  public SumsqSlotAcc(ValueSource values, FacetContext fcontext, int numSlots) {
    super(values, fcontext, numSlots);
  }

  /** Squares the document's value and adds it to the running total of {@code slotNum}. */
  @Override
  public void collect(int doc, int slotNum) throws IOException {
    final double v = values.doubleVal(doc);
    final double squared = v * v;
    result[slotNum] += squared;
  }
}
/**
 * Accumulates, per slot, a running sum (in {@code result}) and a count of contributing documents,
 * from which the average is derived on demand.
 */
class AvgSlotAcc extends DoubleFuncSlotAcc {
  /** Number of documents that contributed to each slot. */
  int[] counts;

  public AvgSlotAcc(ValueSource values, FacetContext fcontext, int numSlots) {
    super(values, fcontext, numSlots);
    counts = new int[numSlots];
  }

  @Override
  public void reset() {
    super.reset();
    Arrays.fill(counts, 0);
  }

  /** Adds the document's value to the slot; a zero value only counts when the document actually has a value. */
  @Override
  public void collect(int doc, int slotNum) throws IOException {
    double val = values.doubleVal(doc);
    if (val != 0 || values.exists(doc)) {
      result[slotNum] += val;
      counts[slotNum] += 1;
    }
  }

  private double avg(double tot, int count) {
    return count == 0 ? 0 : tot / count; // returns 0 instead of NaN.. todo - make configurable? if NaN, we need to
    // handle comparisons though...
  }

  private double avg(int slot) {
    return avg(result[slot], counts[slot]); // calc once and cache in result?
  }

  @Override
  public int compare(int slotA, int slotB) {
    return Double.compare(avg(slotA), avg(slotB));
  }

  /**
   * Returns the average for the slot, or, when {@code fcontext.isShard()} is true,
   * a two-element list {@code [count, sum]} holding the raw components instead.
   */
  @Override
  public Object getValue(int slot) {
    if (fcontext.isShard()) {
      List<Object> lst = new ArrayList<>(2);
      lst.add(counts[slot]);
      lst.add(result[slot]);
      return lst;
    } else {
      return avg(slot);
    }
  }

  @Override
  public void resize(Resizer resizer) {
    super.resize(resizer);
    counts = resizer.resize(counts, 0);
  }
}
/**
 * Accumulates, per slot, the sum of squares (in {@code result}), the plain sum and the count of
 * contributing documents, from which the variance is derived as {@code sumSq/count - (sum/count)^2}.
 */
class VarianceSlotAcc extends DoubleFuncSlotAcc {
  /** Number of documents that contributed to each slot. */
  int[] counts;
  /** Running sum of values per slot. */
  double[] sum;

  public VarianceSlotAcc(ValueSource values, FacetContext fcontext, int numSlots) {
    super(values, fcontext, numSlots);
    counts = new int[numSlots];
    sum = new double[numSlots];
  }

  @Override
  public void reset() {
    super.reset();
    Arrays.fill(counts, 0);
    Arrays.fill(sum, 0);
  }

  @Override
  public void resize(Resizer resizer) {
    super.resize(resizer);
    this.counts = resizer.resize(this.counts, 0);
    this.sum = resizer.resize(this.sum, 0);
  }

  /** Returns 0 for an empty slot rather than dividing by zero. */
  private double variance(double sumSq, double sum, int count) {
    double val = count == 0 ? 0 : (sumSq / count) - Math.pow(sum / count, 2);
    return val;
  }

  private double variance(int slot) {
    return variance(result[slot], sum[slot], counts[slot]); // calc once and cache in result?
  }

  @Override
  public int compare(int slotA, int slotB) {
    return Double.compare(this.variance(slotA), this.variance(slotB));
  }

  /**
   * Returns the variance for the slot, or, when {@code fcontext.isShard()} is true,
   * a three-element list {@code [count, sumOfSquares, sum]} holding the raw components instead.
   */
  @Override
  public Object getValue(int slot) {
    if (fcontext.isShard()) {
      List<Object> lst = new ArrayList<>(3);
      lst.add(counts[slot]);
      lst.add(result[slot]);
      lst.add(sum[slot]);
      return lst;
    } else {
      return this.variance(slot);
    }
  }

  /** Folds the document's value into the slot; documents without a value are ignored. */
  @Override
  public void collect(int doc, int slot) throws IOException {
    double val = values.doubleVal(doc);
    if (values.exists(doc)) {
      counts[slot]++;
      result[slot] += val * val;
      sum[slot] += val;
    }
  }
}
/**
 * Accumulates, per slot, the sum of squares (in {@code result}), the plain sum and the count of
 * contributing documents, from which the standard deviation is derived as
 * {@code sqrt(sumSq/count - (sum/count)^2)}.
 */
class StddevSlotAcc extends DoubleFuncSlotAcc {
  /** Number of documents that contributed to each slot. */
  int[] counts;
  /** Running sum of values per slot. */
  double[] sum;

  public StddevSlotAcc(ValueSource values, FacetContext fcontext, int numSlots) {
    super(values, fcontext, numSlots);
    counts = new int[numSlots];
    sum = new double[numSlots];
  }

  @Override
  public void reset() {
    super.reset();
    Arrays.fill(counts, 0);
    Arrays.fill(sum, 0);
  }

  /**
   * Re-maps all three per-slot arrays. {@code result} is handled by the superclass, so only
   * {@code counts} and {@code sum} are re-mapped here; re-mapping {@code result} a second time
   * would apply the slot mapping to already-moved values.
   */
  @Override
  public void resize(Resizer resizer) {
    super.resize(resizer);
    this.counts = resizer.resize(this.counts, 0);
    this.sum = resizer.resize(this.sum, 0);
  }

  /** Returns 0 for an empty slot rather than dividing by zero. */
  private double stdDev(double sumSq, double sum, int count) {
    double val = count == 0 ? 0 : Math.sqrt((sumSq / count) - Math.pow(sum / count, 2));
    return val;
  }

  private double stdDev(int slot) {
    return stdDev(result[slot], sum[slot], counts[slot]); // calc once and cache in result?
  }

  @Override
  public int compare(int slotA, int slotB) {
    return Double.compare(this.stdDev(slotA), this.stdDev(slotB));
  }

  /**
   * Returns the standard deviation for the slot, or, when {@code fcontext.isShard()} is true,
   * a three-element list {@code [count, sumOfSquares, sum]} holding the raw components instead.
   */
  @Override
  public Object getValue(int slot) {
    if (fcontext.isShard()) {
      List<Object> lst = new ArrayList<>(3);
      lst.add(counts[slot]);
      lst.add(result[slot]);
      lst.add(sum[slot]);
      return lst;
    } else {
      return this.stdDev(slot);
    }
  }

  /** Folds the document's value into the slot; documents without a value are ignored. */
  @Override
  public void collect(int doc, int slot) throws IOException {
    double val = values.doubleVal(doc);
    if (values.exists(doc)) {
      counts[slot]++;
      result[slot] += val * val;
      sum[slot] += val;
    }
  }
}
/** A {@link SlotAcc} that additionally exposes a readable and directly incrementable count per slot. */
abstract class CountSlotAcc extends SlotAcc {
  public CountSlotAcc(FacetContext fcontext) {
    super(fcontext);
  }

  /** Returns the count currently held for {@code slot}. */
  public abstract int getCount(int slot);

  /** Adds {@code count} to the count held for {@code slot}. */
  public abstract void incrementCount(int slot, int count);
}
/** {@link CountSlotAcc} backed by a plain {@code int} array with one counter per slot. */
class CountSlotArrAcc extends CountSlotAcc {
  int[] result;

  public CountSlotArrAcc(FacetContext fcontext, int numSlots) {
    super(fcontext);
    result = new int[numSlots];
  }

  /** Increments the counter of {@code slotNum} by one; the document id is not used. */
  @Override
  public void collect(int doc, int slotNum) { // TODO: count arrays can use fewer bytes based on the number of docs in
    // the base set (that's the upper bound for single valued) - look at ttf?
    result[slotNum]++;
  }

  @Override
  public int compare(int slotA, int slotB) {
    return Integer.compare(result[slotA], result[slotB]);
  }

  @Override
  public Object getValue(int slotNum) throws IOException {
    return result[slotNum];
  }

  @Override
  public void incrementCount(int slot, int count) {
    result[slot] += count;
  }

  @Override
  public int getCount(int slot) {
    return result[slot];
  }

  // internal and expert
  /** Exposes the backing array itself, not a copy; {@link #resize} replaces it with a new array. */
  int[] getCountArray() {
    return result;
  }

  @Override
  public void reset() {
    Arrays.fill(result, 0);
  }

  /** Replaces the backing array with the re-mapped one; the resizer returns a new array rather than resizing in place. */
  @Override
  public void resize(Resizer resizer) {
    result = resizer.resize(result, 0);
  }
}
/**
 * Accumulator that collects nothing and simply orders slots by their slot number;
 * the value of a slot is the slot number itself.
 */
class SortSlotAcc extends SlotAcc {
  public SortSlotAcc(FacetContext fcontext) {
    super(fcontext);
  }

  @Override
  public void collect(int doc, int slot) throws IOException {
    // no-op
  }

  /** Orders slots by slot number; uses {@link Integer#compare} like the other accumulators, avoiding subtraction overflow. */
  @Override
  public int compare(int slotA, int slotB) {
    return Integer.compare(slotA, slotB);
  }

  @Override
  public Object getValue(int slotNum) {
    return slotNum;
  }

  @Override
  public void reset() {
    // no-op
  }

  /**
   * Not supported.
   *
   * @throws UnsupportedOperationException always
   */
  @Override
  public void resize(Resizer resizer) {
    // sort slot only works with direct-mapped accumulators
    throw new UnsupportedOperationException();
  }
}