package org.apache.lucene.search.grouping.dv;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.DocValues.Type; // javadocs
import org.apache.lucene.search.grouping.AbstractAllGroupsCollector;
import org.apache.lucene.util.SentinelIntSet;
import org.apache.lucene.util.BytesRef;
import java.io.IOException;
import java.util.*;
/**
* Implementation of {@link AbstractAllGroupsCollector} that groups documents based on
* {@link DocValues} fields.
*
* @lucene.experimental
*/
public abstract class DVAllGroupsCollector<GROUP_VALUE_TYPE> extends AbstractAllGroupsCollector<GROUP_VALUE_TYPE> {
/** Initial allocation size passed on by the three-argument {@code create} overload, which takes no explicit size. */
private static final int DEFAULT_INITIAL_SIZE = 128;
/**
 * Expert: Constructs a {@link DVAllGroupsCollector}.
 * Selects and constructs the most optimal all groups collector implementation for grouping by {@link DocValues}.
 *
 * @param groupField   The field to group by
 * @param type         The {@link Type} which is used to select a concrete implementation.
 * @param diskResident Whether the values to group by should be disk resident
 * @param initialSize  The initial allocation size of the
 *                     internal int set and group list
 *                     which should roughly match the total
 *                     number of expected unique groups. Be aware that the
 *                     heap usage is 4 bytes * initialSize. Not all concrete implementations use this!
 * @return the most optimal all groups collector implementation for grouping by {@link DocValues}
 * @throws IllegalArgumentException if no implementation exists for the given {@code type}
 */
public static <T> DVAllGroupsCollector<T> create(String groupField, DocValues.Type type, boolean diskResident, int initialSize) {
  // The group value type is dictated by the doc values type, which is only known at
  // runtime, so the selected implementation is held behind a wildcard first.
  final DVAllGroupsCollector<?> collector;
  switch (type) {
    case VAR_INTS:
    case FIXED_INTS_8:
    case FIXED_INTS_16:
    case FIXED_INTS_32:
    case FIXED_INTS_64:
      collector = new Lng(groupField, type, diskResident);
      break;
    case FLOAT_32:
    case FLOAT_64:
      collector = new Dbl(groupField, type, diskResident);
      break;
    case BYTES_FIXED_STRAIGHT:
    case BYTES_FIXED_DEREF:
    case BYTES_VAR_STRAIGHT:
    case BYTES_VAR_DEREF:
      collector = new BR(groupField, type, diskResident);
      break;
    case BYTES_VAR_SORTED:
    case BYTES_FIXED_SORTED:
      // The only implementation that makes use of initialSize.
      collector = new SortedBR(groupField, type, diskResident, initialSize);
      break;
    default:
      throw new IllegalArgumentException(String.format(Locale.ROOT, "ValueType %s not supported", type));
  }
  // Type erasure b/c otherwise we have inconvertible types: the caller chooses T and is
  // responsible for it matching the value type implied by 'type'.
  @SuppressWarnings("unchecked")
  final DVAllGroupsCollector<T> typed = (DVAllGroupsCollector<T>) collector;
  return typed;
}
/**
 * Constructs a {@link DVAllGroupsCollector} without an explicit initial allocation size.
 * Selects and constructs the most optimal all groups collector implementation for grouping by {@link DocValues}.
 * If implementations require an initial allocation size then this will be set to 128.
 *
 * @param groupField   The field to group by
 * @param type         The {@link Type} which is used to select a concrete implementation.
 * @param diskResident Whether the values to group by should be disk resident
 * @return the most optimal all groups collector implementation for grouping by {@link DocValues}
 */
public static <T> DVAllGroupsCollector<T> create(String groupField, DocValues.Type type, boolean diskResident) {
  // Delegate to the four-argument overload, filling in the default size.
  return DVAllGroupsCollector.<T>create(groupField, type, diskResident, DEFAULT_INITIAL_SIZE);
}
/** Name of the field whose doc values are used for grouping. */
final String groupField;

/** Doc values type of the group field; also used to obtain a default source. */
final DocValues.Type valueType;

/** When {@code true} the direct source of the doc values is used instead of the regular source. */
final boolean diskResident;

/** Collection that receives the group values; the concrete instance is supplied by the subclass. */
final Collection<GROUP_VALUE_TYPE> groups;

/**
 * Stores the grouping configuration shared by all concrete implementations.
 *
 * @param groupField   The field to group by
 * @param valueType    The doc values type of the group field
 * @param diskResident Whether the values to group by should be disk resident
 * @param groups       The collection the encountered group values are added to
 */
DVAllGroupsCollector(String groupField, DocValues.Type valueType, boolean diskResident, Collection<GROUP_VALUE_TYPE> groups) {
  this.groups = groups;
  this.diskResident = diskResident;
  this.valueType = valueType;
  this.groupField = groupField;
}
/**
 * Resolves the doc values source of the group field for the given reader and passes it on
 * to {@link #setDocValuesSources(DocValues.Source, AtomicReaderContext)}.
 *
 * @param readerContext The context of the reader that is about to be collected
 * @throws IOException propagated from the underlying doc values access
 */
@Override
public void setNextReader(AtomicReaderContext readerContext) throws IOException {
  final DocValues docValues = readerContext.reader().docValues(groupField);
  if (docValues == null) {
    // The reader has no doc values for the group field: fall back to the default source.
    setDocValuesSources(getDefaultSource(readerContext), readerContext);
    return;
  }

  final DocValues.Source segmentSource;
  if (diskResident) {
    segmentSource = docValues.getDirectSource();
  } else {
    segmentSource = docValues.getSource();
  }
  setDocValuesSources(segmentSource, readerContext);
}
/**
 * Hands the doc values source for the current reader to the concrete implementation.
 * Invoked from {@link #setNextReader(AtomicReaderContext)} with either the source of the
 * group field or, when the reader has no doc values for that field, the result of
 * {@link #getDefaultSource(AtomicReaderContext)}.
 *
 * @param source The doc values source to be used by concrete implementations
 * @param readerContext The current reader context
 */
protected abstract void setDocValuesSources(DocValues.Source source, AtomicReaderContext readerContext);
/**
 * Provides the source to use when the reader has no doc values for the group field.
 * This implementation asks {@link DocValues} for the default source of the configured value type.
 *
 * @param readerContext The current reader context
 * @return The default source when no doc values are available.
 */
protected DocValues.Source getDefaultSource(AtomicReaderContext readerContext) {
  final DocValues.Source fallback = DocValues.getDefaultSource(valueType);
  return fallback;
}
/**
 * Implementation for integer typed doc values: every distinct {@code long} value read
 * from the source is recorded as a group.
 */
static class Lng extends DVAllGroupsCollector<Long> {

  /** Source for the reader currently being collected; assigned in {@code setDocValuesSources}. */
  private DocValues.Source source;

  /**
   * @param groupField   The field to group by
   * @param valueType    The doc values type of the group field
   * @param diskResident Whether the values to group by should be disk resident
   */
  Lng(String groupField, DocValues.Type valueType, boolean diskResident) {
    super(groupField, valueType, diskResident, new TreeSet<Long>());
  }

  /**
   * Records the value of the given document as a group.
   *
   * @param doc The document to collect
   */
  public void collect(int doc) throws IOException {
    final long value = source.getInt(doc);
    // The backing TreeSet ignores values it already holds, so a preceding contains()
    // would only add a second tree lookup and a second boxing of the value.
    groups.add(value);
  }

  /**
   * @return the group values collected so far (the internal collection, not a copy)
   */
  public Collection<Long> getGroups() {
    return groups;
  }

  protected void setDocValuesSources(DocValues.Source source, AtomicReaderContext readerContext) {
    this.source = source;
  }
}
/**
 * Implementation for floating point typed doc values: every distinct {@code double} value
 * read from the source is recorded as a group.
 */
static class Dbl extends DVAllGroupsCollector<Double> {

  /** Source for the reader currently being collected; assigned in {@code setDocValuesSources}. */
  private DocValues.Source source;

  /**
   * @param groupField   The field to group by
   * @param valueType    The doc values type of the group field
   * @param diskResident Whether the values to group by should be disk resident
   */
  Dbl(String groupField, DocValues.Type valueType, boolean diskResident) {
    super(groupField, valueType, diskResident, new TreeSet<Double>());
  }

  /**
   * Records the value of the given document as a group.
   *
   * @param doc The document to collect
   */
  public void collect(int doc) throws IOException {
    final double value = source.getFloat(doc);
    // The backing TreeSet ignores values it already holds, so a preceding contains()
    // would only add a second tree lookup and a second boxing of the value.
    groups.add(value);
  }

  /**
   * @return the group values collected so far (the internal collection, not a copy)
   */
  public Collection<Double> getGroups() {
    return groups;
  }

  protected void setDocValuesSources(DocValues.Source source, AtomicReaderContext readerContext) {
    this.source = source;
  }
}
/**
 * Implementation for the non-sorted bytes doc values types: every distinct
 * {@link BytesRef} value read from the source is recorded as a group.
 */
static class BR extends DVAllGroupsCollector<BytesRef> {

  /** Scratch instance handed to the source on every lookup. */
  private final BytesRef spare = new BytesRef();

  /** Source for the reader currently being collected; assigned in {@code setDocValuesSources}. */
  private DocValues.Source source;

  BR(String groupField, DocValues.Type valueType, boolean diskResident) {
    super(groupField, valueType, diskResident, new TreeSet<BytesRef>());
  }

  /**
   * Records the value of the given document as a group unless it is already known.
   *
   * @param doc The document to collect
   */
  public void collect(int doc) throws IOException {
    final BytesRef current = source.getBytes(doc, spare);
    if (groups.contains(current)) {
      return;
    }
    // Store a private copy: the scratch instance is passed to the source again on the next call.
    groups.add(BytesRef.deepCopyOf(current));
  }

  /**
   * @return the group values collected so far (the internal collection, not a copy)
   */
  public Collection<BytesRef> getGroups() {
    return groups;
  }

  protected void setDocValuesSources(DocValues.Source source, AtomicReaderContext readerContext) {
    this.source = source;
  }
}
/**
 * Implementation for the sorted bytes doc values types. Documents are de-duplicated by the
 * ordinal of their value in the current source, so the bytes are only fetched for
 * ordinals that have not been seen yet.
 */
static class SortedBR extends DVAllGroupsCollector<BytesRef> {

  /** Ordinals (relative to the current source) of the group values already collected. */
  private final SentinelIntSet ordSet;

  /** Scratch instance used for the by-value ordinal lookups. */
  private final BytesRef spare = new BytesRef();

  /** Sorted source for the reader currently being collected. */
  private DocValues.SortedSource source;

  /**
   * @param groupField   The field to group by
   * @param valueType    The doc values type of the group field
   * @param diskResident Whether the values to group by should be disk resident
   * @param initialSize  The initial size of both the group list and the ordinal set
   */
  SortedBR(String groupField, DocValues.Type valueType, boolean diskResident, int initialSize) {
    super(groupField, valueType, diskResident, new ArrayList<BytesRef>(initialSize));
    ordSet = new SentinelIntSet(initialSize, -1);
  }

  /**
   * Records the value of the given document as a group if its ordinal has not been seen
   * in the current source.
   *
   * @param doc The document to collect
   */
  public void collect(int doc) throws IOException {
    final int docOrd = source.ord(doc);
    if (ordSet.exists(docOrd)) {
      return;
    }
    ordSet.put(docOrd);
    // A fresh BytesRef per group, since the instance itself is kept in the group list.
    groups.add(source.getBytes(doc, new BytesRef()));
  }

  /**
   * @return the group values collected so far (the internal collection, not a copy)
   */
  public Collection<BytesRef> getGroups() {
    return groups;
  }

  /**
   * Switches to the sorted view of the given source and rebuilds the set of seen ordinals
   * against it: the set is cleared and every group value collected so far is looked up in
   * the new source; values that are found contribute their ordinal.
   */
  protected void setDocValuesSources(DocValues.Source source, AtomicReaderContext readerContext) {
    this.source = source.asSortedSource();
    ordSet.clear();
    final Iterator<BytesRef> collected = groups.iterator();
    while (collected.hasNext()) {
      final int knownOrd = this.source.getOrdByValue(collected.next(), spare);
      if (knownOrd < 0) {
        // Negative result: no usable ordinal for this value in the new source.
        continue;
      }
      ordSet.put(knownOrd);
    }
  }

  /**
   * Supplies a default sorted source sized by the reader's {@code maxDoc()}.
   */
  @Override
  protected DocValues.Source getDefaultSource(AtomicReaderContext readerContext) {
    final int maxDoc = readerContext.reader().maxDoc();
    return DocValues.getDefaultSortedSource(valueType, maxDoc);
  }
}
}