package org.apache.lucene.search.grouping.dv;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.DocValues.Type;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.grouping.AbstractSecondPassGroupingCollector;
import org.apache.lucene.search.grouping.SearchGroup;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.SentinelIntSet;
import java.io.IOException;
import java.util.Collection;
import java.util.Locale;
/**
* IDV based implementation of {@link AbstractSecondPassGroupingCollector}.
*
* @lucene.experimental
*/
public abstract class DVSecondPassGroupingCollector<GROUP_VALUE> extends AbstractSecondPassGroupingCollector<GROUP_VALUE> {
/**
* Constructs a {@link DVSecondPassGroupingCollector}.
* Selects and constructs the most optimal second pass collector implementation for grouping by {@link DocValues}.
*
* @param groupField The field to group by
* @param diskResident Whether the values to group by should be disk resident
* @param type The {@link Type} which is used to select a concrete implementation.
* @param searchGroups The groups from the first phase search
* @param groupSort The sort used for the groups
* @param withinGroupSort The sort used for documents inside a group
* @param maxDocsPerGroup The maximum number of documents to collect per group
* @param getScores Whether to include scores for the documents inside a group
* @param getMaxScores Whether to keep track of the higest score per group
* @param fillSortFields Whether to include the sort values
* @return the most optimal second pass collector implementation for grouping by {@link DocValues}
* @throws IOException If I/O related errors occur
*/
@SuppressWarnings("unchecked")
public static <T> DVSecondPassGroupingCollector<T> create(String groupField,
boolean diskResident,
DocValues.Type type,
Collection<SearchGroup<T>> searchGroups,
Sort groupSort,
Sort withinGroupSort,
int maxDocsPerGroup,
boolean getScores,
boolean getMaxScores,
boolean fillSortFields) throws IOException {
switch (type) {
case VAR_INTS:
case FIXED_INTS_8:
case FIXED_INTS_16:
case FIXED_INTS_32:
case FIXED_INTS_64:
// Type erasure b/c otherwise we have inconvertible types...
return (DVSecondPassGroupingCollector) new Lng(groupField, type, diskResident, (Collection) searchGroups, groupSort, withinGroupSort, maxDocsPerGroup, getScores, getMaxScores, fillSortFields);
case FLOAT_32:
case FLOAT_64:
// Type erasure b/c otherwise we have inconvertible types...
return (DVSecondPassGroupingCollector) new Dbl(groupField, type, diskResident, (Collection) searchGroups, groupSort, withinGroupSort, maxDocsPerGroup, getScores, getMaxScores, fillSortFields);
case BYTES_FIXED_STRAIGHT:
case BYTES_FIXED_DEREF:
case BYTES_VAR_STRAIGHT:
case BYTES_VAR_DEREF:
// Type erasure b/c otherwise we have inconvertible types...
return (DVSecondPassGroupingCollector) new BR(groupField, type, diskResident, (Collection) searchGroups, groupSort, withinGroupSort, maxDocsPerGroup, getScores, getMaxScores, fillSortFields);
case BYTES_VAR_SORTED:
case BYTES_FIXED_SORTED:
// Type erasure b/c otherwise we have inconvertible types...
return (DVSecondPassGroupingCollector) new SortedBR(groupField, type, diskResident, (Collection) searchGroups, groupSort, withinGroupSort, maxDocsPerGroup, getScores, getMaxScores, fillSortFields);
default:
throw new IllegalArgumentException(String.format(Locale.ROOT, "ValueType %s not supported", type));
}
}
final String groupField;
final DocValues.Type valueType;
final boolean diskResident;
DVSecondPassGroupingCollector(String groupField, DocValues.Type valueType, boolean diskResident, Collection<SearchGroup<GROUP_VALUE>> searchGroups, Sort groupSort, Sort withinGroupSort, int maxDocsPerGroup, boolean getScores, boolean getMaxScores, boolean fillSortFields) throws IOException {
super(searchGroups, groupSort, withinGroupSort, maxDocsPerGroup, getScores, getMaxScores, fillSortFields);
this.groupField = groupField;
this.valueType = valueType;
this.diskResident = diskResident;
}
@Override
public void setNextReader(AtomicReaderContext readerContext) throws IOException {
super.setNextReader(readerContext);
final DocValues dv = readerContext.reader().docValues(groupField);
final DocValues.Source dvSource;
if (dv != null) {
dvSource = diskResident ? dv.getDirectSource() : dv.getSource();
} else {
dvSource = getDefaultSource(readerContext);
}
setDocValuesSources(dvSource, readerContext);
}
/**
* Sets the idv source for concrete implementations to use.
*
* @param source The idv source to be used by concrete implementations
* @param readerContext The current reader context
*/
protected abstract void setDocValuesSources(DocValues.Source source, AtomicReaderContext readerContext);
/**
* @return The default source when no doc values are available.
* @param readerContext The current reader context
*/
protected DocValues.Source getDefaultSource(AtomicReaderContext readerContext) {
return DocValues.getDefaultSource(valueType);
}
static class Lng extends DVSecondPassGroupingCollector<Long> {
private DocValues.Source source;
Lng(String groupField, DocValues.Type valueType, boolean diskResident, Collection<SearchGroup<Long>> searchGroups, Sort groupSort, Sort withinGroupSort, int maxDocsPerGroup, boolean getScores, boolean getMaxScores, boolean fillSortFields) throws IOException {
super(groupField, valueType, diskResident, searchGroups, groupSort, withinGroupSort, maxDocsPerGroup, getScores, getMaxScores, fillSortFields);
}
protected SearchGroupDocs<Long> retrieveGroup(int doc) throws IOException {
return groupMap.get(source.getInt(doc));
}
protected void setDocValuesSources(DocValues.Source source, AtomicReaderContext readerContext) {
this.source = source;
}
}
static class Dbl extends DVSecondPassGroupingCollector<Double> {
private DocValues.Source source;
Dbl(String groupField, DocValues.Type valueType, boolean diskResident, Collection<SearchGroup<Double>> searchGroups, Sort groupSort, Sort withinGroupSort, int maxDocsPerGroup, boolean getScores, boolean getMaxScores, boolean fillSortFields) throws IOException {
super(groupField, valueType, diskResident, searchGroups, groupSort, withinGroupSort, maxDocsPerGroup, getScores, getMaxScores, fillSortFields);
}
protected SearchGroupDocs<Double> retrieveGroup(int doc) throws IOException {
return groupMap.get(source.getFloat(doc));
}
protected void setDocValuesSources(DocValues.Source source, AtomicReaderContext readerContext) {
this.source = source;
}
}
static class BR extends DVSecondPassGroupingCollector<BytesRef> {
private DocValues.Source source;
private final BytesRef spare = new BytesRef();
BR(String groupField, DocValues.Type valueType, boolean diskResident, Collection<SearchGroup<BytesRef>> searchGroups, Sort groupSort, Sort withinGroupSort, int maxDocsPerGroup, boolean getScores, boolean getMaxScores, boolean fillSortFields) throws IOException {
super(groupField, valueType, diskResident, searchGroups, groupSort, withinGroupSort, maxDocsPerGroup, getScores, getMaxScores, fillSortFields);
}
protected SearchGroupDocs<BytesRef> retrieveGroup(int doc) throws IOException {
return groupMap.get(source.getBytes(doc, spare));
}
@Override
protected void setDocValuesSources(DocValues.Source source, AtomicReaderContext readerContext) {
this.source = source;
}
}
static class SortedBR extends DVSecondPassGroupingCollector<BytesRef> {
private DocValues.SortedSource source;
private final BytesRef spare = new BytesRef();
private final SentinelIntSet ordSet;
@SuppressWarnings({"unchecked","rawtypes"})
SortedBR(String groupField, DocValues.Type valueType, boolean diskResident, Collection<SearchGroup<BytesRef>> searchGroups, Sort groupSort, Sort withinGroupSort, int maxDocsPerGroup, boolean getScores, boolean getMaxScores, boolean fillSortFields) throws IOException {
super(groupField, valueType, diskResident, searchGroups, groupSort, withinGroupSort, maxDocsPerGroup, getScores, getMaxScores, fillSortFields);
ordSet = new SentinelIntSet(groupMap.size(), -1);
groupDocs = (SearchGroupDocs<BytesRef>[]) new SearchGroupDocs[ordSet.keys.length];
}
protected SearchGroupDocs<BytesRef> retrieveGroup(int doc) throws IOException {
int slot = ordSet.find(source.ord(doc));
if (slot >= 0) {
return groupDocs[slot];
}
return null;
}
@Override
protected void setDocValuesSources(DocValues.Source source, AtomicReaderContext readerContext) {
this.source = source.asSortedSource();
ordSet.clear();
for (SearchGroupDocs<BytesRef> group : groupMap.values()) {
int ord = this.source.getOrdByValue(group.groupValue, spare);
if (ord >= 0) {
groupDocs[ordSet.put(ord)] = group;
}
}
}
@Override
protected DocValues.Source getDefaultSource(AtomicReaderContext readerContext) {
return DocValues.getDefaultSortedSource(valueType, readerContext.reader().maxDoc());
}
}
}