package org.apache.lucene.search.grouping.dv; /* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.DocValues; import org.apache.lucene.search.grouping.AbstractDistinctValuesCollector; import org.apache.lucene.search.grouping.SearchGroup; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.SentinelIntSet; import org.apache.lucene.index.DocValues.Type; // javadocs import java.io.IOException; import java.util.*; /** * Docvalues implementation of {@link org.apache.lucene.search.grouping.AbstractDistinctValuesCollector}. * * @lucene.experimental */ public abstract class DVDistinctValuesCollector<GC extends AbstractDistinctValuesCollector.GroupCount<?>> extends AbstractDistinctValuesCollector<GC> { final String groupField; final String countField; final boolean diskResident; final Type valueType; DVDistinctValuesCollector(String groupField, String countField, boolean diskResident, Type valueType) { this.groupField = groupField; this.countField = countField; this.diskResident = diskResident; this.valueType = valueType; } /** * Constructs a docvalues based implementation of {@link org.apache.lucene.search.grouping.AbstractDistinctValuesCollector} based on the specified * type. * * @param groupField The field to group by * @param countField The field to count distinct values for * @param groups The top N groups, collected during the first phase search * @param diskResident Whether the values to group and count by should be disk resident * @param type The {@link Type} which is used to select a concrete implementation * @return a docvalues based distinct count collector */ @SuppressWarnings("unchecked") public static <T> DVDistinctValuesCollector<GroupCount<T>> create(String groupField, String countField, Collection<SearchGroup<T>> groups, boolean diskResident, Type type) { switch (type) { case VAR_INTS: case FIXED_INTS_8: case FIXED_INTS_16: case FIXED_INTS_32: case FIXED_INTS_64: // Type erasure b/c otherwise we have inconvertible types... return (DVDistinctValuesCollector) new NonSorted.Lng(groupField, countField, (Collection) groups, diskResident, type); case FLOAT_32: case FLOAT_64: // Type erasure b/c otherwise we have inconvertible types... return (DVDistinctValuesCollector) new NonSorted.Dbl(groupField, countField, (Collection) groups, diskResident, type); case BYTES_FIXED_STRAIGHT: case BYTES_FIXED_DEREF: case BYTES_VAR_STRAIGHT: case BYTES_VAR_DEREF: // Type erasure b/c otherwise we have inconvertible types... return (DVDistinctValuesCollector) new NonSorted.BR(groupField, countField, (Collection) groups, diskResident, type); case BYTES_VAR_SORTED: case BYTES_FIXED_SORTED: // Type erasure b/c otherwise we have inconvertible types... return (DVDistinctValuesCollector) new Sorted.BR(groupField, countField, (Collection) groups, diskResident, type); default: throw new IllegalArgumentException(String.format(Locale.ROOT, "ValueType %s not supported", type)); } } static abstract class NonSorted<K> extends DVDistinctValuesCollector<NonSorted.GroupCount> { final Map<K, GroupCount> groupMap = new LinkedHashMap<K, GroupCount>(); DocValues.Source groupFieldSource; DocValues.Source countFieldSource; NonSorted(String groupField, String countField, boolean diskResident, Type valueType) { super(groupField, countField, diskResident, valueType); } public List<GroupCount> getGroups() { return new ArrayList<GroupCount>(groupMap.values()); } public void setNextReader(AtomicReaderContext context) throws IOException { groupFieldSource = retrieveSource(groupField, context); countFieldSource = retrieveSource(countField, context); } private DocValues.Source retrieveSource(String fieldName, AtomicReaderContext context) throws IOException { DocValues groupFieldDv = context.reader().docValues(fieldName); if (groupFieldDv != null) { return diskResident ? groupFieldDv.getDirectSource() : groupFieldDv.getSource(); } else { return DocValues.getDefaultSource(valueType); } } static class Dbl extends NonSorted<Double> { Dbl(String groupField, String countField, Collection<SearchGroup<Double>> groups, boolean diskResident, Type valueType) { super(groupField, countField, diskResident, valueType); for (SearchGroup<Double> group : groups) { groupMap.put(group.groupValue, new GroupCount(group.groupValue)); } } public void collect(int doc) throws IOException { GroupCount groupCount = groupMap.get(groupFieldSource.getFloat(doc)); if (groupCount != null) { groupCount.uniqueValues.add(countFieldSource.getFloat(doc)); } } } static class Lng extends NonSorted<Long> { Lng(String groupField, String countField, Collection<SearchGroup<Long>> groups, boolean diskResident, Type valueType) { super(groupField, countField, diskResident, valueType); for (SearchGroup<Long> group : groups) { groupMap.put(group.groupValue, new GroupCount(group.groupValue)); } } public void collect(int doc) throws IOException { GroupCount groupCount = groupMap.get(groupFieldSource.getInt(doc)); if (groupCount != null) { groupCount.uniqueValues.add(countFieldSource.getInt(doc)); } } } static class BR extends NonSorted<BytesRef> { private final BytesRef spare = new BytesRef(); BR(String groupField, String countField, Collection<SearchGroup<BytesRef>> groups, boolean diskResident, Type valueType) { super(groupField, countField, diskResident, valueType); for (SearchGroup<BytesRef> group : groups) { groupMap.put(group.groupValue, new GroupCount(group.groupValue)); } } public void collect(int doc) throws IOException { GroupCount groupCount = groupMap.get(groupFieldSource.getBytes(doc, spare)); if (groupCount != null) { BytesRef countValue = countFieldSource.getBytes(doc, spare); if (!groupCount.uniqueValues.contains(countValue)) { groupCount.uniqueValues.add(BytesRef.deepCopyOf(countValue)); } } } } static class GroupCount extends AbstractDistinctValuesCollector.GroupCount<Comparable<?>> { GroupCount(Comparable<?> groupValue) { super(groupValue); } } } static abstract class Sorted extends DVDistinctValuesCollector<Sorted.GroupCount> { final SentinelIntSet ordSet; final GroupCount groupCounts[]; final List<GroupCount> groups = new ArrayList<GroupCount>(); DocValues.SortedSource groupFieldSource; DocValues.SortedSource countFieldSource; Sorted(String groupField, String countField, int groupSize, boolean diskResident, Type valueType) { super(groupField, countField, diskResident, valueType); ordSet = new SentinelIntSet(groupSize, -1); groupCounts = new GroupCount[ordSet.keys.length]; } public List<GroupCount> getGroups() { return groups; } public void setNextReader(AtomicReaderContext context) throws IOException { groupFieldSource = retrieveSortedSource(groupField, context); countFieldSource = retrieveSortedSource(countField, context); ordSet.clear(); } private DocValues.SortedSource retrieveSortedSource(String field, AtomicReaderContext context) throws IOException { DocValues countFieldDv = context.reader().docValues(field); if (countFieldDv != null) { return diskResident ? countFieldDv.getDirectSource().asSortedSource() : countFieldDv.getSource().asSortedSource(); } else { return DocValues.getDefaultSortedSource(valueType, context.reader().maxDoc()); } } static class BR extends Sorted { final BytesRef spare = new BytesRef(); BR(String groupField, String countField, Collection<SearchGroup<BytesRef>> searchGroups, boolean diskResident, Type valueType) { super(groupField, countField, searchGroups.size(), diskResident, valueType); for (SearchGroup<BytesRef> group : searchGroups) { this.groups.add(new GroupCount(group.groupValue)); } } public void collect(int doc) throws IOException { int slot = ordSet.find(groupFieldSource.ord(doc)); if (slot < 0) { return; } GroupCount gc = groupCounts[slot]; int countOrd = countFieldSource.ord(doc); if (doesNotContainsOrd(countOrd, gc.ords)) { gc.uniqueValues.add(countFieldSource.getByOrd(countOrd, new BytesRef())); gc.ords = Arrays.copyOf(gc.ords, gc.ords.length + 1); gc.ords[gc.ords.length - 1] = countOrd; if (gc.ords.length > 1) { Arrays.sort(gc.ords); } } } private boolean doesNotContainsOrd(int ord, int[] ords) { if (ords.length == 0) { return true; } else if (ords.length == 1) { return ord != ords[0]; } return Arrays.binarySearch(ords, ord) < 0; } @Override public void setNextReader(AtomicReaderContext context) throws IOException { super.setNextReader(context); for (GroupCount group : groups) { int groupOrd = groupFieldSource.getOrdByValue((BytesRef) group.groupValue, spare); if (groupOrd < 0) { continue; } groupCounts[ordSet.put(groupOrd)] = group; group.ords = new int[group.uniqueValues.size()]; Arrays.fill(group.ords, -1); int i = 0; for (Comparable<?> value : group.uniqueValues) { int countOrd = countFieldSource.getOrdByValue((BytesRef) value, spare); if (countOrd >= 0) { group.ords[i++] = countOrd; } } } } } static class GroupCount extends AbstractDistinctValuesCollector.GroupCount<Comparable<?>> { int[] ords; GroupCount(Comparable<?> groupValue) { super(groupValue); } } } }