package org.apache.lucene.search.grouping.dv;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.DocValues.Type;
import org.apache.lucene.search.grouping.AbstractGroupFacetCollector;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.SentinelIntSet;
import org.apache.lucene.util.UnicodeUtil;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;

/**
 * An implementation of {@link AbstractGroupFacetCollector} that computes grouped facets based on docvalues.
 *
 * @lucene.experimental
 */
public abstract class DVGroupFacetCollector extends AbstractGroupFacetCollector {

  /** Docvalues type of the group field. */
  final Type groupDvType;
  /** Whether the group docvalues are read via the direct (disk resident) source. */
  final boolean groupDiskResident;
  /** Docvalues type of the facet field. */
  final Type facetFieldDvType;
  /** Whether the facet docvalues are read via the direct (disk resident) source. */
  final boolean facetDiskResident;
  /** Every distinct (group value, facet value) pair collected so far, across all segments. */
  final List<GroupedFacetHit> groupedFacetHits;
  /** Per-segment set of already counted (group ord, facet ord) pairs, encoded as a single int. */
  final SentinelIntSet segmentGroupedFacetHits;

  /**
   * Factory method for creating the right implementation based on the group docvalues type and the facet docvalues
   * type.
   *
   * Currently {@link Type#BYTES_VAR_SORTED} and {@link Type#BYTES_FIXED_SORTED} are the only docvalues types
   * supported for both the group and the facet field.
   *
   * @param groupField        The group field
   * @param groupDvType       The docvalues type for the group field
   * @param groupDiskResident Whether the group docvalues should be disk resident
   * @param facetField        The facet field
   * @param facetDvType       The docvalues type for the facet field
   * @param facetDiskResident Whether the facet docvalues should be disk resident
   * @param facetPrefix       The facet prefix a facet entry should start with to be included.
   * @param initialSize       The initial allocation size of the internal int set and group facet list which should roughly
   *                          match the total number of expected unique groups. Be aware that the heap usage is
   *                          4 bytes * initialSize.
   * @return a <code>DVGroupFacetCollector</code> implementation
   * @throws IllegalArgumentException if the group or facet docvalues type is not one of the supported sorted types
   */
  public static DVGroupFacetCollector createDvGroupFacetCollector(String groupField,
                                                                  Type groupDvType,
                                                                  boolean groupDiskResident,
                                                                  String facetField,
                                                                  Type facetDvType,
                                                                  boolean facetDiskResident,
                                                                  BytesRef facetPrefix,
                                                                  int initialSize) {
    switch (groupDvType) {
      case BYTES_VAR_SORTED:
      case BYTES_FIXED_SORTED:
        return GroupSortedBR.createGroupSortedFacetCollector(groupField, groupDvType, groupDiskResident, facetField, facetDvType, facetDiskResident, facetPrefix, initialSize);
      default:
        // Every non-sorted type is rejected with the same message.
        throw new IllegalArgumentException(String.format(Locale.ROOT, "Group valueType %s not supported", groupDvType));
    }
  }

  /**
   * Stores the field configuration and allocates the hit list and the per-segment int set,
   * both sized with <code>initialSize</code>. The int set uses -1 as its empty-slot sentinel.
   */
  DVGroupFacetCollector(String groupField,
                        Type groupDvType,
                        boolean groupDiskResident,
                        String facetField,
                        Type facetFieldDvType,
                        boolean facetDiskResident,
                        BytesRef facetPrefix,
                        int initialSize) {
    super(groupField, facetField, facetPrefix);
    this.groupDvType = groupDvType;
    this.groupDiskResident = groupDiskResident;
    this.facetFieldDvType = facetFieldDvType;
    this.facetDiskResident = facetDiskResident;
    groupedFacetHits = new ArrayList<GroupedFacetHit>(initialSize);
    segmentGroupedFacetHits = new SentinelIntSet(initialSize, -1);
  }

  /**
   * Base for collectors whose group field is backed by a sorted bytes docvalues source.
   */
  static abstract class GroupSortedBR extends DVGroupFacetCollector {

    /** Scratch buffers for facet / group lookups. */
    final BytesRef facetSpare = new BytesRef();
    final BytesRef groupSpare = new BytesRef();
    /** Sorted source of the group field for the current segment; assigned in setNextReader. */
    DocValues.SortedSource groupFieldSource;

    GroupSortedBR(String groupField,
                  Type groupDvType,
                  boolean groupDiskResident,
                  String facetField,
                  Type facetFieldDvType,
                  boolean facetDiskResident,
                  BytesRef facetPrefix,
                  int initialSize) {
      super(groupField, groupDvType, groupDiskResident, facetField, facetFieldDvType, facetDiskResident, facetPrefix, initialSize);
    }

    /**
     * Selects the implementation for the given facet docvalues type.
     *
     * @throws IllegalArgumentException if <code>facetDvType</code> is not a sorted bytes type
     */
    static DVGroupFacetCollector createGroupSortedFacetCollector(String groupField,
                                                                 Type groupDvType,
                                                                 boolean groupDiskResident,
                                                                 String facetField,
                                                                 Type facetDvType,
                                                                 boolean facetDiskResident,
                                                                 BytesRef facetPrefix,
                                                                 int initialSize) {
      switch (facetDvType) {
        case BYTES_VAR_SORTED:
        case BYTES_FIXED_SORTED:
          return new FacetSortedBR(groupField, groupDvType, groupDiskResident, facetField, facetDvType, facetDiskResident, facetPrefix, initialSize);
        default:
          // Every non-sorted type is rejected with the same message.
          throw new IllegalArgumentException(String.format(Locale.ROOT, "Facet valueType %s not supported", facetDvType));
      }
    }

    /**
     * Collector for the case where both the group field and the facet field are sorted bytes docvalues.
     */
    static class FacetSortedBR extends GroupSortedBR {

      /** Sorted source of the facet field for the current segment; assigned in setNextReader. */
      private DocValues.SortedSource facetFieldSource;

      FacetSortedBR(String groupField,
                    Type groupDvType,
                    boolean groupDiskResident,
                    String facetField,
                    Type facetDvType,
                    boolean diskResident,
                    BytesRef facetPrefix,
                    int initialSize) {
        super(groupField, groupDvType, groupDiskResident, facetField, facetDvType, diskResident, facetPrefix, initialSize);
      }

      /**
       * Counts the document's facet value once per distinct (group, facet) pair.
       * Documents whose facet ord lies outside [startFacetOrd, endFacetOrd) are ignored.
       */
      public void collect(int doc) throws IOException {
        int facetOrd = facetFieldSource.ord(doc);
        if (facetOrd < startFacetOrd || facetOrd >= endFacetOrd) {
          return;
        }

        int groupOrd = groupFieldSource.ord(doc);
        // Encode the (group ord, facet ord) pair as one int key.
        // NOTE(review): this is int arithmetic and can wrap when groupOrd * valueCount exceeds
        // Integer.MAX_VALUE; SentinelIntSet only stores ints, so a different set would be needed to lift that limit.
        int segmentGroupedFacetsIndex = (groupOrd * facetFieldSource.getValueCount()) + facetOrd;
        if (segmentGroupedFacetHits.exists(segmentGroupedFacetsIndex)) {
          return;
        }

        segmentTotalCount++;
        segmentFacetCounts[facetOrd]++;

        segmentGroupedFacetHits.put(segmentGroupedFacetsIndex);
        // Fresh BytesRef instances are passed because the values are retained in groupedFacetHits.
        groupedFacetHits.add(
            new GroupedFacetHit(
                groupFieldSource.getByOrd(groupOrd, new BytesRef()),
                facetFieldSource.getByOrd(facetOrd, new BytesRef())
            )
        );
      }

      /**
       * Switches to the next segment: stores the previous segment's result (if any), loads the
       * sorted sources of the new segment, re-registers the already seen (group, facet) pairs using
       * the new segment's ords and computes the facet ord range matching <code>facetPrefix</code>.
       */
      public void setNextReader(AtomicReaderContext context) throws IOException {
        if (segmentFacetCounts != null) {
          segmentResults.add(createSegmentResult());
        }

        groupFieldSource = getDocValuesSortedSource(groupField, groupDvType, groupDiskResident, context.reader());
        facetFieldSource = getDocValuesSortedSource(facetField, facetFieldDvType, facetDiskResident, context.reader());
        final int facetValueCount = facetFieldSource.getValueCount();
        segmentFacetCounts = new int[facetValueCount];
        segmentTotalCount = 0;

        segmentGroupedFacetHits.clear();
        // Pairs counted in earlier segments must not be counted again in this one.
        for (GroupedFacetHit groupedFacetHit : groupedFacetHits) {
          int facetOrd = facetFieldSource.getOrdByValue(groupedFacetHit.facetValue, facetSpare);
          if (facetOrd < 0) {
            continue;
          }

          int groupOrd = groupFieldSource.getOrdByValue(groupedFacetHit.groupValue, groupSpare);
          if (groupOrd < 0) {
            continue;
          }

          int segmentGroupedFacetsIndex = (groupOrd * facetValueCount) + facetOrd;
          segmentGroupedFacetHits.put(segmentGroupedFacetsIndex);
        }

        if (facetPrefix != null) {
          startFacetOrd = facetFieldSource.getOrdByValue(facetPrefix, facetSpare);
          if (startFacetOrd < 0) {
            // Points to the ord one higher than facetPrefix
            startFacetOrd = -startFacetOrd - 1;
          }
          BytesRef facetEndPrefix = BytesRef.deepCopyOf(facetPrefix);
          facetEndPrefix.append(UnicodeUtil.BIG_TERM);
          endFacetOrd = facetFieldSource.getOrdByValue(facetEndPrefix, facetSpare);
          if (endFacetOrd < 0) {
            // Points to the ord one higher than facetEndPrefix
            endFacetOrd = -endFacetOrd - 1;
          } else {
            // The sentinel term itself is present in this segment. It starts with facetPrefix, so it
            // belongs to the range; negating a non-negative ord here would yield a negative bound
            // and make collect() reject every document.
            endFacetOrd++;
          }
        } else {
          startFacetOrd = 0;
          endFacetOrd = facetValueCount;
        }
      }

      /**
       * Builds the result for the current segment. When the range starts at ord 0 and the value at
       * ord 0 is empty, that bucket's count is reported as "missing" and subtracted from the total.
       */
      protected SegmentResult createSegmentResult() throws IOException {
        if (startFacetOrd == 0 && facetFieldSource.getByOrd(startFacetOrd, facetSpare).length == 0) {
          int missing = segmentFacetCounts[0];
          int total = segmentTotalCount - segmentFacetCounts[0];
          return new SegmentResult(segmentFacetCounts, total, missing, facetFieldSource, endFacetOrd);
        } else {
          return new SegmentResult(segmentFacetCounts, segmentTotalCount, facetFieldSource, startFacetOrd, endFacetOrd);
        }
      }

      /**
       * Returns the sorted source for <code>field</code> in <code>reader</code>. Uses the direct source when
       * <code>diskResident</code> is set, and falls back to a default sorted source when the segment has no
       * docvalues for the field.
       */
      private DocValues.SortedSource getDocValuesSortedSource(String field, Type dvType, boolean diskResident, AtomicReader reader) throws IOException {
        DocValues dv = reader.docValues(field);
        DocValues.Source dvSource;
        if (dv != null) {
          dvSource = diskResident ? dv.getDirectSource() : dv.getSource();
        } else {
          dvSource = DocValues.getDefaultSortedSource(dvType, reader.maxDoc());
        }
        return dvSource.asSortedSource();
      }

      /**
       * Segment result that resolves merge terms by ord from the segment's facet source.
       */
      private static class SegmentResult extends AbstractGroupFacetCollector.SegmentResult {

        final DocValues.SortedSource facetFieldSource;
        final BytesRef spare = new BytesRef();

        /**
         * Variant with a missing count: merging starts at ord 1, skipping ord 0 whose count is passed
         * in as <code>missing</code>.
         */
        SegmentResult(int[] counts, int total, int missing, DocValues.SortedSource facetFieldSource, int endFacetOrd) {
          super(counts, total, missing, endFacetOrd);
          this.facetFieldSource = facetFieldSource;
          this.mergePos = 1;
          if (mergePos < maxTermPos) {
            mergeTerm = facetFieldSource.getByOrd(mergePos, spare);
          }
        }

        /**
         * Variant without a missing count: merging starts at <code>startFacetOrd</code>.
         */
        SegmentResult(int[] counts, int total, DocValues.SortedSource facetFieldSource, int startFacetOrd, int endFacetOrd) {
          super(counts, total, 0, endFacetOrd);
          this.facetFieldSource = facetFieldSource;
          this.mergePos = startFacetOrd;
          if (mergePos < maxTermPos) {
            mergeTerm = facetFieldSource.getByOrd(mergePos, spare);
          }
        }

        /** Loads the term at the current merge position into <code>mergeTerm</code>. */
        @Override
        protected void nextTerm() throws IOException {
          mergeTerm = facetFieldSource.getByOrd(mergePos, spare);
        }

      }

    }

  }

}

/**
 * A (group value, facet value) pair that has already been counted.
 */
class GroupedFacetHit {

  final BytesRef groupValue;
  final BytesRef facetValue;

  GroupedFacetHit(BytesRef groupValue, BytesRef facetValue) {
    this.groupValue = groupValue;
    this.facetValue = facetValue;
  }
}