package org.apache.lucene.search.grouping;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.FieldCache;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;

/**
 * Collects every distinct group value matched by a query. Only the group
 * values themselves are recorded; their order is undefined and no attempt is
 * made to find the most relevant document of a group.
 *
 * <p>
 * Implementation detail: an int hash set ({@code SentinelIntSet}) holding
 * term ordinals of the current segment is used to decide whether a group has
 * already been counted. On every segment switch the set is emptied and then
 * re-populated with the ordinals of those already-counted groups that also
 * occur in the new segment.
 *
 * @lucene.experimental
 */
public class TermAllGroupsCollector extends AbstractAllGroupsCollector<String> {

  /** Initial capacity used by the single-argument constructor. */
  private static final int DEFAULT_INITIAL_SIZE = 128;

  /** Name of the field whose values define the groups. */
  private final String groupField;

  /** Ordinals (relative to the current segment) of groups already counted. */
  private final SentinelIntSet ordSet;

  /** All distinct group values seen so far; may contain {@code null}. */
  private final List<String> groups;

  /** String index of the current segment, replaced in {@link #setNextReader}. */
  private FieldCache.StringIndex index;

  /**
   * Expert: Constructs a {@link TermAllGroupsCollector}.
   *
   * @param groupField  The field to group by
   * @param initialSize The initial allocation size of the internal int set
   *                    and group list, which should roughly match the total
   *                    number of expected unique groups. Be aware that the
   *                    heap usage is 4 bytes * initialSize.
   */
  public TermAllGroupsCollector(String groupField, int initialSize) {
    this.groupField = groupField;
    // -1 is the "empty slot" marker handed to the int set.
    this.ordSet = new SentinelIntSet(initialSize, -1);
    this.groups = new ArrayList<String>(initialSize);
  }

  /**
   * Constructs a {@link TermAllGroupsCollector} whose internal int set and
   * group list start with an allocation size of 128.
   *
   * @param groupField The field to group by
   */
  public TermAllGroupsCollector(String groupField) {
    this(groupField, DEFAULT_INITIAL_SIZE);
  }

  /**
   * Records the group of the given document unless that group was already
   * counted.
   *
   * @param doc segment-relative document id used to index the current
   *            segment's ordinal array
   * @throws IOException declared by the collector contract; not thrown by
   *                     this implementation directly
   */
  public void collect(int doc) throws IOException {
    final int ord = index.order[doc];
    if (ordSet.exists(ord)) {
      // Group already counted, nothing to do.
      return;
    }

    ordSet.put(ord);
    if (ord == 0) {
      // Ordinal 0 is recorded as the null group
      // (presumably documents without a value for the field -- TODO confirm).
      groups.add(null);
    } else {
      groups.add(index.lookup[ord]);
    }
  }

  /**
   * {@inheritDoc}
   */
  public Collection<String> getGroups() {
    return groups;
  }

  /**
   * Switches to the next segment: loads its string index for the group field
   * and rebuilds the ordinal set from the groups counted so far.
   *
   * @param reader  reader of the segment about to be collected
   * @param docBase document id offset of the segment (unused here)
   * @throws IOException if the string index cannot be obtained
   */
  public void setNextReader(IndexReader reader, int docBase) throws IOException {
    index = FieldCache.DEFAULT.getStringIndex(reader, groupField);

    // Clear ordSet and fill it with previous encountered groups that can occur in the current segment.
    ordSet.clear();
    final int countedGroups = groups.size();
    for (int i = 0; i < countedGroups; i++) {
      final int segmentOrd = index.binarySearchLookup(groups.get(i));
      if (segmentOrd < 0) {
        // This group value does not occur in the new segment.
        continue;
      }
      ordSet.put(segmentOrd);
    }
  }
}