/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.solr.search; import java.io.IOException; import java.util.ArrayList; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.search.Scorer; import org.apache.lucene.search.SimpleCollector; import org.apache.lucene.util.FixedBitSet; /** * */ public class DocSetCollector extends SimpleCollector { int pos=0; FixedBitSet bits; final int maxDoc; final int smallSetSize; int base; // in case there aren't that many hits, we may not want a very sparse // bit array. Optimistically collect the first few docs in an array // in case there are only a few. final ExpandingIntArray scratch; public DocSetCollector(int maxDoc) { this(DocSetUtil.smallSetSize(maxDoc), maxDoc); } public DocSetCollector(int smallSetSize, int maxDoc) { this.smallSetSize = smallSetSize; this.maxDoc = maxDoc; this.scratch = new ExpandingIntArray(smallSetSize); } @Override public void collect(int doc) throws IOException { doc += base; // optimistically collect the first docs in an array // in case the total number will be small enough to represent // as a small set like SortedIntDocSet instead... // Storing in this array will be quicker to convert // than scanning through a potentially huge bit vector. // FUTURE: when search methods all start returning docs in order, maybe // we could have a ListDocSet() and use the collected array directly. if (pos < smallSetSize) { scratch.add(pos, doc); } else { // this conditional could be removed if BitSet was preallocated, but that // would take up more memory, and add more GC time... if (bits==null) bits = new FixedBitSet(maxDoc); bits.set(doc); } pos++; } /** The number of documents that have been collected */ public int size() { return pos; } public DocSet getDocSet() { if (pos<=scratch.size()) { // assumes docs were collected in sorted order! return new SortedIntDocSet(scratch.toArray(), pos); // } else if (pos == maxDoc) { // return new MatchAllDocSet(maxDoc); // a bunch of code currently relies on BitDocSet (either explicitly, or implicitly for performance) } else { // set the bits for ids that were collected in the array scratch.copyTo(bits); return new BitDocSet(bits,pos); } } @Override public void setScorer(Scorer scorer) throws IOException { } @Override public boolean needsScores() { return false; } @Override protected void doSetNextReader(LeafReaderContext context) throws IOException { this.base = context.docBase; } protected static class ExpandingIntArray { private static final int[] EMPTY = new int[0]; private int[] currentAddArray = null; private int indexForNextAddInCurrentAddArray = 0; private int size = 0; private final int smallSetSize; private ArrayList<int[]> arrays; public ExpandingIntArray(int smallSetSize) { this.smallSetSize = smallSetSize; this.currentAddArray = EMPTY; } private void addNewArray() { int arrSize = Math.max(10, currentAddArray.length << 1); arrSize = Math.min(arrSize, smallSetSize - size); // max out at the smallSetSize this.currentAddArray = new int[arrSize]; if (arrays == null) { arrays = new ArrayList<>(); } arrays.add(this.currentAddArray); indexForNextAddInCurrentAddArray = 0; // System.out.println("### ALLOCATED " + this + " " + arrSize + " smallSetSize="+smallSetSize + " left=" + (smallSetSize-size)); } public void add(int index, int value) { // assert index == size; // only appending is supported if (indexForNextAddInCurrentAddArray >= currentAddArray.length) { addNewArray(); } currentAddArray[indexForNextAddInCurrentAddArray++] = value; size++; } public void copyTo(FixedBitSet bits) { if (size > 0) { int resultPos = 0; for (int i = 0; i < arrays.size(); i++) { int[] srcArray = arrays.get(i); int intsToCopy = (i < (arrays.size() - 1)) ? srcArray.length : indexForNextAddInCurrentAddArray; for (int j = 0; j < intsToCopy; j++) { bits.set(srcArray[j]); } resultPos += intsToCopy; } assert resultPos == size; } } public int[] toArray() { int[] result = new int[size]; if (size > 0) { int resultPos = 0; for (int i = 0; i < arrays.size(); i++) { int[] srcArray = arrays.get(i); int intsToCopy = (i < (arrays.size() - 1)) ? srcArray.length : indexForNextAddInCurrentAddArray; System.arraycopy(srcArray, 0, result, resultPos, intsToCopy); resultPos += intsToCopy; } assert resultPos == size; } return result; } public int size() { return size; } } }