package org.apache.lucene.util;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.BitSet;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
/**
* Stores and iterates over sorted integers in compressed form in RAM. <br>
* The code for compressing the differences between ascending integers was
* borrowed from {@link org.apache.lucene.store.IndexInput} and
* {@link org.apache.lucene.store.IndexOutput}.
* <p>
* <b>NOTE:</b> this class assumes the stored integers are doc Ids (hence why it
* extends {@link DocIdSet}). Therefore its {@link #iterator()} assumes {@link
* DocIdSetIterator#NO_MORE_DOCS} can be used as sentinel. If you intend to use
* this value, then make sure it's not used during search flow.
*/
public class SortedVIntList extends DocIdSet {
  /** When a BitSet has fewer than 1 in BITS2VINTLIST_SIZE bits set,
   * a SortedVIntList representing the index numbers of the set bits
   * will be smaller than that BitSet.
   */
  final static int BITS2VINTLIST_SIZE = 8;

  /** Mask selecting the 7 payload bits of an encoded byte. */
  private static final int VB1 = 0x7F;

  /** Number of payload bits carried by each encoded byte. */
  private static final int BIT_SHIFT = 7;

  /** Upper bound on the number of bytes one encoded non-negative int occupies. */
  private static final int MAX_BYTES_PER_INT = (31 / BIT_SHIFT) + 1;

  /** Number of integers stored. */
  private int size;

  /** The delta-encoded, variable-length byte representation of the integers. */
  private byte[] bytes;

  /** Index in {@link #bytes} just past the last encoded byte. */
  private int lastBytePos;

  /**
   * Create a SortedVIntList from all elements of an array of integers.
   *
   * @param sortedInts A sorted array of non negative integers.
   * @throws IllegalArgumentException if the input is not sorted or contains
   *         a negative value.
   */
  public SortedVIntList(int... sortedInts) {
    this(sortedInts, sortedInts.length);
  }

  /**
   * Create a SortedVIntList from an array of integers.
   * @param sortedInts An array of sorted non negative integers.
   * @param inputSize The number of integers to be used from the array.
   * @throws IllegalArgumentException if the used part of the input is not
   *         sorted or contains a negative value.
   */
  public SortedVIntList(int[] sortedInts, int inputSize) {
    SortedVIntListBuilder builder = new SortedVIntListBuilder();
    for (int i = 0; i < inputSize; i++) {
      builder.addInt(sortedInts[i]);
    }
    builder.done();
  }

  /**
   * Create a SortedVIntList from a BitSet.
   * @param bits A bit set representing a set of integers.
   */
  public SortedVIntList(BitSet bits) {
    SortedVIntListBuilder builder = new SortedVIntListBuilder();
    for (int bit = bits.nextSetBit(0); bit != -1; bit = bits.nextSetBit(bit + 1)) {
      builder.addInt(bit);
      if (bit == Integer.MAX_VALUE) {
        // bit + 1 would overflow to a negative index; no larger index exists.
        break;
      }
    }
    builder.done();
  }

  /**
   * Create a SortedVIntList from an OpenBitSet.
   * @param bits A bit set representing a set of integers.
   */
  public SortedVIntList(OpenBitSet bits) {
    SortedVIntListBuilder builder = new SortedVIntListBuilder();
    for (int bit = bits.nextSetBit(0); bit != -1; bit = bits.nextSetBit(bit + 1)) {
      builder.addInt(bit);
      if (bit == Integer.MAX_VALUE) {
        // bit + 1 would overflow to a negative index; no larger index exists.
        break;
      }
    }
    builder.done();
  }

  /**
   * Create a SortedVIntList.
   * @param docIdSetIterator An iterator providing document numbers as a set of integers.
   *        This DocIdSetIterator is iterated completely when this constructor
   *        is called and it must provide the integers in non
   *        decreasing order.
   * @throws IOException propagated from the iterator's {@code nextDoc()}.
   * @throws IllegalArgumentException if the iterator yields a value smaller
   *         than its predecessor, or a negative value.
   */
  public SortedVIntList(DocIdSetIterator docIdSetIterator) throws IOException {
    SortedVIntListBuilder builder = new SortedVIntListBuilder();
    int doc;
    while ((doc = docIdSetIterator.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
      builder.addInt(doc);
    }
    builder.done();
  }

  /**
   * Appends integers to the enclosing list's byte array, encoding each one as
   * the variable-length difference to the previously added integer.
   */
  private class SortedVIntListBuilder {
    /** The most recently added integer; deltas are computed against it. */
    private int lastInt = 0;

    SortedVIntListBuilder() {
      initBytes();
    }

    /**
     * Appends one integer.
     *
     * @param nextInt the value to add; must be at least as large as the
     *        previously added value (and non negative for the first call).
     * @throws IllegalArgumentException if {@code nextInt} is smaller than the
     *         previously added value, which also covers a negative value.
     */
    void addInt(int nextInt) {
      // Compare directly instead of testing the sign of the difference: the
      // subtraction can overflow (e.g. a large lastInt followed by a negative
      // nextInt) and would then let unsorted input slip through.
      if (nextInt < lastInt) {
        throw new IllegalArgumentException(
            "Input not sorted or first element negative.");
      }
      // 0 <= lastInt <= nextInt holds here, so the difference cannot overflow.
      int diff = nextInt - lastInt;
      if ((lastBytePos + MAX_BYTES_PER_INT) > bytes.length) {
        // Biggest possible int does not fit.
        resizeBytes(ArrayUtil.oversize(lastBytePos + MAX_BYTES_PER_INT, 1));
      }
      // See org.apache.lucene.store.IndexOutput.writeVInt()
      while ((diff & ~VB1) != 0) { // The high bit of the next byte needs to be set.
        bytes[lastBytePos++] = (byte) ((diff & VB1) | ~VB1);
        diff >>>= BIT_SHIFT;
      }
      bytes[lastBytePos++] = (byte) diff; // Last byte, high bit not set.
      size++;
      lastInt = nextInt;
    }

    /** Trims the byte array to exactly the number of bytes written. */
    void done() {
      resizeBytes(lastBytePos);
    }
  }

  /** Resets the list to empty with a freshly allocated byte buffer. */
  private void initBytes() {
    size = 0;
    bytes = new byte[128]; // initial byte size
    lastBytePos = 0;
  }

  /**
   * Replaces the byte buffer with one of the given length, keeping the bytes
   * written so far. Does nothing when the length is already {@code newSize}.
   */
  private void resizeBytes(int newSize) {
    if (newSize != bytes.length) {
      byte[] newBytes = new byte[newSize];
      System.arraycopy(bytes, 0, newBytes, 0, lastBytePos);
      bytes = newBytes;
    }
  }

  /**
   * @return The total number of sorted integers.
   */
  public int size() {
    return size;
  }

  /**
   * @return The size of the byte array storing the compressed sorted integers.
   */
  public int getByteSize() {
    return bytes.length;
  }

  /** This DocIdSet implementation is cacheable. */
  @Override
  public boolean isCacheable() {
    return true;
  }

  /**
   * @return An iterator over the sorted integers.
   */
  @Override
  public DocIdSetIterator iterator() {
    return new DocIdSetIterator() {
      /** Read position in the enclosing list's byte array. */
      int bytePos = 0;
      /** Running sum of the decoded deltas, i.e. the last decoded integer. */
      int lastInt = 0;
      /** Value reported by {@link #docID()}; -1 before the first move. */
      int doc = -1;

      /** Decodes the next delta and adds it to {@code lastInt}. */
      private void advance() {
        // See org.apache.lucene.store.IndexInput.readVInt()
        byte b = bytes[bytePos++];
        lastInt += b & VB1;
        // A set high bit means another byte of the same value follows.
        for (int s = BIT_SHIFT; (b & ~VB1) != 0; s += BIT_SHIFT) {
          b = bytes[bytePos++];
          lastInt += (b & VB1) << s;
        }
      }

      @Override
      public int docID() {
        return doc;
      }

      /** Moves to the next integer, or to NO_MORE_DOCS once all bytes are consumed. */
      @Override
      public int nextDoc() {
        if (bytePos >= lastBytePos) {
          doc = NO_MORE_DOCS;
        } else {
          advance();
          doc = lastInt;
        }
        return doc;
      }

      /**
       * Decodes forward, always consuming at least one value if any remain,
       * until a value greater than or equal to {@code target} is found;
       * yields NO_MORE_DOCS when the bytes run out first.
       */
      @Override
      public int advance(int target) {
        while (bytePos < lastBytePos) {
          advance();
          if (lastInt >= target) {
            return doc = lastInt;
          }
        }
        return doc = NO_MORE_DOCS;
      }
    };
  }
}