package org.apache.solr.search; /* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ import org.apache.lucene.index.AtomicReader; import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.search.BitsFilteredDocIdSet; import org.apache.lucene.search.DocIdSet; import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.Filter; import org.apache.lucene.util.Bits; import org.apache.lucene.util.FixedBitSet; import org.apache.solr.core.HS; public class BitDocSetNative extends DocSetBaseNative implements Bits, Cloneable { final long array; protected final int wlen; // number of words in the array protected int size = -1; // number of docs in the set (cached for perf) @Override protected void free() { HS.freeArray(array); } public BitDocSetNative(int numBits) { this.wlen = FixedBitSet.bits2words(numBits); this.array = HS.allocArray(wlen, 8, true); } public BitDocSetNative(BitDocSetNative other) { this.wlen = other.wlen; this.array = HS.allocArray(wlen, 8, false); // don't zero memory since we will copy over it HS.copyLongs(other.array, 0, array, 0, wlen); // Don't set size... the purpose of making a copy will be to change it. } public BitDocSetNative(FixedBitSet other) { this.wlen = FixedBitSet.bits2words(other.length()); // hmmm, we want numWords back! this.array = HS.allocArray(wlen, 8, false); // don't zero memory since we will copy over it HS.copyLongs(other.getBits(), 0, this.array, 0, wlen); } public FixedBitSet toFixedBitSet() { long[] longArray = new long[wlen]; HS.copyLongs(array, 0, longArray, 0, wlen); return new FixedBitSet(longArray, length()); // Don't set size... the purpose of making a copy will be to change it. } public int capacity() { return wlen<<6; } @Override public int length() { return wlen<<6; } public DocIterator iterator() { return new DocIterator() { int pos=nextSetBit(0); public boolean hasNext() { return pos>=0; } public Integer next() { return nextDoc(); } public void remove() { fastClear(pos); } public int nextDoc() { int old=pos; pos=nextSetBit(old + 1); return old; } public float score() { return 0.0f; } }; } /*** @Override public DocIterator iterator() { return new DocIterator() { private final FixedBitSetIterator iter = new FixedBitSetIterator(bits); private int pos = iter.nextDoc(); @Override public boolean hasNext() { return pos != DocIdSetIterator.NO_MORE_DOCS; } @Override public Integer next() { return nextDoc(); } @Override public void remove() { throw new UnsupportedOperationException(); // bits.clear(pos); } @Override public int nextDoc() { int old=pos; pos=iter.nextDoc(); return old; } @Override public float score() { return 0.0f; } }; } ***/ @Override public FixedBitSet getBits() { // HS-TODO: if used in production, we should optimize FixedBitSet obs = new FixedBitSet(capacity()); long[] target = obs.getBits(); for (int i=0; i<wlen; i++) { target[i] = HS.getLong(array, i); } return obs; } @Override public void add(int doc) { fastSet(doc); size=-1; // invalidate size } @Override public void addUnique(int doc) { fastSet(doc); size=-1; // invalidate size } public int numWords() { return wlen; } @Override public int size() { if (size!=-1) return size; int words = numWords(); int nBits = 0; for (int i=0; i<words; i++) { nBits += Long.bitCount( HS.getLong(array, i) ); } size = nBits; return size; } /** * The number of set bits - size - is cached. If the bitset is changed externally, * this method should be used to invalidate the previously cached size. */ public void invalidateSize() { size=-1; } /** expert: * Sets the number of bits set. This is not validated! */ public void setSize(int size) { this.size = size; } /** Returns true or false for the specified bit index. * The index should be less than the FixedBitSet size */ public boolean get(int index) { int i = index >> 6; // div 64 // signed shift will keep a negative index and force an // array-index-out-of-bounds-exception, removing the need for an explicit check. int bit = index & 0x3f; // mod 64 long bitmask = 1L << bit; return (HS.getLong(array, i) & bitmask) != 0; } /** Returns true or false for the specified bit index. * The index should be less than the FixedBitSet size */ public boolean fastGet(int index) { int i = index >> 6; // div 64 // signed shift will keep a negative index and force an // array-index-out-of-bounds-exception, removing the need for an explicit check. int bit = index & 0x3f; // mod 64 long bitmask = 1L << bit; return (HS.getLong(array, i) & bitmask) != 0; } /** returns 1 if the bit is set, 0 if not. * The index should be less than the FixedBitSet size */ public int getBit(int index) { int i = index >> 6; // div 64 int bit = index & 0x3f; // mod 64 return ((int)(HS.getLong(array,i)>>>bit)) & 0x01; } public void fastSet(int index) { int wordNum = index >> 6; // div 64 int bit = index & 0x3f; // mod 64 long bitmask = 1L << bit; HS.setLong(array,wordNum, HS.getLong(array,wordNum) | bitmask); } public void fastClear(int index) { int wordNum = index >> 6; int bit = index & 0x03f; long bitmask = 1L << bit; HS.setLong(array, wordNum, HS.getLong(array, wordNum) & ~bitmask); // hmmm, it takes one more instruction to clear than it does to set... any // way to work around this? If there were only 63 bits per word, we could // use a right shift of 10111111...111 in binary to position the 0 in the // correct place (using sign extension). // Could also use Long.rotateRight() or rotateLeft() *if* they were converted // by the JVM into a native instruction. // bits[word] &= Long.rotateLeft(0xfffffffe,bit); } public boolean getAndSet(int index) { int wordNum = index >> 6; // div 64 int bit = index & 0x3f; // mod 64 long bitmask = 1L << bit; long word = HS.getLong(array,wordNum) ; boolean val = (word & bitmask) != 0; HS.setLong(array,wordNum, word | bitmask); return val; } public int getAndSetBit(int index) { int wordNum = index >> 6; // div 64 int bit = index & 0x3f; // mod 64 long bitmask = 1L << bit; long word = HS.getLong(array,wordNum) ; HS.setLong(array,wordNum, word | bitmask); return ((int)(word >>> bit)) & 0x01; } public void fastFlip(int index) { int wordNum = index >> 6; // div 64 int bit = index & 0x3f; // mod 64 long bitmask = 1L << bit; HS.setLong(array,wordNum, HS.getLong(array,wordNum) ^ bitmask); } public boolean flipAndGet(int index) { int wordNum = index >> 6; // div 64 int bit = index & 0x3f; // mod 64 long bitmask = 1L << bit; long word = HS.getLong(array,wordNum) ^ bitmask; HS.setLong(array, wordNum, word); return (word & bitmask) != 0; } public void flip(int startIndex, int endIndex) { if (endIndex <= startIndex) return; int startWord = (startIndex>>6); // since endIndex is one past the end, this is index of the last // word to be changed. int endWord = (endIndex-1) >> 6; /*** Grrr, java shifting wraps around so -1L>>>64 == -1 * for that reason, make sure not to use endmask if the bits to flip will * be zero in the last word (redefine endWord to be the last changed...) long startmask = -1L << (startIndex & 0x3f); // example: 11111...111000 long endmask = -1L >>> (64-(endIndex & 0x3f)); // example: 00111...111111 ***/ long startmask = -1L << startIndex; long endmask = -1L >>> -endIndex; // 64-(endIndex&0x3f) is the same as -endIndex due to wrap if (startWord == endWord) { HS.setLong(array, startWord, HS.getLong(array, startWord) ^ (startmask & endmask) ); return; } HS.setLong(array, startWord, HS.getLong(array, startWord) ^ startmask); for (int i=startWord+1; i<endWord; i++) { HS.setLong(array, i, ~HS.getLong(array, i)); } HS.setLong(array, endWord, HS.getLong(array, endWord) ^ endmask); } /** @return the number of set bits */ public long cardinality() { return size(); } public static boolean intersects(BitDocSetNative a, BitDocSetNative b) { assert(a.wlen == b.wlen); int nWords = a.wlen; for (int i=0; i<nWords; i++) { if ( (HS.getLong(a.array, i) & HS.getLong(b.array, i)) != 0 ) { return true; } } return false; } /** Returns the popcount or cardinality of the intersection of the two sets. * Neither set is modified. */ public static int intersectionCount(BitDocSetNative a, BitDocSetNative b) { assert(a.wlen == b.wlen); int nWords = a.numWords(); int result = 0; for (int i=0; i<nWords; i++) { long w1 = HS.getLong(a.array, i); long w2 = HS.getLong(b.array, i); result += Long.bitCount(w1 & w2); } return result; } public static int intersectionCount(BitDocSetNative a, FixedBitSet b) { assert(a.wlen == b.getBits().length); int nWords = a.numWords(); long[] bArray = b.getBits(); int result = 0; for (int i=0; i<nWords; i++) { long w1 = HS.getLong(a.array, i); long w2 = bArray[i]; result += Long.bitCount(w1 & w2); } return result; } public static int unionCount(BitDocSetNative a, BitDocSetNative b) { assert(a.wlen == b.wlen); int nWords = a.numWords(); int result = 0; for (int i=0; i<nWords; i++) { long w1 = HS.getLong(a.array, i); long w2 = HS.getLong(b.array, i); result += Long.bitCount(w1 | w2); } return result; } public static int andNotCount(BitDocSetNative a, BitDocSetNative b) { assert(a.wlen == b.wlen); int nWords = a.numWords(); int result = 0; for (int i=0; i<nWords; i++) { long w1 = HS.getLong(a.array, i); long w2 = HS.getLong(b.array, i); result += Long.bitCount(w1 & ~w2); } return result; } public static int xorCount(BitDocSetNative a, BitDocSetNative b) { assert(a.wlen == b.wlen); int nWords = a.numWords(); int result = 0; for (int i=0; i<nWords; i++) { long w1 = HS.getLong(a.array, i); long w2 = HS.getLong(b.array, i); result += Long.bitCount(w1 ^ w2); } return result; } /** Returns the index of the first set bit starting at the index specified. * -1 is returned if there are no more set bits. */ public int nextSetBit(int index) { int i = index>>6; if (i>=wlen) return -1; int subIndex = index & 0x3f; // index within the word long word = HS.getLong(array, i) >> subIndex; // skip all the bits to the right of index if (word!=0) { return (i<<6) + subIndex + Long.numberOfTrailingZeros(word); } while(++i < wlen) { word = HS.getLong(array, i); if (word!=0) return (i<<6) + Long.numberOfTrailingZeros(word); } return -1; } /** Returns the index of the first set bit starting downwards at * the index specified. * -1 is returned if there are no more set bits. */ public int prevSetBit(int index) { int i = index >> 6; final int subIndex; long word; if (i >= wlen) { i = wlen - 1; if (i < 0) return -1; subIndex = 63; // last possible bit word = HS.getLong(array,i); } else { if (i < 0) return -1; subIndex = index & 0x3f; // index within the word word = (HS.getLong(array,i) << (63-subIndex)); // skip all the bits to the left of index } if (word != 0) { return (i << 6) + subIndex - Long.numberOfLeadingZeros(word); // See LUCENE-3197 } while (--i >= 0) { word = HS.getLong(array, i); if (word !=0 ) { return (i << 6) + 63 - Long.numberOfLeadingZeros(word); } } return -1; } /** this = this AND other */ public void intersectMe(BitDocSetNative other) { assert this.wlen == other.wlen; long thisArr = this.array; long otherArr = other.array; // testing against zero can be more efficient int pos=wlen; while(--pos>=0) { HS.setLong(thisArr, pos, HS.getLong(thisArr,pos) & HS.getLong(otherArr, pos)); } } /** this = this OR other */ public void unionMe(BitDocSetNative other) { assert this.wlen == other.wlen; long thisArr = this.array; long otherArr = other.array; // testing against zero can be more efficient int pos=wlen; while(--pos>=0) { HS.setLong(thisArr, pos, HS.getLong(thisArr,pos) | HS.getLong(otherArr, pos)); } } /** Remove all elements set in other. this = this AND_NOT other */ public void remove(BitDocSetNative other) { assert this.wlen == other.wlen; long thisArr = this.array; long otherArr = other.array; // testing against zero can be more efficient int pos=wlen; while(--pos>=0) { HS.setLong(thisArr, pos, HS.getLong(thisArr,pos) & ~HS.getLong(otherArr, pos)); } } /** Remove all elements set in other. this = this AND_NOT other */ public void xorMe(BitDocSetNative other) { assert this.wlen == other.wlen; long thisArr = this.array; long otherArr = other.array; // testing against zero can be more efficient int pos=wlen; while(--pos>=0) { HS.setLong(thisArr, pos, HS.getLong(thisArr,pos) ^ HS.getLong(otherArr, pos)); } } /////////////////////////////////////////////////////////////////////// ////////////////////////////// DocSet methods ///////////////////////// /////////////////////////////////////////////////////////////////////// /** Returns true of the doc exists in the set. * Should only be called when doc < FixedBitSet.size() */ @Override public boolean exists(int doc) { return fastGet(doc); } @Override public int intersectionSize(DocSet other) { if (other instanceof BitDocSetNative) { return intersectionCount(this, ((BitDocSetNative)other)); } else { // they had better not call us back! return other.intersectionSize(this); } } @Override public boolean intersects(DocSet other) { if (other instanceof BitDocSetNative) { return intersects(this, ((BitDocSetNative) other)); } else { // they had better not call us back! return other.intersects(this); } } @Override public int unionSize(DocSet other) { if (other instanceof BitDocSetNative) { // if we don't know our current size, this is faster than // size + other.size - intersection_size return unionCount(this, ((BitDocSetNative)other)); } else { // they had better not call us back! return other.unionSize(this); } } @Override public int andNotSize(DocSet other) { if (other instanceof BitDocSetNative) { // if we don't know our current size, this is faster than // size - intersection_size return andNotCount(this, ((BitDocSetNative)other)); } else { return super.andNotSize(other); } } @Override public void setBitsOn(FixedBitSet target) { assert this.wlen == target.getBits().length; long thisArr = this.array; long[] otherArr = target.getBits(); // testing against zero can be more efficient int pos=wlen; while(--pos>=0) { otherArr[pos] |= HS.getLong(thisArr, pos); } } @Override public void setBitsOn(BitDocSetNative target) { target.unionMe(this); } @Override public void addAllTo(DocSet target) { } @Override public DocSet andNot(DocSet other) { BitDocSetNative newbits = clone(); if (other instanceof BitDocSetNative) { newbits.remove(((BitDocSetNative) other)); } else { DocIterator iter = other.iterator(); while (iter.hasNext()) newbits.fastClear(iter.nextDoc()); } return newbits; } @Override public DocSet union(DocSet other) { BitDocSetNative newbits = clone(); if (other instanceof BitDocSetNative) { newbits.unionMe(((BitDocSetNative) other)); } else { DocIterator iter = other.iterator(); while (iter.hasNext()) newbits.fastSet(iter.nextDoc()); } return newbits; } @Override public long memSize() { return (((long)wlen) << 3) + 16; } @Override public BitDocSetNative clone() { return new BitDocSetNative(this); } /*** HS-TODO.. change DocSet to extend / encompass DocIdSet? // hopefully temporary public DocIdSet getDocIdSet() { return new DocIdSet() { @Override public DocIdSetIterator iterator() throws IOException { return getDocIdSetIterator(); } } } // hopefully temporary public DocIdSetIterator getDocIdSetIterator() { } ***/ @Override public Filter getTopFilter() { final BitDocSetNative bs = this; // TODO: if cardinality isn't cached, do a quick measure of sparseness // and return null from bits() if too sparse. return new Filter() { @Override public DocIdSet getDocIdSet(final AtomicReaderContext context, final Bits acceptDocs) { AtomicReader reader = context.reader(); // all Solr DocSets that are used as filters only include live docs final Bits acceptDocs2 = acceptDocs == null ? null : (reader.getLiveDocs() == acceptDocs ? null : acceptDocs); /*** HS-TODO if (context.isTopLevel) { return BitsFilteredDocIdSet.wrap(bs, acceptDocs); } ***/ final int base = context.docBase; final int maxDoc = reader.maxDoc(); final int max = base + maxDoc; // one past the max doc in this segment. return BitsFilteredDocIdSet.wrap(new DocIdSet() { @Override public DocIdSetIterator iterator() { return new DocIdSetIterator() { int pos=base-1; int adjustedDoc=-1; @Override public int docID() { return adjustedDoc; } @Override public int nextDoc() { pos = bs.nextSetBit(pos+1); return adjustedDoc = (pos>=0 && pos<max) ? pos-base : NO_MORE_DOCS; } @Override public int advance(int target) { if (target==NO_MORE_DOCS) return adjustedDoc=NO_MORE_DOCS; pos = bs.nextSetBit(target+base); return adjustedDoc = (pos>=0 && pos<max) ? pos-base : NO_MORE_DOCS; } @Override public long cost() { // we don't want to actually compute cardinality, but // if its already been computed, we use it (pro-rated for the segment) if (size != -1) { return (long)(size * ((FixedBitSet.bits2words(maxDoc)<<6) / (float)bs.capacity())); } else { return maxDoc; } } }; } @Override public boolean isCacheable() { return true; } @Override public Bits bits() { return new Bits() { @Override public boolean get(int index) { return bs.fastGet(index + base); } @Override public int length() { return maxDoc; } }; } }, acceptDocs2); } }; } }