package org.apache.lucene.index.codecs.standard;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.Collection;
import org.apache.lucene.store.Directory;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CodecUtil;
/** Concrete class that reads the current doc/freq/skip
* postings format.
* @lucene.experimental */
public class StandardPostingsReaderImpl extends StandardPostingsReader {
private final IndexInput freqIn;
private final IndexInput proxIn;
int skipInterval;
int maxSkipLevels;
public StandardPostingsReaderImpl(Directory dir, SegmentInfo segmentInfo, int readBufferSize) throws IOException {
freqIn = dir.openInput(IndexFileNames.segmentFileName(segmentInfo.name, "", StandardCodec.FREQ_EXTENSION),
readBufferSize);
if (segmentInfo.getHasProx()) {
boolean success = false;
try {
proxIn = dir.openInput(IndexFileNames.segmentFileName(segmentInfo.name, "", StandardCodec.PROX_EXTENSION),
readBufferSize);
success = true;
} finally {
if (!success) {
freqIn.close();
}
}
} else {
proxIn = null;
}
}
/**
 * Adds the names of the postings files of a segment to {@code files}: always the
 * freq file, and the prox file only when the segment reports having prox data.
 *
 * @param dir not used by this method
 * @param segmentInfo segment whose file names are computed
 * @param files collection that receives the file names
 */
public static void files(Directory dir, SegmentInfo segmentInfo, Collection<String> files) throws IOException {
  final String freqFileName =
      IndexFileNames.segmentFileName(segmentInfo.name, "", StandardCodec.FREQ_EXTENSION);
  files.add(freqFileName);

  if (!segmentInfo.getHasProx()) {
    return;
  }

  final String proxFileName =
      IndexFileNames.segmentFileName(segmentInfo.name, "", StandardCodec.PROX_EXTENSION);
  files.add(proxFileName);
}
/**
 * Checks the header of {@code termsIn} against the postings writer's codec name
 * and start version, then reads the skip interval and the maximum number of skip
 * levels (two ints, in that order).
 *
 * @param termsIn terms input positioned at the postings header
 * @throws IOException if the header check or either read fails
 */
@Override
public void init(IndexInput termsIn) throws IOException {
  // Only a single version exists, so it serves as both accepted bounds
  final int onlyVersion = StandardPostingsWriterImpl.VERSION_START;
  CodecUtil.checkHeader(termsIn, StandardPostingsWriterImpl.CODEC, onlyVersion, onlyVersion);

  this.skipInterval = termsIn.readInt();
  this.maxSkipLevels = termsIn.readInt();
}
/**
 * Per-term state of this postings reader: where the term's data starts in the
 * freq and prox files, and where its skip data sits relative to the freq start.
 */
private static class DocTermState extends TermState {
  /** File pointer of the term's postings in the freq file. */
  long freqOffset;
  /** File pointer of the term's positions in the prox file. */
  long proxOffset;
  /** Offset of the skip data, added to {@link #freqOffset}; 0 when the term has none. */
  int skipOffset;

  /** Clones the parent state and carries the three offsets over to the copy. */
  public Object clone() {
    final DocTermState copy = (DocTermState) super.clone();
    copy.freqOffset = this.freqOffset;
    copy.proxOffset = this.proxOffset;
    copy.skipOffset = this.skipOffset;
    return copy;
  }

  /** Overwrites this state with {@code _other}, which must be a {@code DocTermState}. */
  public void copy(TermState _other) {
    super.copy(_other);
    final DocTermState source = (DocTermState) _other;
    this.freqOffset = source.freqOffset;
    this.proxOffset = source.proxOffset;
    this.skipOffset = source.skipOffset;
  }

  public String toString() {
    final StringBuilder text = new StringBuilder();
    text.append(super.toString());
    text.append(" freqFP=").append(freqOffset);
    text.append(" proxFP=").append(proxOffset);
    text.append(" skipOffset=").append(skipOffset);
    return text.toString();
  }
}
/** Creates an empty term state of the concrete type this reader's methods cast to. */
@Override
public TermState newTermState() {
  final DocTermState freshState = new DocTermState();
  return freshState;
}
/**
 * Closes the freq input and then the prox input (when present). The prox input
 * is closed even if closing the freq input throws.
 *
 * @throws IOException if closing either input fails
 */
@Override
public void close() throws IOException {
  final IndexInput freq = this.freqIn;
  final IndexInput prox = this.proxIn;
  try {
    if (freq != null) {
      freq.close();
    }
  } finally {
    // prox is null for segments without prox data
    if (prox != null) {
      prox.close();
    }
  }
}
/**
 * Reads the postings metadata of the next term from {@code termsIn} into
 * {@code termState}.
 *
 * <p>File pointers are taken as-is for index terms and added to the previous
 * value otherwise. A skip offset is only read when the term's docFreq reaches
 * the skip interval; the prox pointer is only read for fields that keep
 * term frequencies and positions.
 *
 * @param termsIn input positioned at the term's metadata
 * @param fieldInfo field the term belongs to
 * @param termState state to update; must be a {@code DocTermState}
 * @param isIndexTerm whether the pointers are stored absolute rather than as deltas
 */
@Override
public void readTerm(IndexInput termsIn, FieldInfo fieldInfo, TermState termState, boolean isIndexTerm)
    throws IOException {
  final DocTermState state = (DocTermState) termState;

  final long freqValue = termsIn.readVLong();
  state.freqOffset = isIndexTerm ? freqValue : state.freqOffset + freqValue;

  // Terms with fewer docs than the skip interval carry no skip data
  state.skipOffset = state.docFreq >= skipInterval ? termsIn.readVInt() : 0;

  if (fieldInfo.omitTermFreqAndPositions) {
    return;
  }

  final long proxValue = termsIn.readVLong();
  state.proxOffset = isIndexTerm ? proxValue : state.proxOffset + proxValue;
}
/**
 * Returns a docs enum positioned on the given term, recycling {@code reuse} when
 * it is a {@code SegmentDocsEnum} that was created from this reader's freq input.
 *
 * @param fieldInfo field of the term
 * @param termState state of the term; must be a {@code DocTermState}
 * @param skipDocs docs to leave out of the enumeration, or null for none
 * @param reuse enum to recycle if possible, may be null
 */
@Override
public DocsEnum docs(FieldInfo fieldInfo, TermState termState, Bits skipDocs, DocsEnum reuse) throws IOException {
  SegmentDocsEnum result = null;

  if (reuse instanceof SegmentDocsEnum) {
    final SegmentDocsEnum candidate = (SegmentDocsEnum) reuse;
    // With ParallelReader the passed-in enum may come from another reader
    // that also uses the standard codec; only recycle our own.
    if (candidate.startFreqIn == freqIn) {
      result = candidate;
    }
  }

  if (result == null) {
    result = new SegmentDocsEnum(freqIn);
  }
  return result.reset(fieldInfo, (DocTermState) termState, skipDocs);
}
/**
 * Returns a docs-and-positions enum positioned on the given term, or null when
 * the field omits term frequencies and positions. {@code reuse} is recycled when
 * it is a {@code SegmentDocsAndPositionsEnum} created from this reader's freq input.
 *
 * @param fieldInfo field of the term
 * @param termState state of the term; must be a {@code DocTermState}
 * @param skipDocs docs to leave out of the enumeration, or null for none
 * @param reuse enum to recycle if possible, may be null
 */
@Override
public DocsAndPositionsEnum docsAndPositions(FieldInfo fieldInfo, TermState termState, Bits skipDocs, DocsAndPositionsEnum reuse) throws IOException {
  if (fieldInfo.omitTermFreqAndPositions) {
    return null;
  }

  SegmentDocsAndPositionsEnum result = null;

  if (reuse instanceof SegmentDocsAndPositionsEnum) {
    final SegmentDocsAndPositionsEnum candidate = (SegmentDocsAndPositionsEnum) reuse;
    // With ParallelReader the passed-in enum may come from another reader
    // that also uses the standard codec; only recycle our own.
    if (candidate.startFreqIn == freqIn) {
      result = candidate;
    }
  }

  if (result == null) {
    result = new SegmentDocsAndPositionsEnum(freqIn, proxIn);
  }
  return result.reset(fieldInfo, (DocTermState) termState, skipDocs);
}
// Decodes only docs (and freqs, when the field stores them)
private class SegmentDocsEnum extends DocsEnum {
  /** Clone of the freq input that this enum reads postings from. */
  final IndexInput freqIn;
  /** The input this enum was created from; the reader compares it to decide on reuse. */
  final IndexInput startFreqIn;

  boolean omitTF;               // does current field omit term freq?
  boolean storePayloads;        // does current field store payloads?

  int limit;                    // number of docs in this posting
  int ord;                      // how many docs we've read
  int doc;                      // doc we last read
  int freq;                     // freq we last read

  Bits skipDocs;

  long freqOffset;
  int skipOffset;

  boolean skipped;              // has skip data been loaded since the last reset()?
  DefaultSkipListReader skipper;

  public SegmentDocsEnum(IndexInput freqIn) throws IOException {
    this.startFreqIn = freqIn;
    this.freqIn = (IndexInput) freqIn.clone();
  }

  /**
   * Points this enum at the postings of a new term and returns it.
   *
   * @param fieldInfo field of the term
   * @param termState offsets and docFreq of the term
   * @param skipDocs docs to leave out, or null for none
   */
  public SegmentDocsEnum reset(FieldInfo fieldInfo, DocTermState termState, Bits skipDocs) throws IOException {
    this.omitTF = fieldInfo.omitTermFreqAndPositions;
    if (this.omitTF) {
      // No freqs are stored for this field, so every doc reports 1
      this.freq = 1;
    }
    this.storePayloads = fieldInfo.storePayloads;
    this.skipDocs = skipDocs;
    this.freqOffset = termState.freqOffset;
    this.skipOffset = termState.skipOffset;

    // TODO: for full enum case (eg segment merging) this
    // seek is unnecessary; maybe we can avoid in such
    // cases
    this.freqIn.seek(termState.freqOffset);

    this.limit = termState.docFreq;
    this.ord = 0;
    this.doc = 0;
    this.skipped = false;
    return this;
  }

  @Override
  public int nextDoc() throws IOException {
    while (ord != limit) {
      ord++;

      // Decode next doc/freq pair
      final int code = freqIn.readVInt();
      if (omitTF) {
        doc += code;
      } else {
        doc += code >>> 1;
        // A set low bit means freq == 1; otherwise the freq follows as its own vInt
        freq = (code & 1) != 0 ? 1 : freqIn.readVInt();
      }

      final boolean isSkipped = skipDocs != null && skipDocs.get(doc);
      if (!isSkipped) {
        return doc;
      }
    }
    doc = NO_MORE_DOCS;
    return doc;
  }

  @Override
  public int read() throws IOException {
    final int[] docBuffer = bulkResult.docs.ints;
    final int[] freqBuffer = bulkResult.freqs.ints;
    final int capacity = docBuffer.length;

    int filled = 0;
    while (filled < capacity && ord < limit) {
      ord++;

      // Same decoding as nextDoc(), inlined by hand for speed
      final int code = freqIn.readVInt();
      if (omitTF) {
        doc += code;
      } else {
        doc += code >>> 1;
        freq = (code & 1) != 0 ? 1 : freqIn.readVInt();
      }

      if (skipDocs != null && skipDocs.get(doc)) {
        continue;
      }
      docBuffer[filled] = doc;
      freqBuffer[filled] = freq;
      filled++;
    }
    return filled;
  }

  @Override
  public int docID() {
    return doc;
  }

  @Override
  public int freq() {
    return freq;
  }

  @Override
  public int advance(int target) throws IOException {
    // TODO: jump right to next() if target is < X away
    // from where we are now?
    if (skipOffset > 0) {
      // There are enough docs in the posting to have skip data
      applySkipData(target);
    }

    // Scan for the rest; at least one entry is always consumed
    nextDoc();
    while (doc < target) {
      nextDoc();
    }
    return doc;
  }

  /** Uses the skip list to jump towards {@code target}, if that moves past the current ord. */
  private void applySkipData(int target) throws IOException {
    if (skipper == null) {
      // First time this enum is ever used for skipping -- lazy init
      skipper = new DefaultSkipListReader((IndexInput) freqIn.clone(), maxSkipLevels, skipInterval);
    }

    if (!skipped) {
      // First skip on this posting since reset(): load its skip data now
      skipper.init(freqOffset + skipOffset, freqOffset, 0, limit, storePayloads);
      skipped = true;
    }

    final int skippedOrd = skipper.skipTo(target);
    if (skippedOrd <= ord) {
      // Skipper did not get us any further than we already are
      return;
    }
    ord = skippedOrd;
    doc = skipper.getDoc();
    freqIn.seek(skipper.getFreqPointer());
  }
}
// Decodes docs & positions (and payloads, when the field stores them).
// Docs/freqs are read from a clone of the freq input; positions and payload
// bytes are read from a clone of the prox input. The prox input is only
// seeked when nextPosition() is first called after a reset()/skip.
private class SegmentDocsAndPositionsEnum extends DocsAndPositionsEnum {
// The freq input this enum was created from; docsAndPositions() compares it
// by identity to decide whether a passed-in enum can be reused.
final IndexInput startFreqIn;
// Clones made in the constructor; all reads/seeks of this enum go through them
private final IndexInput freqIn;
private final IndexInput proxIn;
boolean storePayloads; // does current field store payloads?
int limit; // number of docs in this posting
int ord; // how many docs we've read
int doc; // doc we last read
int freq; // freq we last read
int position; // position last returned by nextPosition(); reset to 0 on each new doc
Bits skipDocs;
long freqOffset;
int skipOffset;
long proxOffset;
// Number of positions belonging to decoded docs that have not been consumed
// from the prox input yet: nextDoc() adds freq, nextPosition() decrements
int posPendingCount;
// Length of the payload at the current position; only overwritten when the
// prox stream encodes a new length (low bit of the position code set)
int payloadLength;
// True when the payload bytes of the current position have not been read or skipped yet
boolean payloadPending;
boolean skipped; // has skip data been loaded since the last reset()?
DefaultSkipListReader skipper;
// Reusable payload holder, allocated in reset() the first time a payload-storing field is seen
private BytesRef payload;
// Prox file pointer to seek to on the next nextPosition() call; -1 once that seek was done
private long lazyProxPointer;
public SegmentDocsAndPositionsEnum(IndexInput freqIn, IndexInput proxIn) throws IOException {
startFreqIn = freqIn;
this.freqIn = (IndexInput) freqIn.clone();
this.proxIn = (IndexInput) proxIn.clone();
}
// Points this enum at the postings of a new term and returns it. The field
// must store term freqs and positions (asserted). The freq input is seeked
// immediately; the prox seek is deferred via lazyProxPointer.
public SegmentDocsAndPositionsEnum reset(FieldInfo fieldInfo, DocTermState termState, Bits skipDocs) throws IOException {
assert !fieldInfo.omitTermFreqAndPositions;
storePayloads = fieldInfo.storePayloads;
if (storePayloads && payload == null) {
// Start with a 1-byte buffer; getPayload() grows it on demand
payload = new BytesRef();
payload.bytes = new byte[1];
}
this.skipDocs = skipDocs;
// TODO: for full enum case (eg segment merging) this
// seek is unnecessary; maybe we can avoid in such
// cases
freqIn.seek(termState.freqOffset);
lazyProxPointer = termState.proxOffset;
limit = termState.docFreq;
ord = 0;
doc = 0;
position = 0;
skipped = false;
posPendingCount = 0;
payloadPending = false;
// Remembered for advance(), which passes them to the skip list reader
freqOffset = termState.freqOffset;
proxOffset = termState.proxOffset;
skipOffset = termState.skipOffset;
return this;
}
// Decodes doc/freq pairs until one is found that is not in skipDocs, or
// returns NO_MORE_DOCS once docFreq entries have been read.
@Override
public int nextDoc() throws IOException {
while(true) {
if (ord == limit) {
return doc = NO_MORE_DOCS;
}
ord++;
// Decode next doc/freq pair
final int code = freqIn.readVInt();
doc += code >>> 1; // shift off low bit
if ((code & 1) != 0) { // if low bit is set
freq = 1; // freq is one
} else {
freq = freqIn.readVInt(); // else read freq
}
// Positions of skipped docs are counted too; nextPosition() scans past
// them later instead of reading the prox input here
posPendingCount += freq;
if (skipDocs == null || !skipDocs.get(doc)) {
break;
}
}
// Positions restart from 0 for each doc
position = 0;
return doc;
}
@Override
public int docID() {
return doc;
}
@Override
public int freq() {
return freq;
}
// Moves to the first doc >= target that comes after the current one. Uses
// the skip list when the term has skip data, then scans linearly; the
// scan always calls nextDoc() at least once.
@Override
public int advance(int target) throws IOException {
// TODO: jump right to next() if target is < X away
// from where we are now?
if (skipOffset > 0) {
// There are enough docs in the posting to have
// skip data
if (skipper == null) {
// This is the first time this enum has ever been used for skipping -- do lazy init
skipper = new DefaultSkipListReader((IndexInput) freqIn.clone(), maxSkipLevels, skipInterval);
}
if (!skipped) {
// This is the first time this posting has
// skipped, since reset() was called, so now we
// load the skip data for this posting
skipper.init(freqOffset+skipOffset,
freqOffset, proxOffset,
limit, storePayloads);
skipped = true;
}
final int newOrd = skipper.skipTo(target);
if (newOrd > ord) {
// Skipper moved
ord = newOrd;
doc = skipper.getDoc();
freqIn.seek(skipper.getFreqPointer());
// Defer the prox seek until positions are actually requested, and
// forget any positions/payload pending from before the skip
lazyProxPointer = skipper.getProxPointer();
posPendingCount = 0;
position = 0;
payloadPending = false;
payloadLength = skipper.getPayloadLength();
}
}
// Now, linear scan for the rest:
do {
nextDoc();
} while (target > doc);
return doc;
}
// Returns the next position of the current doc. Performs the deferred prox
// seek if one is outstanding, skips an unread payload of the previous
// position, and scans past positions of docs whose positions were never
// requested before decoding the position itself.
public int nextPosition() throws IOException {
if (lazyProxPointer != -1) {
proxIn.seek(lazyProxPointer);
lazyProxPointer = -1;
}
if (payloadPending && payloadLength > 0) {
// payload of last position was never retrieved -- skip it
proxIn.seek(proxIn.getFilePointer() + payloadLength);
payloadPending = false;
}
// scan over any docs that were iterated without their positions
// (more than freq positions pending means some belong to earlier docs)
while(posPendingCount > freq) {
final int code = proxIn.readVInt();
if (storePayloads) {
if ((code & 1) != 0) {
// new payload length
payloadLength = proxIn.readVInt();
assert payloadLength >= 0;
}
assert payloadLength != -1;
// skip over the payload bytes of this unwanted position
proxIn.seek(proxIn.getFilePointer() + payloadLength);
}
posPendingCount--;
position = 0;
payloadPending = false;
}
// read next position
if (storePayloads) {
// NOTE(review): the same condition is handled at the top of this method,
// which clears payloadPending when it fires, and the scan loop above also
// clears it -- this check looks unreachable; confirm before removing
if (payloadPending && payloadLength > 0) {
// payload wasn't retrieved for last position
proxIn.seek(proxIn.getFilePointer()+payloadLength);
}
final int code = proxIn.readVInt();
if ((code & 1) != 0) {
// new payload length
payloadLength = proxIn.readVInt();
assert payloadLength >= 0;
}
assert payloadLength != -1;
payloadPending = true;
// position delta is stored in the upper bits; low bit flags a new payload length
position += code >>> 1;
} else {
position += proxIn.readVInt();
}
posPendingCount--;
assert posPendingCount >= 0: "nextPosition() was called too many times (more than freq() times) posPendingCount=" + posPendingCount;
return position;
}
/** Returns length of payload at current position */
public int getPayloadLength() {
// Only meaningful after nextPosition() has been called for the current doc
assert lazyProxPointer == -1;
assert posPendingCount < freq;
return payloadLength;
}
/** Returns the payload at this position, or null if no
* payload was indexed. */
// NOTE(review): the code below never returns null; when no payload is
// pending it throws IOException instead -- confirm which contract is intended
public BytesRef getPayload() throws IOException {
assert lazyProxPointer == -1;
assert posPendingCount < freq;
if (!payloadPending) {
throw new IOException("Either no payload exists at this term position or an attempt was made to load it more than once.");
}
if (payloadLength > payload.bytes.length) {
payload.grow(payloadLength);
}
// Read the payload bytes into the reusable buffer; the returned BytesRef
// is the same instance on every call
proxIn.readBytes(payload.bytes, 0, payloadLength);
payload.length = payloadLength;
payloadPending = false;
return payload;
}
// True while the current position has a non-empty payload that has not been read yet
public boolean hasPayload() {
return payloadPending && payloadLength > 0;
}
}
}