package org.apache.lucene.index.codecs.standard;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/** Consumes doc & freq, writing them using the current
* index file format */
import java.io.IOException;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CodecUtil;
/** @lucene.experimental */
public final class StandardPostingsWriterImpl extends StandardPostingsWriter {
final static String CODEC = "StandardPostingsWriterImpl";
// Increment version to change it:
final static int VERSION_START = 0;
final static int VERSION_CURRENT = VERSION_START;
final IndexOutput freqOut;
final IndexOutput proxOut;
final DefaultSkipListWriter skipListWriter;
final int skipInterval;
final int maxSkipLevels;
final int totalNumDocs;
IndexOutput termsOut;
boolean omitTermFreqAndPositions;
boolean storePayloads;
// Starts a new term
long lastFreqStart;
long freqStart;
long lastProxStart;
long proxStart;
FieldInfo fieldInfo;
int lastPayloadLength;
int lastPosition;
public StandardPostingsWriterImpl(SegmentWriteState state) throws IOException {
super();
String fileName = IndexFileNames.segmentFileName(state.segmentName, "", StandardCodec.FREQ_EXTENSION);
state.flushedFiles.add(fileName);
freqOut = state.directory.createOutput(fileName);
if (state.fieldInfos.hasProx()) {
// At least one field does not omit TF, so create the
// prox file
fileName = IndexFileNames.segmentFileName(state.segmentName, "", StandardCodec.PROX_EXTENSION);
state.flushedFiles.add(fileName);
proxOut = state.directory.createOutput(fileName);
} else {
// Every field omits TF so we will write no prox file
proxOut = null;
}
totalNumDocs = state.numDocs;
skipListWriter = new DefaultSkipListWriter(state.skipInterval,
state.maxSkipLevels,
state.numDocs,
freqOut,
proxOut);
skipInterval = state.skipInterval;
maxSkipLevels = state.maxSkipLevels;
}
@Override
public void start(IndexOutput termsOut) throws IOException {
this.termsOut = termsOut;
CodecUtil.writeHeader(termsOut, CODEC, VERSION_CURRENT);
termsOut.writeInt(skipInterval); // write skipInterval
termsOut.writeInt(maxSkipLevels); // write maxSkipLevels
}
@Override
public void startTerm() {
freqStart = freqOut.getFilePointer();
if (proxOut != null) {
proxStart = proxOut.getFilePointer();
// force first payload to write its length
lastPayloadLength = -1;
}
skipListWriter.resetSkip();
}
// Currently, this instance is re-used across fields, so
// our parent calls setField whenever the field changes
@Override
public void setField(FieldInfo fieldInfo) {
this.fieldInfo = fieldInfo;
omitTermFreqAndPositions = fieldInfo.omitTermFreqAndPositions;
storePayloads = fieldInfo.storePayloads;
}
int lastDocID;
int df;
/** Adds a new doc in this term. If this returns null
* then we just skip consuming positions/payloads. */
@Override
public void startDoc(int docID, int termDocFreq) throws IOException {
final int delta = docID - lastDocID;
if (docID < 0 || (df > 0 && delta <= 0)) {
throw new CorruptIndexException("docs out of order (" + docID + " <= " + lastDocID + " )");
}
if ((++df % skipInterval) == 0) {
skipListWriter.setSkipData(lastDocID, storePayloads, lastPayloadLength);
skipListWriter.bufferSkip(df);
}
assert docID < totalNumDocs: "docID=" + docID + " totalNumDocs=" + totalNumDocs;
lastDocID = docID;
if (omitTermFreqAndPositions) {
freqOut.writeVInt(delta);
} else if (1 == termDocFreq) {
freqOut.writeVInt((delta<<1) | 1);
} else {
freqOut.writeVInt(delta<<1);
freqOut.writeVInt(termDocFreq);
}
lastPosition = 0;
}
/** Add a new position & payload */
@Override
public void addPosition(int position, BytesRef payload) throws IOException {
assert !omitTermFreqAndPositions: "omitTermFreqAndPositions is true";
assert proxOut != null;
final int delta = position - lastPosition;
assert delta > 0 || position == 0 || position == -1: "position=" + position + " lastPosition=" + lastPosition; // not quite right (if pos=0 is repeated twice we don't catch it)
lastPosition = position;
if (storePayloads) {
final int payloadLength = payload == null ? 0 : payload.length;
if (payloadLength != lastPayloadLength) {
lastPayloadLength = payloadLength;
proxOut.writeVInt((delta<<1)|1);
proxOut.writeVInt(payloadLength);
} else {
proxOut.writeVInt(delta << 1);
}
if (payloadLength > 0) {
proxOut.writeBytes(payload.bytes, payload.offset, payloadLength);
}
} else {
proxOut.writeVInt(delta);
}
}
@Override
public void finishDoc() {
}
/** Called when we are done adding docs to this term */
@Override
public void finishTerm(int docCount, boolean isIndexTerm) throws IOException {
assert docCount > 0;
// TODO: wasteful we are counting this (counting # docs
// for this term) in two places?
assert docCount == df;
if (isIndexTerm) {
// Write absolute at seek points
termsOut.writeVLong(freqStart);
} else {
// Write delta between seek points
termsOut.writeVLong(freqStart - lastFreqStart);
}
lastFreqStart = freqStart;
if (df >= skipInterval) {
termsOut.writeVInt((int) (skipListWriter.writeSkip(freqOut)-freqStart));
}
if (!omitTermFreqAndPositions) {
if (isIndexTerm) {
// Write absolute at seek points
termsOut.writeVLong(proxStart);
} else {
// Write delta between seek points
termsOut.writeVLong(proxStart - lastProxStart);
}
lastProxStart = proxStart;
}
lastDocID = 0;
df = 0;
}
@Override
public void close() throws IOException {
try {
freqOut.close();
} finally {
if (proxOut != null) {
proxOut.close();
}
}
}
}