package org.apache.lucene.index;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/** Consumes doc & freq, writing them using the current
* index file format */
import java.io.Closeable;
import java.io.IOException;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.UnicodeUtil;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.store.IndexOutput;
import org.apache.solr.request.uninverted.UnInvertedField;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
final class FormatPostingsDocsWriter extends FormatPostingsDocsConsumer implements Closeable {
final IndexOutput out;
final FormatPostingsTermsWriter parent;
final FormatPostingsPositionsWriter posWriter;
final DefaultSkipListWriter skipListWriter;
final int skipInterval;
final int totalNumDocs;
boolean omitTermFreqAndPositions;
boolean storePayloads;
long freqStart;
FieldInfo fieldInfo;
private int compressType=0;
FormatPostingsDocsWriter(SegmentWriteState state, FormatPostingsTermsWriter parent) throws IOException {
this.parent = parent;
skipInterval = parent.parent.termsOut.skipInterval;
out = parent.parent.dir.createOutput(IndexFileNames.segmentFileName(parent.parent.segment, IndexFileNames.FREQ_EXTENSION));
if(skipInterval>=(Integer.MAX_VALUE-10000))
{
this.compressType=1;
}else{
this.compressType=0;
}
out.writeVInt(this.compressType);//1:noskipcompress 0:nocompress
boolean success = false;
try {
totalNumDocs = parent.parent.totalNumDocs;
skipListWriter = parent.parent.skipListWriter;
skipListWriter.setFreqOutput(out);
posWriter = new FormatPostingsPositionsWriter(state, this);
success = true;
} finally {
if (!success) {
IOUtils.closeWhileHandlingException(out);
}
}
}
void setField(FieldInfo fieldInfo) {
this.fieldInfo = fieldInfo;
omitTermFreqAndPositions = fieldInfo.indexOptions == IndexOptions.DOCS_ONLY;
storePayloads = fieldInfo.storePayloads;
posWriter.setField(fieldInfo);
}
int lastDocID;
int df;
public static Logger LOG = LoggerFactory.getLogger(UnInvertedField.class);
@Override
FormatPostingsPositionsConsumer addDoc(int docID, int termDocFreq) throws IOException {
parent.termsOut.collect(docID);
final int delta = docID - lastDocID;
assert docID < totalNumDocs: "docID=" + docID + " totalNumDocs=" + totalNumDocs;
if (docID < 0 || (df > 0 && delta <= 0))
{
CorruptIndexException ex=new CorruptIndexException("docs out of order (" + docID + " <= " + lastDocID + " ) (out: " + out + ")");
throw ex;
}
if ((++df % skipInterval) == 0) {
this.out.flushCompressBlock();
this.reset();
skipListWriter.setSkipData(lastDocID, storePayloads, posWriter.lastPayloadLength);
skipListWriter.bufferSkip(df);
}
lastDocID = docID;
if (omitTermFreqAndPositions)
this.out.writeCompressblock(delta,1);
else if (1 == termDocFreq)
this.out.writeCompressblock((delta<<1) | 1,1);
else {
this.out.writeCompressblock(delta<<1,1);
this.out.writeCompressblock(termDocFreq,0);
}
return posWriter;
}
private final TermInfo termInfo = new TermInfo(); // minimize consing
final UnicodeUtil.UTF8Result utf8 = new UnicodeUtil.UTF8Result();
/** Called when we are done adding docs to this term */
@Override
void finish() throws IOException {
this.out.flushCompressBlock();
long skipPointer = skipListWriter.writeSkip(out);
// if(savefreqpos!=parent.freqStart)
// {
// LOG.info("freq start is wrong ,change it" +savefreqpos+","+parent.freqStart);
// }
termInfo.set(df, parent.freqStart, parent.proxStart, (int) (skipPointer - parent.freqStart));
// TODO: we could do this incrementally
UnicodeUtil.UTF16toUTF8(parent.currentTerm, parent.currentTermStart, utf8);
if (df > 0) {
parent.termsOut.add(parent.currentTermobj,fieldInfo.number,
utf8.result,
utf8.length,
termInfo);
}
parent.termsOut.addTm(parent.currentTermobj,fieldInfo.number);;
lastDocID = 0;
df = 0;
}
public void startTerm() throws IOException
{
parent.termsOut.startTerm(parent.currentTermobj,fieldInfo.number);
}
public void close() throws IOException {
IOUtils.close(out, posWriter);
}
@Override
public boolean reset() {
this.out.setUsedBlock(this.compressType);
this.out.resetBlockMode();
return true;
}
}