package org.apache.lucene.codecs.lucene40;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.Collection;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.LiveDocsFormat;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentCommitInfo;
import org.apache.lucene.store.DataOutput; // javadocs
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.MutableBits;
/**
* Lucene 4.0 Live Documents Format.
* <p>The .del file is optional, and only exists when a segment contains
* deletions.</p>
* <p>Although per-segment, this file is maintained exterior to compound segment
* files.</p>
* <p>Deletions (.del) --&gt; Format,Header,ByteCount,BitCount, Bits | DGaps (depending
* on Format)</p>
* <ul>
* <li>Format,ByteCount,BitCount --&gt; {@link DataOutput#writeInt Uint32}</li>
* <li>Bits --&gt; &lt;{@link DataOutput#writeByte Byte}&gt; <sup>ByteCount</sup></li>
* <li>DGaps --&gt; &lt;DGap,NonOnesByte&gt; <sup>NonOnesBytesCount</sup></li>
* <li>DGap --&gt; {@link DataOutput#writeVInt VInt}</li>
* <li>NonOnesByte --&gt; {@link DataOutput#writeByte Byte}</li>
* <li>Header --&gt; {@link CodecUtil#writeHeader CodecHeader}</li>
* </ul>
* <p>Format is 1: indicates cleared DGaps.</p>
* <p>ByteCount indicates the number of bytes in Bits. It is typically
* (SegSize/8)+1.</p>
* <p>BitCount indicates the number of bits that are currently set in Bits.</p>
* <p>Bits contains one bit for each document indexed. When the bit corresponding
* to a document number is cleared, that document is marked as deleted. Bit ordering
* is from least to most significant. Thus, if Bits contains two bytes, 0x00 and
* 0x02, then document 9 is marked as alive (not deleted).</p>
* <p>DGaps represents sparse bit-vectors more efficiently than Bits. It is made
* of DGaps on indexes of nonOnes bytes in Bits, and the nonOnes bytes themselves.
* The number of nonOnes bytes in Bits (NonOnesBytesCount) is not stored.</p>
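* <p>For example, if there are 8000 bits and only bits 10,12,32 are cleared, DGaps
* would be used:</p>
* <p>(VInt) 1 , (byte) 0xEB , (VInt) 3 , (Byte) 0xFE</p>
* <p>Here byte 1 has bits 2 and 4 cleared (0xEB) and byte 4 has bit 0 cleared
* (0xFE); every other byte is 0xFF and is therefore skipped.</p>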
*/
public class Lucene40LiveDocsFormat extends LiveDocsFormat {

  /** File extension used when naming a segment's deletes file. */
  static final String DELETES_EXTENSION = "del";

  /** Sole constructor. */
  public Lucene40LiveDocsFormat() {
  }

  /**
   * Allocates a new {@link BitVector} with {@code size} bits, calls
   * {@code invertAll()} on it, and returns it.
   *
   * @param size bit count handed to the {@link BitVector} constructor
   * @return the freshly created, inverted bit vector
   */
  @Override
  public MutableBits newLiveDocs(int size) throws IOException {
    final BitVector inverted = new BitVector(size);
    // NOTE(review): presumably a new BitVector starts with all bits cleared, so
    // inverting marks every document as live -- confirm against BitVector.
    inverted.invertAll();
    return inverted;
  }

  /**
   * Returns a clone of an existing live-docs instance.
   *
   * @param existing bits to copy; cast to {@link BitVector} without a prior type
   *        check, so any other {@link Bits} implementation fails the cast
   * @return the value returned by {@code clone()} on the given bit vector
   */
  @Override
  public MutableBits newLiveDocs(Bits existing) throws IOException {
    return ((BitVector) existing).clone();
  }

  /**
   * Opens the deletes file of the segment's current deletion generation
   * ({@code info.getDelGen()}) as a {@link BitVector}.
   * <p>
   * When assertions are enabled, the loaded vector's {@code count()} must equal
   * the segment's doc count minus {@code info.getDelCount()}, and its
   * {@code length()} must equal the segment's doc count.
   *
   * @param dir directory handed to the {@link BitVector} constructor
   * @param info commit-level segment info supplying name, generation and counts
   * @param context IO context handed to the {@link BitVector} constructor
   * @return the loaded bit vector
   */
  @Override
  public Bits readLiveDocs(Directory dir, SegmentCommitInfo info, IOContext context) throws IOException {
    final BitVector loaded =
        new BitVector(dir, deletesFileName(info.info.name, info.getDelGen()), context);
    assert loaded.count() == info.info.getDocCount() - info.getDelCount():
      "liveDocs.count()=" + loaded.count() + " info.docCount=" + info.info.getDocCount() + " info.getDelCount()=" + info.getDelCount();
    assert loaded.length() == info.info.getDocCount();
    return loaded;
  }

  /**
   * Writes the given bits to the deletes file of the segment's next deletion
   * generation ({@code info.getNextDelGen()}).
   * <p>
   * When assertions are enabled, the vector's {@code count()} must equal the
   * segment's doc count minus {@code info.getDelCount()} minus
   * {@code newDelCount}, and its {@code length()} must equal the doc count.
   *
   * @param bits bits to persist; cast to {@link BitVector} without a type check
   * @param dir directory handed to {@code BitVector.write}
   * @param info commit-level segment info supplying name, generation and counts
   * @param newDelCount deletions not yet reflected in {@code info.getDelCount()},
   *        as implied by the count assertion
   * @param context IO context handed to {@code BitVector.write}
   */
  @Override
  public void writeLiveDocs(MutableBits bits, Directory dir, SegmentCommitInfo info, int newDelCount, IOContext context) throws IOException {
    final String target = deletesFileName(info.info.name, info.getNextDelGen());
    final BitVector toPersist = (BitVector) bits;
    assert toPersist.count() == info.info.getDocCount() - info.getDelCount() - newDelCount;
    assert toPersist.length() == info.info.getDocCount();
    toPersist.write(dir, target, context);
  }

  /**
   * Adds the name of the segment's current deletes file to {@code files}, but
   * only when {@code info.hasDeletions()} is true.
   *
   * @param info commit-level segment info to inspect
   * @param files collection that receives the file name
   */
  @Override
  public void files(SegmentCommitInfo info, Collection<String> files) throws IOException {
    if (!info.hasDeletions()) {
      return;
    }
    files.add(deletesFileName(info.info.name, info.getDelGen()));
  }

  /**
   * Delegates to {@link IndexFileNames#fileNameFromGeneration} using
   * {@link #DELETES_EXTENSION} as the extension.
   */
  private static String deletesFileName(String segmentName, long generation) {
    return IndexFileNames.fileNameFromGeneration(segmentName, DELETES_EXTENSION, generation);
  }
}