package org.apache.lucene.index;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.util.List;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.InfoStream;
import org.apache.lucene.util.packed.PackedInts;
/** Holds common state used during segment merging.
*
* @lucene.experimental */
public class MergeState {
/**
 * Translates the docIDs of a single reader into the docIDs those
 * documents receive once deleted documents are dropped by a merge.
 */
public static abstract class DocMap {
  private final Bits liveDocs;

  /**
   * Sole constructor, for use by subclasses.
   *
   * @param liveDocs bits that are set for live documents, or
   *        <code>null</code>, in which case {@link #get} treats
   *        every document as live
   */
  protected DocMap(Bits liveDocs) {
    this.liveDocs = liveDocs;
  }

  /**
   * Picks and builds the {@link DocMap} implementation suited to the
   * given reader: an identity map when nothing is deleted, a
   * delete-count based map when deleted documents are fewer than
   * live ones, and a direct docID table otherwise.
   */
  public static DocMap build(AtomicReader reader) {
    final int maxDoc = reader.maxDoc();
    final int numDeletes = reader.numDeletedDocs();
    assert reader.getLiveDocs() != null || numDeletes == 0;
    if (numDeletes == 0) {
      return new NoDelDocMap(maxDoc);
    }
    final int numDocs = maxDoc - numDeletes;
    final Bits live = reader.getLiveDocs();
    if (numDeletes < numDocs) {
      return buildDelCountDocmap(maxDoc, numDeletes, live, PackedInts.COMPACT);
    }
    return buildDirectDocMap(maxDoc, numDocs, live, PackedInts.COMPACT);
  }

  /**
   * Builds a map that records, for every docID, how many deleted
   * documents exist at or before it.
   */
  static DocMap buildDelCountDocmap(int maxDoc, int numDeletes, Bits liveDocs, float acceptableOverheadRatio) {
    final int bitsPerValue = PackedInts.bitsRequired(numDeletes);
    final PackedInts.Mutable deletesUpTo =
        PackedInts.getMutable(maxDoc, bitsPerValue, acceptableOverheadRatio);
    int deleted = 0;
    for (int doc = 0; doc < maxDoc; doc++) {
      if (liveDocs.get(doc) == false) {
        deleted++;
      }
      // stored for every doc, live or not
      deletesUpTo.set(doc, deleted);
    }
    assert deleted == numDeletes : "del=" + deleted + ", numdeletes=" + numDeletes;
    return new DelCountDocMap(liveDocs, deletesUpTo);
  }

  /**
   * Builds a map that stores the new docID of every live document
   * directly; slots of deleted documents are never written.
   */
  static DocMap buildDirectDocMap(int maxDoc, int numDocs, Bits liveDocs, float acceptableOverheadRatio) {
    // the largest new docID is numDocs - 1 (clamped to 0 when there are no live docs)
    final int bitsPerValue = PackedInts.bitsRequired(Math.max(0, numDocs - 1));
    final PackedInts.Mutable newIds =
        PackedInts.getMutable(maxDoc, bitsPerValue, acceptableOverheadRatio);
    int deleted = 0;
    for (int doc = 0; doc < maxDoc; doc++) {
      if (!liveDocs.get(doc)) {
        deleted++;
        continue;
      }
      newIds.set(doc, doc - deleted);
    }
    assert numDocs + deleted == maxDoc : "maxDoc=" + maxDoc + ", del=" + deleted + ", numDocs=" + numDocs;
    return new DirectDocMap(liveDocs, newIds, deleted);
  }

  /**
   * Returns the mapped docID for the provided one, or <code>-1</code>
   * if the live docs mark that document as deleted.
   */
  public int get(int docId) {
    final boolean isDeleted = liveDocs != null && !liveDocs.get(docId);
    return isDeleted ? -1 : remap(docId);
  }

  /**
   * Returns the mapped docID for the provided one without consulting
   * the live docs; {@link #get} only calls this for live documents.
   */
  public abstract int remap(int docId);

  /** Returns the total number of documents, ignoring
   *  deletions. */
  public abstract int maxDoc();

  /** Returns the number of not-deleted documents, i.e.
   *  {@link #maxDoc()} minus {@link #numDeletedDocs()}. */
  public final int numDocs() {
    final int total = maxDoc();
    final int deleted = numDeletedDocs();
    return total - deleted;
  }

  /** Returns the number of deleted documents. */
  public abstract int numDeletedDocs();

  /** Returns true if at least one document is deleted. */
  public boolean hasDeletions() {
    final int deleted = numDeletedDocs();
    return deleted > 0;
  }
}
/** Identity {@link DocMap} used when there are no deletions. */
private static class NoDelDocMap extends DocMap {
  private final int docCount;

  /** Creates an identity map over <code>docCount</code> documents. */
  private NoDelDocMap(int docCount) {
    // no live docs: every document is treated as live
    super(null);
    this.docCount = docCount;
  }

  /** Returns the docID unchanged. */
  @Override
  public int remap(int docId) {
    return docId;
  }

  @Override
  public int maxDoc() {
    return docCount;
  }

  /** Always zero. */
  @Override
  public int numDeletedDocs() {
    return 0;
  }
}
/** {@link DocMap} that looks up the new docID in a packed table
 *  indexed by the old docID. */
private static class DirectDocMap extends DocMap {
  private final int deleteCount;
  private final PackedInts.Mutable newDocIds;

  /** Wraps an already populated table of new docIDs. */
  private DirectDocMap(Bits liveDocs, PackedInts.Mutable newDocIds, int deleteCount) {
    super(liveDocs);
    this.deleteCount = deleteCount;
    this.newDocIds = newDocIds;
  }

  /** Reads the new docID straight from the table. */
  @Override
  public int remap(int docId) {
    final long mapped = newDocIds.get(docId);
    return (int) mapped;
  }

  /** The table holds one slot per document. */
  @Override
  public int maxDoc() {
    return newDocIds.size();
  }

  @Override
  public int numDeletedDocs() {
    return deleteCount;
  }
}
/** {@link DocMap} that derives the new docID by subtracting a
 *  per-document delete count from the old docID. */
private static class DelCountDocMap extends DocMap {
  private final PackedInts.Mutable deletesUpTo;

  /** Wraps an already populated table of delete counts. */
  private DelCountDocMap(Bits liveDocs, PackedInts.Mutable deletesUpTo) {
    super(liveDocs);
    this.deletesUpTo = deletesUpTo;
  }

  /** Shifts the docID down by the delete count stored for it. */
  @Override
  public int remap(int docId) {
    final int shift = (int) deletesUpTo.get(docId);
    return docId - shift;
  }

  /** The table holds one slot per document. */
  @Override
  public int maxDoc() {
    return deletesUpTo.size();
  }

  /** Returns the delete count stored for the last document. */
  @Override
  public int numDeletedDocs() {
    final int lastDoc = maxDoc() - 1;
    return (int) deletesUpTo.get(lastDoc);
  }
}
/** {@link SegmentInfo} of the newly merged segment. */
public SegmentInfo segmentInfo;
/** {@link FieldInfos} of the newly merged segment. */
public FieldInfos fieldInfos;
/** Readers being merged. */
public List<AtomicReader> readers;
/** {@link DocMap}s that map docIDs around deletions
* (presumably one per entry of {@link #readers}; confirm
* against the code that populates this array). */
public DocMap[] docMaps;
/** New docID base per reader. */
public int[] docBase;
/** Holds the {@link CheckAbort} instance, which is invoked
* periodically to see if the merge has been aborted. */
public CheckAbort checkAbort;
/** {@link InfoStream} for debugging messages. */
public InfoStream infoStream;
/** {@link FieldInfo} of the field currently being merged. */
public FieldInfo fieldInfo;
// TODO: get rid of this? it tells you which segments are 'aligned' (e.g. for bulk merging)
// but is this really so expensive to compute again in different components, versus once in SM?
/** {@link SegmentReader}s that have identical field
* name/number mapping, so their stored fields and term
* vectors may be bulk merged. */
public SegmentReader[] matchingSegmentReaders;
/** How many {@link #matchingSegmentReaders} are set. */
public int matchedCount;
/** Sole constructor. Package-private and empty: no field is
* assigned here, so each one keeps its default value until
* it is set from outside. */
MergeState() {
}
/**
 * Accumulates units of work done while merging segments and, each
 * time enough work has piled up, asks the merge whether it has been
 * aborted.
 */
public static class CheckAbort {
  private final MergePolicy.OneMerge merge;
  private final Directory dir;
  private double workCount;

  /** Creates a {@link CheckAbort} instance. */
  public CheckAbort(MergePolicy.OneMerge merge, Directory dir) {
    this.dir = dir;
    this.merge = merge;
  }

  /**
   * Records that roughly <code>units</code> of work have been done
   * since the previous call. Once the accumulated amount reaches
   * 10000, <code>merge.checkAborted</code> is invoked and the
   * accumulator is reset.
   * When adding time-consuming code into SegmentMerger, try
   * different values for <code>units</code> so that the time
   * between calls to <code>merge.checkAborted</code> stays at
   * up to ~ 1 second.
   */
  public void work(double units) throws MergePolicy.MergeAbortedException {
    workCount += units;
    // negated ">=" (rather than "<") so a NaN total also skips the check
    if (!(workCount >= 10000.0)) {
      return;
    }
    merge.checkAborted(dir);
    workCount = 0;
  }

  /** Instance whose {@link #work} does nothing.
   *  If you use this: IW.close(false) cannot abort your merge!
   *  @lucene.internal */
  static final CheckAbort NONE = new CheckAbort(null, null) {
    @Override
    public void work(double units) {
      // do nothing
    }
  };
}
}