package org.apache.lucene.index.sorter;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.util.Collections;
import java.util.List;
import java.util.Map;

import org.apache.lucene.analysis.Analyzer; // javadocs
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.MergePolicy;
import org.apache.lucene.index.MergeState;
import org.apache.lucene.index.MergeTrigger;
import org.apache.lucene.index.MultiReader;
import org.apache.lucene.index.SegmentCommitInfo;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.index.SegmentInfos;
import org.apache.lucene.index.SegmentReader;
import org.apache.lucene.index.SlowCompositeReaderWrapper;
import org.apache.lucene.search.Sort;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.packed.MonotonicAppendingLongBuffer;

/** A {@link MergePolicy} that reorders documents according to a {@link Sort}
 * before merging them. As a consequence, all segments resulting from a merge
 * will be sorted, while segments resulting from a flush will be in the order
 * in which documents have been added.
 * <p><b>NOTE</b>: Never use this policy if you rely on
 * {@link IndexWriter#addDocuments(Iterable, Analyzer) IndexWriter.addDocuments}
 * to have sequentially-assigned doc IDs; this policy will scatter doc IDs.
 * <p><b>NOTE</b>: This policy should only be used with idempotent {@code Sort}s
 * so that the order of segments is predictable. For example, using
 * {@link Sort#INDEXORDER} in reverse (which is not idempotent) will make
 * the order of documents in a segment depend on the number of times the segment
 * has been merged.
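 * <p>A minimal configuration sketch; the {@code "timestamp"} field and the
 * {@code matchVersion}, {@code analyzer} and {@code dir} variables below are
 * placeholders, not part of this class:
 * <pre class="prettyprint">
 *   Sort sort = new Sort(new SortField("timestamp", SortField.Type.LONG, true));
 *   IndexWriterConfig conf = new IndexWriterConfig(matchVersion, analyzer);
 *   conf.setMergePolicy(new SortingMergePolicy(conf.getMergePolicy(), sort));
 *   IndexWriter writer = new IndexWriter(dir, conf);
 * </pre>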
 * @lucene.experimental */
public final class SortingMergePolicy extends MergePolicy {

  /**
   * Put in the {@link SegmentInfo#getDiagnostics() diagnostics} to denote that
   * this segment is sorted.
   */
  public static final String SORTER_ID_PROP = "sorter";

  class SortingOneMerge extends OneMerge {

    List<AtomicReader> unsortedReaders;
    Sorter.DocMap docMap;
    AtomicReader sortedView;

    SortingOneMerge(List<SegmentCommitInfo> segments) {
      super(segments);
    }

    @Override
    public List<AtomicReader> getMergeReaders() throws IOException {
      if (unsortedReaders == null) {
        unsortedReaders = super.getMergeReaders();
        // Expose the readers to merge as a single atomic view so that they can
        // be sorted globally rather than per segment.
        final AtomicReader atomicView;
        if (unsortedReaders.size() == 1) {
          atomicView = unsortedReaders.get(0);
        } else {
          final IndexReader multiReader = new MultiReader(unsortedReaders.toArray(new AtomicReader[unsortedReaders.size()]));
          atomicView = SlowCompositeReaderWrapper.wrap(multiReader);
        }
        docMap = sorter.sort(atomicView);
        sortedView = SortingAtomicReader.wrap(atomicView, docMap);
      }
      // a null doc map means that the readers are already sorted
      return docMap == null ? unsortedReaders : Collections.singletonList(sortedView);
    }

    @Override
    public void setInfo(SegmentCommitInfo info) {
      Map<String,String> diagnostics = info.info.getDiagnostics();
      diagnostics.put(SORTER_ID_PROP, sorter.getID());
      super.setInfo(info);
    }

    private MonotonicAppendingLongBuffer getDeletes(List<AtomicReader> readers) {
      // For every live doc, record how many deleted docs precede it in the
      // concatenated (pre-merge) doc ID space.
      MonotonicAppendingLongBuffer deletes = new MonotonicAppendingLongBuffer();
      int deleteCount = 0;
      for (AtomicReader reader : readers) {
        final int maxDoc = reader.maxDoc();
        final Bits liveDocs = reader.getLiveDocs();
        for (int i = 0; i < maxDoc; ++i) {
          if (liveDocs != null && !liveDocs.get(i)) {
            ++deleteCount;
          } else {
            deletes.add(deleteCount);
          }
        }
      }
      deletes.freeze();
      return deletes;
    }

    @Override
    public MergePolicy.DocMap getDocMap(final MergeState mergeState) {
      if (unsortedReaders == null) {
        throw new IllegalStateException();
      }
      if (docMap == null) {
        return super.getDocMap(mergeState);
      }
      assert mergeState.docMaps.length == 1; // we returned a singleton reader
      final MonotonicAppendingLongBuffer deletes = getDeletes(unsortedReaders);
      return new MergePolicy.DocMap() {
        @Override
        public int map(int old) {
          final int oldWithDeletes = old + (int) deletes.get(old);
          final int newWithDeletes = docMap.oldToNew(oldWithDeletes);
          return mergeState.docMaps[0].get(newWithDeletes);
        }
      };
    }

  }

  class SortingMergeSpecification extends MergeSpecification {

    @Override
    public void add(OneMerge merge) {
      super.add(new SortingOneMerge(merge.segments));
    }

    @Override
    public String segString(Directory dir) {
      return "SortingMergeSpec(" + super.segString(dir) + ", sorter=" + sorter + ")";
    }

  }

  /** Returns {@code true} if the given {@code reader} is sorted by the specified {@code sort}. */
  public static boolean isSorted(AtomicReader reader, Sort sort) {
    if (reader instanceof SegmentReader) {
      final SegmentReader segReader = (SegmentReader) reader;
      final Map<String, String> diagnostics = segReader.getSegmentInfo().info.getDiagnostics();
      if (diagnostics != null && sort.toString().equals(diagnostics.get(SORTER_ID_PROP))) {
        return true;
      }
    }
    return false;
  }

  // Re-wrap every merge selected by the wrapped policy so that its readers are
  // sorted before being merged.
  private MergeSpecification sortedMergeSpecification(MergeSpecification specification) {
    if (specification == null) {
      return null;
    }
    MergeSpecification sortingSpec = new SortingMergeSpecification();
    for (OneMerge merge : specification.merges) {
      sortingSpec.add(merge);
    }
    return sortingSpec;
  }

  final MergePolicy in;
  final Sorter sorter;
  final Sort sort;

  /** Create a new {@code MergePolicy} that sorts documents with the given {@code sort}.
   */
  public SortingMergePolicy(MergePolicy in, Sort sort) {
    this.in = in;
    this.sorter = new Sorter(sort);
    this.sort = sort;
  }

  @Override
  public MergeSpecification findMerges(MergeTrigger mergeTrigger, SegmentInfos segmentInfos) throws IOException {
    return sortedMergeSpecification(in.findMerges(mergeTrigger, segmentInfos));
  }

  @Override
  public MergeSpecification findForcedMerges(SegmentInfos segmentInfos, int maxSegmentCount,
      Map<SegmentCommitInfo,Boolean> segmentsToMerge) throws IOException {
    return sortedMergeSpecification(in.findForcedMerges(segmentInfos, maxSegmentCount, segmentsToMerge));
  }

  @Override
  public MergeSpecification findForcedDeletesMerges(SegmentInfos segmentInfos) throws IOException {
    return sortedMergeSpecification(in.findForcedDeletesMerges(segmentInfos));
  }

  @Override
  public MergePolicy clone() {
    return new SortingMergePolicy(in.clone(), sort);
  }

  @Override
  public void close() {
    in.close();
  }

  @Override
  public boolean useCompoundFile(SegmentInfos segments, SegmentCommitInfo newSegment) throws IOException {
    return in.useCompoundFile(segments, newSegment);
  }

  @Override
  public void setIndexWriter(IndexWriter writer) {
    in.setIndexWriter(writer);
  }

  @Override
  public String toString() {
    return "SortingMergePolicy(" + in + ", sorter=" + sorter + ")";
  }

}
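
// A rough sketch of how the SORTER_ID_PROP diagnostic written above might be consumed:
// since only segments produced by a merge under this policy are sorted, a caller can
// check each leaf with isSorted before relying on the sort order. Here "reader" and
// "sort" are assumed to be an open IndexReader and the Sort passed to this policy:
//
//   for (AtomicReaderContext ctx : reader.leaves()) {
//     if (SortingMergePolicy.isSorted(ctx.reader(), sort)) {
//       // documents in this segment are stored in sort order
//     }
//   }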