package org.apache.lucene.index;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.FieldInfosWriter;
import org.apache.lucene.codecs.FieldsConsumer;
import org.apache.lucene.codecs.PerDocConsumer;
import org.apache.lucene.codecs.StoredFieldsWriter;
import org.apache.lucene.codecs.TermVectorsWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.InfoStream;

/**
 * The SegmentMerger class combines two or more Segments, represented by an
 * IndexReader ({@link #add}), into a single Segment.  After adding the
 * appropriate readers, call the merge method to combine the segments.
 *
 * @see #merge
 * @see #add
 */
final class SegmentMerger {
  private final Directory directory;
  private final int termIndexInterval;
  private final Codec codec;
  private final IOContext context;

  private final MergeState mergeState = new MergeState();
  private final FieldInfos.Builder fieldInfosBuilder;

  // note, just like in codec apis Directory 'dir' is NOT the same as segmentInfo.dir!!
  SegmentMerger(SegmentInfo segmentInfo, InfoStream infoStream, Directory dir, int termIndexInterval,
                MergeState.CheckAbort checkAbort, FieldInfos.FieldNumbers fieldNumbers, IOContext context) {
    mergeState.segmentInfo = segmentInfo;
    mergeState.infoStream = infoStream;
    mergeState.readers = new ArrayList<AtomicReader>();
    mergeState.checkAbort = checkAbort;
    directory = dir;
    this.termIndexInterval = termIndexInterval;
    this.codec = segmentInfo.getCodec();
    this.context = context;
    this.fieldInfosBuilder = new FieldInfos.Builder(fieldNumbers);
  }

  /**
   * Add an IndexReader to the collection of readers that are to be merged.
   */
  final void add(IndexReader reader) {
    for (final AtomicReaderContext ctx : reader.leaves()) {
      final AtomicReader r = ctx.reader();
      mergeState.readers.add(r);
    }
  }

  final void add(SegmentReader reader) {
    mergeState.readers.add(reader);
  }

  /**
   * Merges the readers specified by the {@link #add} method
   * into the directory passed to the constructor.
   * @return The number of documents that were merged
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   */
  final MergeState merge() throws IOException {
    // NOTE: it's important to add calls to
    // checkAbort.work(...) if you make any changes to this
    // method that will spend a lot of time.  The frequency
    // of this check impacts how long
    // IndexWriter.close(false) takes to actually stop the
    // threads.
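    // Descriptive overview (not in the original source): the merge proceeds in
    // stages: remap docIDs per reader (setDocMaps), merge FieldInfos plus doc
    // values/norms metadata, detect readers eligible for bulk copy, then merge
    // stored fields, postings, per-document values, norms and term vectors,
    // and finally write the merged FieldInfos.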
    mergeState.segmentInfo.setDocCount(setDocMaps());
    mergeDocValuesAndNormsFieldInfos();
    setMatchingSegmentReaders();

    int numMerged = mergeFields();
    assert numMerged == mergeState.segmentInfo.getDocCount();

    final SegmentWriteState segmentWriteState = new SegmentWriteState(mergeState.infoStream, directory, mergeState.segmentInfo,
                                                                      mergeState.fieldInfos, termIndexInterval, null, context);
    mergeTerms(segmentWriteState);
    mergePerDoc(segmentWriteState);

    if (mergeState.fieldInfos.hasNorms()) {
      mergeNorms(segmentWriteState);
    }

    if (mergeState.fieldInfos.hasVectors()) {
      numMerged = mergeVectors();
      assert numMerged == mergeState.segmentInfo.getDocCount();
    }

    // write the merged infos
    FieldInfosWriter fieldInfosWriter = codec.fieldInfosFormat().getFieldInfosWriter();
    fieldInfosWriter.write(directory, mergeState.segmentInfo.name, mergeState.fieldInfos, context);

    return mergeState;
  }

  private void setMatchingSegmentReaders() {
    // If the i'th reader is a SegmentReader and has
    // identical fieldName -> number mapping, then this
    // array will be non-null at position i:
    int numReaders = mergeState.readers.size();
    mergeState.matchingSegmentReaders = new SegmentReader[numReaders];

    // If this reader is a SegmentReader, and all of its
    // field name -> number mappings match the "merged"
    // FieldInfos, then we can do a bulk copy of the
    // stored fields:
    for (int i = 0; i < numReaders; i++) {
      AtomicReader reader = mergeState.readers.get(i);
      // TODO: we may be able to broaden this to
      // non-SegmentReaders, since FieldInfos is now
      // required?  But... this'd also require exposing
      // bulk-copy (TVs and stored fields) API in foreign
      // readers..
      if (reader instanceof SegmentReader) {
        SegmentReader segmentReader = (SegmentReader) reader;
        boolean same = true;
        FieldInfos segmentFieldInfos = segmentReader.getFieldInfos();
        for (FieldInfo fi : segmentFieldInfos) {
          FieldInfo other = mergeState.fieldInfos.fieldInfo(fi.number);
          if (other == null || !other.name.equals(fi.name)) {
            same = false;
            break;
          }
        }
        if (same) {
          mergeState.matchingSegmentReaders[i] = segmentReader;
          mergeState.matchedCount++;
        }
      }
    }

    if (mergeState.infoStream.isEnabled("SM")) {
      mergeState.infoStream.message("SM", "merge store matchedCount=" + mergeState.matchedCount + " vs " + mergeState.readers.size());
      if (mergeState.matchedCount != mergeState.readers.size()) {
        mergeState.infoStream.message("SM", "" + (mergeState.readers.size() - mergeState.matchedCount) + " non-bulk merges");
      }
    }
  }

  // returns an updated typepromoter (tracking type and size) given a previous one,
  // and a newly encountered docvalues
  private TypePromoter mergeDocValuesType(TypePromoter previous, DocValues docValues) {
    TypePromoter incoming = TypePromoter.create(docValues.getType(), docValues.getValueSize());
    if (previous == null) {
      previous = TypePromoter.getIdentityPromoter();
    }
    return previous.promote(incoming);
  }

  // NOTE: this is actually merging all the fieldinfos
  public void mergeDocValuesAndNormsFieldInfos() throws IOException {
    // mapping from all docvalues fields found to their promoted types
    // this is because FieldInfos does not store the valueSize
    Map<FieldInfo,TypePromoter> docValuesTypes = new HashMap<FieldInfo,TypePromoter>();
    Map<FieldInfo,TypePromoter> normValuesTypes = new HashMap<FieldInfo,TypePromoter>();

    for (AtomicReader reader : mergeState.readers) {
      FieldInfos readerFieldInfos = reader.getFieldInfos();
      for (FieldInfo fi : readerFieldInfos) {
        FieldInfo merged = fieldInfosBuilder.add(fi);
        // update the type promotion mapping for this reader
        if (fi.hasDocValues()) {
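          // Descriptive note (not in the original source): track the promoted
          // doc values type for this field, i.e. a type/size wide enough to
          // hold the values seen in every reader merged so far.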
          TypePromoter previous = docValuesTypes.get(merged);
          docValuesTypes.put(merged, mergeDocValuesType(previous, reader.docValues(fi.name)));
        }
        if (fi.hasNorms()) {
          TypePromoter previous = normValuesTypes.get(merged);
          normValuesTypes.put(merged, mergeDocValuesType(previous, reader.normValues(fi.name)));
        }
      }
    }
    updatePromoted(normValuesTypes, true);
    updatePromoted(docValuesTypes, false);
    mergeState.fieldInfos = fieldInfosBuilder.finish();
  }

  protected void updatePromoted(Map<FieldInfo,TypePromoter> infoAndPromoter, boolean norms) {
    // update any promoted doc values types:
    for (Map.Entry<FieldInfo,TypePromoter> e : infoAndPromoter.entrySet()) {
      FieldInfo fi = e.getKey();
      TypePromoter promoter = e.getValue();
      if (promoter == null) {
        if (norms) {
          fi.setNormValueType(null);
        } else {
          fi.setDocValuesType(null);
        }
      } else {
        assert promoter != TypePromoter.getIdentityPromoter();
        if (norms) {
          if (fi.getNormType() != promoter.type() && !fi.omitsNorms()) {
            // reset the type if we got promoted
            fi.setNormValueType(promoter.type());
          }
        } else {
          if (fi.getDocValuesType() != promoter.type()) {
            // reset the type if we got promoted
            fi.setDocValuesType(promoter.type());
          }
        }
      }
    }
  }

  /**
   * Merge the stored fields from each of the segments into the new one.
   * @return The number of documents in all of the readers
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   */
  private int mergeFields() throws IOException {
    final StoredFieldsWriter fieldsWriter = codec.storedFieldsFormat().fieldsWriter(directory, mergeState.segmentInfo, context);

    try {
      return fieldsWriter.merge(mergeState);
    } finally {
      fieldsWriter.close();
    }
  }

  /**
   * Merge the TermVectors from each of the segments into the new one.
   * @throws IOException if there is a low-level IO error
   */
  private final int mergeVectors() throws IOException {
    final TermVectorsWriter termVectorsWriter = codec.termVectorsFormat().vectorsWriter(directory, mergeState.segmentInfo, context);

    try {
      return termVectorsWriter.merge(mergeState);
    } finally {
      termVectorsWriter.close();
    }
  }

  // NOTE: removes any "all deleted" readers from mergeState.readers
  private int setDocMaps() throws IOException {
    final int numReaders = mergeState.readers.size();

    // Remap docIDs
    mergeState.docMaps = new MergeState.DocMap[numReaders];
    mergeState.docBase = new int[numReaders];

    int docBase = 0;

    int i = 0;
    while (i < mergeState.readers.size()) {
      final AtomicReader reader = mergeState.readers.get(i);

      mergeState.docBase[i] = docBase;
      final MergeState.DocMap docMap = MergeState.DocMap.build(reader);
      mergeState.docMaps[i] = docMap;
      docBase += docMap.numDocs();

      i++;
    }

    return docBase;
  }

  private final void mergeTerms(SegmentWriteState segmentWriteState) throws IOException {
    final List<Fields> fields = new ArrayList<Fields>();
    final List<ReaderSlice> slices = new ArrayList<ReaderSlice>();

    int docBase = 0;

    for (int readerIndex = 0; readerIndex < mergeState.readers.size(); readerIndex++) {
      final AtomicReader reader = mergeState.readers.get(readerIndex);
      final Fields f = reader.fields();
      final int maxDoc = reader.maxDoc();
      if (f != null) {
        slices.add(new ReaderSlice(docBase, maxDoc, readerIndex));
        fields.add(f);
      }
      docBase += maxDoc;
    }

    final FieldsConsumer consumer = codec.postingsFormat().fieldsConsumer(segmentWriteState);
    boolean success = false;
    try {
      consumer.merge(mergeState,
                     new MultiFields(fields.toArray(Fields.EMPTY_ARRAY),
                                     slices.toArray(ReaderSlice.EMPTY_ARRAY)));
      success = true;
    } finally {
      if (success) {
        IOUtils.close(consumer);
      } else {
        IOUtils.closeWhileHandlingException(consumer);
      }
    }
  }

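  // Descriptive note (not in the original source): mergePerDoc and mergeNorms
  // below follow the same pattern: ask the codec for a PerDocConsumer, let it
  // merge the readers, then close it, suppressing secondary exceptions if the
  // merge itself already failed.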
  private void mergePerDoc(SegmentWriteState segmentWriteState) throws IOException {
    final PerDocConsumer docsConsumer = codec.docValuesFormat()
        .docsConsumer(new PerDocWriteState(segmentWriteState));
    // TODO: remove this check when 3.x indexes are no longer supported
    // (3.x indexes don't have docvalues)
    if (docsConsumer == null) {
      return;
    }
    boolean success = false;
    try {
      docsConsumer.merge(mergeState);
      success = true;
    } finally {
      if (success) {
        IOUtils.close(docsConsumer);
      } else {
        IOUtils.closeWhileHandlingException(docsConsumer);
      }
    }
  }

  private void mergeNorms(SegmentWriteState segmentWriteState) throws IOException {
    final PerDocConsumer docsConsumer = codec.normsFormat()
        .docsConsumer(new PerDocWriteState(segmentWriteState));
    // TODO: remove this check when 3.x indexes are no longer supported
    // (3.x indexes don't have docvalues)
    if (docsConsumer == null) {
      return;
    }
    boolean success = false;
    try {
      docsConsumer.merge(mergeState);
      success = true;
    } finally {
      if (success) {
        IOUtils.close(docsConsumer);
      } else {
        IOUtils.closeWhileHandlingException(docsConsumer);
      }
    }
  }
}
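
// A minimal, hypothetical usage sketch (not part of the original source).
// SegmentMerger is package-private and normally driven by IndexWriter; the
// variables below (segmentInfo, infoStream, mergeDirectory, termIndexInterval,
// checkAbort, fieldNumbers, readersToMerge) are placeholders:
//
//   SegmentMerger merger = new SegmentMerger(segmentInfo, infoStream, mergeDirectory,
//       termIndexInterval, checkAbort, fieldNumbers, IOContext.DEFAULT);
//   for (IndexReader reader : readersToMerge) {
//     merger.add(reader);
//   }
//   MergeState mergeState = merger.merge(); // writes the merged segment files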