/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.lucene.index; import java.io.IOException; import java.util.Collections; import java.util.IdentityHashMap; import java.util.Iterator; import java.util.Map; import java.util.Set; import java.util.SortedMap; import java.util.TreeMap; import org.apache.lucene.search.Sort; import org.apache.lucene.util.Bits; import org.apache.lucene.util.Version; /** An {@link LeafReader} which reads multiple, parallel indexes. Each index * added must have the same number of documents, but typically each contains * different fields. Deletions are taken from the first reader. * Each document contains the union of the fields of all documents * with the same document number. When searching, matches for a * query term are from the first index added that has the field. * * <p>This is useful, e.g., with collections that have large fields which * change rarely and small fields that change more frequently. The smaller * fields may be re-indexed in a new index and both indexes may be searched * together. * * <p><strong>Warning:</strong> It is up to you to make sure all indexes * are created and modified the same way. For example, if you add * documents to one index, you need to add the same documents in the * same order to the other indexes. <em>Failure to do so will result in * undefined behavior</em>. */ public class ParallelLeafReader extends LeafReader { private final FieldInfos fieldInfos; private final ParallelFields fields = new ParallelFields(); private final LeafReader[] parallelReaders, storedFieldsReaders; private final Set<LeafReader> completeReaderSet = Collections.newSetFromMap(new IdentityHashMap<LeafReader,Boolean>()); private final boolean closeSubReaders; private final int maxDoc, numDocs; private final boolean hasDeletions; private final LeafMetaData metaData; private final SortedMap<String,LeafReader> fieldToReader = new TreeMap<>(); private final SortedMap<String,LeafReader> tvFieldToReader = new TreeMap<>(); /** Create a ParallelLeafReader based on the provided * readers; auto-closes the given readers on {@link #close()}. */ public ParallelLeafReader(LeafReader... readers) throws IOException { this(true, readers); } /** Create a ParallelLeafReader based on the provided * readers. */ public ParallelLeafReader(boolean closeSubReaders, LeafReader... readers) throws IOException { this(closeSubReaders, readers, readers); } /** Expert: create a ParallelLeafReader based on the provided * readers and storedFieldReaders; when a document is * loaded, only storedFieldsReaders will be used. */ public ParallelLeafReader(boolean closeSubReaders, LeafReader[] readers, LeafReader[] storedFieldsReaders) throws IOException { this.closeSubReaders = closeSubReaders; if (readers.length == 0 && storedFieldsReaders.length > 0) throw new IllegalArgumentException("There must be at least one main reader if storedFieldsReaders are used."); this.parallelReaders = readers.clone(); this.storedFieldsReaders = storedFieldsReaders.clone(); if (parallelReaders.length > 0) { final LeafReader first = parallelReaders[0]; this.maxDoc = first.maxDoc(); this.numDocs = first.numDocs(); this.hasDeletions = first.hasDeletions(); } else { this.maxDoc = this.numDocs = 0; this.hasDeletions = false; } Collections.addAll(completeReaderSet, this.parallelReaders); Collections.addAll(completeReaderSet, this.storedFieldsReaders); // check compatibility: for(LeafReader reader : completeReaderSet) { if (reader.maxDoc() != maxDoc) { throw new IllegalArgumentException("All readers must have same maxDoc: "+maxDoc+"!="+reader.maxDoc()); } } // TODO: make this read-only in a cleaner way? FieldInfos.Builder builder = new FieldInfos.Builder(); Sort indexSort = null; int createdVersionMajor = -1; // build FieldInfos and fieldToReader map: for (final LeafReader reader : this.parallelReaders) { LeafMetaData leafMetaData = reader.getMetaData(); Sort leafIndexSort = leafMetaData.getSort(); if (indexSort == null) { indexSort = leafIndexSort; } else if (leafIndexSort != null && indexSort.equals(leafIndexSort) == false) { throw new IllegalArgumentException("cannot combine LeafReaders that have different index sorts: saw both sort=" + indexSort + " and " + leafIndexSort); } if (createdVersionMajor == -1) { createdVersionMajor = leafMetaData.getCreatedVersionMajor(); } else if (createdVersionMajor != leafMetaData.getCreatedVersionMajor()) { throw new IllegalArgumentException("cannot combine LeafReaders that have different creation versions: saw both version=" + createdVersionMajor + " and " + leafMetaData.getCreatedVersionMajor()); } final FieldInfos readerFieldInfos = reader.getFieldInfos(); for (FieldInfo fieldInfo : readerFieldInfos) { // NOTE: first reader having a given field "wins": if (!fieldToReader.containsKey(fieldInfo.name)) { builder.add(fieldInfo); fieldToReader.put(fieldInfo.name, reader); if (fieldInfo.hasVectors()) { tvFieldToReader.put(fieldInfo.name, reader); } } } } if (createdVersionMajor == -1) { // empty reader createdVersionMajor = Version.LATEST.major; } Version minVersion = Version.LATEST; for (final LeafReader reader : this.parallelReaders) { Version leafVersion = reader.getMetaData().getMinVersion(); if (leafVersion == null) { minVersion = null; break; } else if (minVersion.onOrAfter(leafVersion)) { minVersion = leafVersion; } } fieldInfos = builder.finish(); this.metaData = new LeafMetaData(createdVersionMajor, minVersion, indexSort); // build Fields instance for (final LeafReader reader : this.parallelReaders) { final Fields readerFields = reader.fields(); for (String field : readerFields) { // only add if the reader responsible for that field name is the current: if (fieldToReader.get(field) == reader) { this.fields.addField(field, readerFields.terms(field)); } } } // do this finally so any Exceptions occurred before don't affect refcounts: for (LeafReader reader : completeReaderSet) { if (!closeSubReaders) { reader.incRef(); } reader.registerParentReader(this); } } @Override public String toString() { final StringBuilder buffer = new StringBuilder("ParallelLeafReader("); for (final Iterator<LeafReader> iter = completeReaderSet.iterator(); iter.hasNext();) { buffer.append(iter.next()); if (iter.hasNext()) buffer.append(", "); } return buffer.append(')').toString(); } // Single instance of this, per ParallelReader instance private static final class ParallelFields extends Fields { final Map<String,Terms> fields = new TreeMap<>(); ParallelFields() { } void addField(String fieldName, Terms terms) { fields.put(fieldName, terms); } @Override public Iterator<String> iterator() { return Collections.unmodifiableSet(fields.keySet()).iterator(); } @Override public Terms terms(String field) { return fields.get(field); } @Override public int size() { return fields.size(); } } /** * {@inheritDoc} * <p> * NOTE: the returned field numbers will likely not * correspond to the actual field numbers in the underlying * readers, and codec metadata ({@link FieldInfo#getAttribute(String)} * will be unavailable. */ @Override public FieldInfos getFieldInfos() { return fieldInfos; } @Override public Bits getLiveDocs() { ensureOpen(); return hasDeletions ? parallelReaders[0].getLiveDocs() : null; } @Override public Fields fields() { ensureOpen(); return fields; } @Override public int numDocs() { // Don't call ensureOpen() here (it could affect performance) return numDocs; } @Override public int maxDoc() { // Don't call ensureOpen() here (it could affect performance) return maxDoc; } @Override public void document(int docID, StoredFieldVisitor visitor) throws IOException { ensureOpen(); for (final LeafReader reader: storedFieldsReaders) { reader.document(docID, visitor); } } @Override public CacheHelper getCoreCacheHelper() { // ParallelReader instances can be short-lived, which would make caching trappy // so we do not cache on them, unless they wrap a single reader in which // case we delegate if (parallelReaders.length == 1 && storedFieldsReaders.length == 1 && parallelReaders[0] == storedFieldsReaders[0]) { return parallelReaders[0].getCoreCacheHelper(); } return null; } @Override public CacheHelper getReaderCacheHelper() { // ParallelReader instances can be short-lived, which would make caching trappy // so we do not cache on them, unless they wrap a single reader in which // case we delegate if (parallelReaders.length == 1 && storedFieldsReaders.length == 1 && parallelReaders[0] == storedFieldsReaders[0]) { return parallelReaders[0].getReaderCacheHelper(); } return null; } @Override public Fields getTermVectors(int docID) throws IOException { ensureOpen(); ParallelFields fields = null; for (Map.Entry<String,LeafReader> ent : tvFieldToReader.entrySet()) { String fieldName = ent.getKey(); Terms vector = ent.getValue().getTermVector(docID, fieldName); if (vector != null) { if (fields == null) { fields = new ParallelFields(); } fields.addField(fieldName, vector); } } return fields; } @Override protected synchronized void doClose() throws IOException { IOException ioe = null; for (LeafReader reader : completeReaderSet) { try { if (closeSubReaders) { reader.close(); } else { reader.decRef(); } } catch (IOException e) { if (ioe == null) ioe = e; } } // throw the first exception if (ioe != null) throw ioe; } @Override public NumericDocValues getNumericDocValues(String field) throws IOException { ensureOpen(); LeafReader reader = fieldToReader.get(field); return reader == null ? null : reader.getNumericDocValues(field); } @Override public BinaryDocValues getBinaryDocValues(String field) throws IOException { ensureOpen(); LeafReader reader = fieldToReader.get(field); return reader == null ? null : reader.getBinaryDocValues(field); } @Override public SortedDocValues getSortedDocValues(String field) throws IOException { ensureOpen(); LeafReader reader = fieldToReader.get(field); return reader == null ? null : reader.getSortedDocValues(field); } @Override public SortedNumericDocValues getSortedNumericDocValues(String field) throws IOException { ensureOpen(); LeafReader reader = fieldToReader.get(field); return reader == null ? null : reader.getSortedNumericDocValues(field); } @Override public SortedSetDocValues getSortedSetDocValues(String field) throws IOException { ensureOpen(); LeafReader reader = fieldToReader.get(field); return reader == null ? null : reader.getSortedSetDocValues(field); } @Override public NumericDocValues getNormValues(String field) throws IOException { ensureOpen(); LeafReader reader = fieldToReader.get(field); NumericDocValues values = reader == null ? null : reader.getNormValues(field); return values; } @Override public PointValues getPointValues(String fieldName) throws IOException { ensureOpen(); LeafReader reader = fieldToReader.get(fieldName); return reader == null ? null : reader.getPointValues(fieldName); } @Override public void checkIntegrity() throws IOException { ensureOpen(); for (LeafReader reader : completeReaderSet) { reader.checkIntegrity(); } } /** Returns the {@link LeafReader}s that were passed on init. */ public LeafReader[] getParallelReaders() { ensureOpen(); return parallelReaders; } @Override public LeafMetaData getMetaData() { return metaData; } }