package org.apache.lucene.index;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.util.Map;
import java.util.HashMap;
import java.util.List;
import java.util.ArrayList;
import java.util.concurrent.ConcurrentHashMap;

import org.apache.lucene.util.ReaderUtil;
import org.apache.lucene.util.ReaderUtil.Gather;  // for javadocs
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;

/**
 * Exposes flex API, merged from flex API of sub-segments.
 * This is useful when you're interacting with an {@link
 * IndexReader} implementation that consists of sequential
 * sub-readers (eg DirectoryReader or {@link
 * MultiReader}).
 *
 * <p><b>NOTE</b>: for multi readers, you'll get better
 * performance by gathering the sub readers using {@link
 * ReaderUtil#gatherSubReaders} and then operate per-reader,
 * instead of using this class.
 *
 * @lucene.experimental
 */

public final class MultiFields extends Fields {
  private final Fields[] subs;
  private final ReaderUtil.Slice[] subSlices;

  // Lazily populated per-field cache of merged Terms.  getFields() stores
  // a MultiFields instance on the reader, so the same instance (and hence
  // this map) may be reached by more than one thread; a concurrent map
  // keeps the lazy fill in terms(String) safe without locking.
  private final Map<String,Terms> terms = new ConcurrentHashMap<String,Terms>();

  /** Returns a single {@link Fields} instance for this
   *  reader, merging fields/terms/docs/positions on the
   *  fly.  This method may return null if the reader has
   *  no sub-readers, or if none of its sub-readers has
   *  any fields.
   *
   *  <p>For a reader with more than one sub-reader the
   *  merged instance is looked up on the reader first and
   *  stored back on it after being built, so it is only
   *  constructed when the reader has none stored.
   *
   *  <p><b>NOTE</b>: this is a slow way to access postings.
   *  It's better to get the sub-readers (using {@link
   *  Gather}) and iterate through them
   *  yourself. */
  public static Fields getFields(IndexReader r) throws IOException {
    final IndexReader[] subs = r.getSequentialSubReaders();
    if (subs == null) {
      // already an atomic reader
      return r.fields();
    } else if (subs.length == 0) {
      // no fields
      return null;
    } else if (subs.length == 1) {
      return getFields(subs[0]);
    } else {

      Fields currentFields = r.retrieveFields();
      if (currentFields == null) {

        final List<Fields> fields = new ArrayList<Fields>();
        final List<ReaderUtil.Slice> slices = new ArrayList<ReaderUtil.Slice>();

        new ReaderUtil.Gather(r) {
          @Override
          protected void add(int base, IndexReader sub) throws IOException {
            final Fields f = sub.fields();
            if (f != null) {
              fields.add(f);
              // The slice's last argument is the index of f within 'fields'.
              slices.add(new ReaderUtil.Slice(base, sub.maxDoc(), fields.size()-1));
            }
          }
        }.run();

        if (fields.size() == 0) {
          // Nothing to merge and nothing to store on the reader.
          return null;
        } else if (fields.size() == 1) {
          // Only one sub-reader actually has fields: no merging needed.
          currentFields = fields.get(0);
        } else {
          currentFields = new MultiFields(fields.toArray(Fields.EMPTY_ARRAY),
                                          slices.toArray(ReaderUtil.Slice.EMPTY_ARRAY));
        }
        r.storeFields(currentFields);
      }
      return currentFields;
    }
  }

  /** {@link Bits} view over the deleted docs of several
   *  readers, addressed by a single doc id space.
   *  <code>starts[i]</code> is the doc base of
   *  <code>readers[i]</code>; the final entry of
   *  <code>starts</code> is the total length. */
  private static class MultiReaderBits implements Bits {
    private final int[] starts;
    private final IndexReader[] readers;
    private final Bits[] delDocs;

    public MultiReaderBits(int[] starts, IndexReader[] readers) {
      assert readers.length == starts.length-1;
      this.starts = starts;
      this.readers = readers;
      delDocs = new Bits[readers.length];
      for(int i=0;i<readers.length;i++) {
        delDocs[i] = readers[i].getDeletedDocs();
      }
    }

    public boolean get(int doc) {
      final int sub = ReaderUtil.subIndex(doc, starts);
      Bits dels = delDocs[sub];
      if (dels == null) {
        // NOTE: this is not sync'd but multiple threads can
        // come through here; I think this is OK -- worst
        // case is more than 1 thread ends up filling in the
        // sub Bits
        dels = readers[sub].getDeletedDocs();
        if (dels == null) {
          return false;
        } else {
          delDocs[sub] = dels;
        }
      }
      // Translate the top-level doc id into the sub-reader's own doc id space.
      return dels.get(doc-starts[sub]);
    }

    public int length() {
      return starts[starts.length-1];
    }
  }

  /** Returns the deleted docs of the given reader as a
   *  single {@link Bits}, merged across its sub-readers.
   *  Returns null if the reader reports no deletions; when
   *  only one sub-reader is gathered, that sub-reader's own
   *  deleted docs are returned directly. */
  public static Bits getDeletedDocs(IndexReader r) {
    Bits result;
    if (r.hasDeletions()) {

      final List<IndexReader> readers = new ArrayList<IndexReader>();
      final List<Integer> starts = new ArrayList<Integer>();

      try {
        final int maxDoc = new ReaderUtil.Gather(r) {
          @Override
          protected void add(int base, IndexReader sub) throws IOException {
            // record all delDocs, even if they are null
            readers.add(sub);
            starts.add(base);
          }
        }.run();
        // Trailing sentinel: one more start than readers (see MultiReaderBits).
        starts.add(maxDoc);
      } catch (IOException ioe) {
        // should not happen
        throw new RuntimeException(ioe);
      }

      assert readers.size() > 0;
      if (readers.size() == 1) {
        // Only one actual sub reader -- optimize this case
        result = readers.get(0).getDeletedDocs();
      } else {
        int[] startsArray = new int[starts.size()];
        for(int i=0;i<startsArray.length;i++) {
          startsArray[i] = starts.get(i);
        }
        result = new MultiReaderBits(startsArray, readers.toArray(new IndexReader[readers.size()]));
      }

    } else {
      result = null;
    }

    return result;
  }

  /**  This method may return null if the field does not exist.*/
  public static Terms getTerms(IndexReader r, String field) throws IOException {
    final Fields fields = getFields(r);
    if (fields == null) {
      return null;
    } else {
      return fields.terms(field);
    }
  }

  /** Returns {@link DocsEnum} for the specified field and
   *  term.  This may return null if the term does not
   *  exist. */
  public static DocsEnum getTermDocsEnum(IndexReader r, Bits skipDocs, String field, BytesRef term) throws IOException {
    assert field != null;
    assert term != null;
    final Terms terms = getTerms(r, field);
    if (terms != null) {
      return terms.docs(skipDocs, term, null);
    } else {
      return null;
    }
  }

  /** Returns {@link DocsAndPositionsEnum} for the specified
   *  field and term.  This may return null if the term does
   *  not exist or positions were not indexed. */
  public static DocsAndPositionsEnum getTermPositionsEnum(IndexReader r, Bits skipDocs, String field, BytesRef term) throws IOException {
    assert field != null;
    assert term != null;
    final Terms terms = getTerms(r, field);
    if (terms != null) {
      return terms.docsAndPositions(skipDocs, term, null);
    } else {
      return null;
    }
  }

  /** Creates a merged view over the given sub-{@link Fields}.
   *  <code>subSlices[i]</code> is the slice that belongs to
   *  <code>subs[i]</code>; the two arrays are read in
   *  parallel and are not copied. */
  public MultiFields(Fields[] subs, ReaderUtil.Slice[] subSlices) {
    this.subs = subs;
    this.subSlices = subSlices;
  }

  /** Returns a {@link FieldsEnum} over all sub-instances,
   *  or {@link FieldsEnum#EMPTY} if there are none. */
  @Override
  public FieldsEnum iterator() throws IOException {

    final List<FieldsEnum> fieldsEnums = new ArrayList<FieldsEnum>();
    final List<ReaderUtil.Slice> fieldsSlices = new ArrayList<ReaderUtil.Slice>();
    for(int i=0;i<subs.length;i++) {
      fieldsEnums.add(subs[i].iterator());
      fieldsSlices.add(subSlices[i]);
    }
    if (fieldsEnums.size() == 0) {
      return FieldsEnum.EMPTY;
    } else {
      return new MultiFieldsEnum(fieldsEnums.toArray(FieldsEnum.EMPTY_ARRAY),
                                 fieldsSlices.toArray(ReaderUtil.Slice.EMPTY_ARRAY));
    }
  }

  /** Returns the merged {@link Terms} for the given field,
   *  or null if no sub-instance has that field.  A non-null
   *  result is cached per field name; misses are not cached,
   *  so asking for many non-existent fields does not grow
   *  the cache. */
  @Override
  public Terms terms(String field) throws IOException {

    // ConcurrentHashMap rejects null keys, so a null field simply
    // bypasses the cache and is passed straight to the sub-instances.
    final boolean cacheable = field != null;

    if (cacheable) {
      final Terms cached = terms.get(field);
      if (cached != null) {
        return cached;
      }
    }

    // Lazy init: first time this field is requested, we
    // create & add to terms:
    final List<Terms> fieldTerms = new ArrayList<Terms>();
    final List<ReaderUtil.Slice> fieldSlices = new ArrayList<ReaderUtil.Slice>();

    // Gather all sub-readers that share this field
    for(int i=0;i<subs.length;i++) {
      final Terms subTerms = subs[i].terms(field);
      if (subTerms != null) {
        fieldTerms.add(subTerms);
        fieldSlices.add(subSlices[i]);
      }
    }

    if (fieldTerms.size() == 0) {
      // Field does not exist in any sub-instance.  Deliberately not cached.
      return null;
    }

    final Terms result = new MultiTerms(fieldTerms.toArray(Terms.EMPTY_ARRAY),
                                        fieldSlices.toArray(ReaderUtil.Slice.EMPTY_ARRAY));
    if (cacheable) {
      // Two threads may race and each build a MultiTerms for the same
      // field; the later put simply replaces the earlier one.
      terms.put(field, result);
    }
    return result;
  }
}