LeafIndexLookup.java example

Explorer
elassandra-master
/*
 * Licensed to Elasticsearch under one or more contributor
 * license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright
 * ownership. Elasticsearch licenses this file to you under
 * the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.elasticsearch.search.lookup;

import org.apache.lucene.index.Fields;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexReaderContext;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.ReaderUtil;
import org.apache.lucene.search.IndexSearcher;
import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.common.util.MinimalMap;

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;

/**
 * Per-segment lookup that hands out {@link IndexField} objects keyed by field
 * name and exposes the readers/searcher needed to compute term and field
 * statistics.
 *
 * <p>An instance tracks a "current" document id (see {@link #setDocument(int)})
 * and forwards it to every {@link IndexField} it has created so far.
 *
 * <p>The class keeps mutable, unsynchronized state (current doc id, cached
 * counters, the field cache).
 */
public class LeafIndexLookup extends MinimalMap<String, IndexField> {

    // Current reader from which we can get the term vectors. No info on term
    // and field statistics.
    private final LeafReader reader;

    // The parent (top-level) reader from which we can get proper field and
    // term statistics.
    private final IndexReader parentReader;

    // We need this later to get the field and term statistics of the shard.
    private final IndexSearcher indexSearcher;

    // Current docId; -1 until setDocument(int) has been called.
    private int docId = -1;

    // Stores the objects that are used in the script. We maintain this map
    // because we do not want to re-initialize the objects each time a field is
    // accessed.
    private final Map<String, IndexField> indexFields = new HashMap<>();

    // Number of documents per shard. Cached here because the computation is
    // expensive. -1 means "not computed yet".
    private int numDocs = -1;

    // The maximum doc number of the shard. -1 means "not computed yet".
    private int maxDoc = -1;

    // Number of deleted documents per shard. Cached here because the
    // computation is expensive. -1 means "not computed yet".
    private int numDeletedDocs = -1;

    /**
     * Creates a lookup for the given segment.
     *
     * @param ctx context of the segment; its reader is used for term vectors,
     *            and the reader of its top-level context is used for
     *            statistics and wrapped in a new {@link IndexSearcher}
     */
    public LeafIndexLookup(LeafReaderContext ctx) {
        reader = ctx.reader();
        parentReader = ReaderUtil.getTopLevelContext(ctx).reader();
        indexSearcher = new IndexSearcher(parentReader);
        // A null query cache tells the searcher not to cache query matches.
        indexSearcher.setQueryCache(null);
    }

    /**
     * Returns the number of documents reported by the parent reader. The value
     * is read once and cached.
     */
    public int numDocs() {
        if (numDocs == -1) {
            numDocs = parentReader.numDocs();
        }
        return numDocs;
    }

    /**
     * Returns {@code maxDoc()} of the parent reader. The value is read once
     * and cached.
     */
    public int maxDoc() {
        if (maxDoc == -1) {
            maxDoc = parentReader.maxDoc();
        }
        return maxDoc;
    }

    /**
     * Returns the number of deleted documents reported by the parent reader.
     * The value is read once and cached.
     */
    public int numDeletedDocs() {
        if (numDeletedDocs == -1) {
            numDeletedDocs = parentReader.numDeletedDocs();
        }
        return numDeletedDocs;
    }

    /**
     * Sets the current document and propagates it to all cached
     * {@link IndexField}s.
     *
     * @param docId the document id to switch to
     */
    public void setDocument(int docId) {
        if (this.docId == docId) {
            // Called again with the same docId: nothing to do.
            return;
        }
        // We assume that docs are processed in ascending order of id. If this
        // is not the case, we would have to re-initialize all posting lists in
        // IndexFieldTerm.
        // TODO: instead of dropping the cached fields below, we could also
        // re-initialize them here.
        if (this.docId > docId) {
            // This might happen if the same SearchLookup is used in different
            // phases, such as score and fetch phase.
            // In this case we do not want to re-initialize posting lists etc.
            // because we do not even know if term and field statistics will be
            // needed in this new phase.
            // Therefore we just remove all cached IndexFields.
            indexFields.clear();
        }
        this.docId = docId;
        setNextDocIdInFields();
    }

    /**
     * Pushes the current document id into every cached {@link IndexField}.
     */
    protected void setNextDocIdInFields() {
        for (IndexField stat : indexFields.values()) {
            stat.setDocIdInTerms(this.docId);
        }
    }

    /**
     * Returns the {@link IndexField} for the given field name, creating and
     * caching it on first access.
     *
     * <p>TODO: here might be potential for running time improvement? If we
     * knew in advance which terms are requested, we could provide an array
     * which the user could then iterate over.
     *
     * @param key the field name; must be a {@link String}
     * @return the cached or newly created {@link IndexField}
     * @throws ClassCastException     if {@code key} is not a {@link String}
     * @throws ElasticsearchException if creating the {@link IndexField} fails
     *                                with an {@link IOException}; the original
     *                                exception is attached as the cause
     */
    @Override
    public IndexField get(Object key) {
        String stringField = (String) key;
        IndexField indexField = indexFields.get(key);
        if (indexField == null) {
            try {
                indexField = new IndexField(stringField, this);
                indexFields.put(stringField, indexField);
            } catch (IOException e) {
                // Keep the original exception as the cause so that its stack
                // trace is not lost.
                throw new ElasticsearchException(e.getMessage(), e);
            }
        }
        return indexField;
    }

    /**
     * Gets the Lucene term vectors of the current document from the segment
     * reader. See
     * https://lucene.apache.org/core/4_0_0/core/org/apache/lucene/index/Fields.html
     *
     * @return whatever {@code LeafReader.getTermVectors} returns for the
     *         current doc id
     * @throws IOException if the reader fails to load the term vectors
     */
    public Fields termVectors() throws IOException {
        assert reader != null;
        return reader.getTermVectors(docId);
    }

    /** Returns the segment reader this lookup was created for. */
    LeafReader getReader() {
        return reader;
    }

    /**
     * Returns the current document id, or -1 if no document has been set yet.
     */
    public int getDocId() {
        return docId;
    }

    /**
     * Returns the top-level reader used for field and term statistics.
     */
    public IndexReader getParentReader() {
        // parentReader is final and was already dereferenced by the
        // IndexSearcher constructor, so it can never be null here.
        return parentReader;
    }

    /** Returns the searcher created over the parent reader. */
    public IndexSearcher getIndexSearcher() {
        return indexSearcher;
    }

    /** Returns the context of the parent reader. */
    public IndexReaderContext getReaderContext() {
        return getParentReader().getContext();
    }
}