package org.elasticsearch.common.lucene.uid;
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
import java.io.IOException;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.Numbers;
import org.elasticsearch.common.lucene.uid.Versions.DocIdAndVersion;
import org.elasticsearch.index.mapper.internal.UidFieldMapper;
import org.elasticsearch.index.mapper.internal.VersionFieldMapper;
/**
 * Utility class to do efficient primary-key (only 1 doc contains the
 * given term) lookups by segment, re-using the enums. This class is
 * not thread safe, so it is the caller's job to create and use one
 * instance of this per thread. Do not use this if a term may appear
 * in more than one document! Only a single match is ever returned:
 * when versions are read from doc values (or are absent) it is the
 * last live matching doc, and when versions are read from payloads it
 * is the first live matching doc that carries an 8-byte payload.
 */
final class PerThreadIDAndVersionLookup {
    // TODO: do we really need to store all this stuff? some of it might not speed up anything.
    // we keep it around for now, to reduce the amount of e.g. hash lookups by field and stuff

    /** terms enum for uid field; {@code null} when the segment has no terms for that field */
    private final TermsEnum termsEnum;

    /** _version data; {@code null} when the segment has no uid terms or no _version doc values */
    private final NumericDocValues versions;

    /** Only true when versions are indexed as payloads instead of docvalues */
    private final boolean hasPayloads;

    /** Reused for iteration (when the term exists) */
    private PostingsEnum docsEnum;

    /** Only used for back compat, to lookup a version from payload */
    private PostingsEnum posEnum;

    /**
     * Initialize lookup for the provided segment.
     *
     * @param reader the segment reader whose uid terms and _version doc values are looked up
     * @throws IOException if reading the segment's fields, terms or doc values fails
     */
    public PerThreadIDAndVersionLookup(LeafReader reader) throws IOException {
        TermsEnum termsEnum = null;
        NumericDocValues versions = null;
        boolean hasPayloads = false;

        Fields fields = reader.fields();
        if (fields != null) {
            Terms terms = fields.terms(UidFieldMapper.NAME);
            if (terms != null) {
                hasPayloads = terms.hasPayloads();
                termsEnum = terms.iterator();
                assert termsEnum != null;
                // Versions are only fetched when the segment actually has uid terms.
                versions = reader.getNumericDocValues(VersionFieldMapper.NAME);
            }
        }

        this.versions = versions;
        this.termsEnum = termsEnum;
        this.hasPayloads = hasPayloads;
    }

    /**
     * Looks up the doc id and version for the given uid in this segment.
     *
     * @param id       the uid term to seek to
     * @param liveDocs bits of live documents; {@code null} means every document is treated as live
     * @param context  the leaf context passed through to the returned {@link DocIdAndVersion}
     * @return the matching doc and its version, or {@code null} if the id is not found
     *         (including when the segment has no uid terms or every match is deleted)
     * @throws IOException if reading terms or postings fails
     */
    public DocIdAndVersion lookup(BytesRef id, Bits liveDocs, LeafReaderContext context) throws IOException {
        // termsEnum is null when the segment has no terms for the uid field: nothing can match.
        if (termsEnum == null || termsEnum.seekExact(id) == false) {
            return null;
        }

        if (versions != null || hasPayloads == false) {
            // Use NDV to retrieve the version, in which case we only need PostingsEnum:
            // there may be more than one matching docID, in the case of nested docs, so we want the last one:
            docsEnum = termsEnum.postings(docsEnum, 0);
            int docID = DocIdSetIterator.NO_MORE_DOCS;
            for (int d = docsEnum.nextDoc(); d != DocIdSetIterator.NO_MORE_DOCS; d = docsEnum.nextDoc()) {
                if (liveDocs != null && liveDocs.get(d) == false) {
                    continue;
                }
                docID = d;
            }

            if (docID == DocIdSetIterator.NO_MORE_DOCS) {
                // Every posting for this term is deleted; re-scanning the same postings
                // with payloads could not turn up a live doc either, so stop here.
                return null;
            }
            if (versions != null) {
                return new DocIdAndVersion(docID, versions.get(docID), context);
            }
            // _uid found, but no doc values and no payloads
            return new DocIdAndVersion(docID, Versions.NOT_SET, context);
        }

        // ... but used to be stored as payloads; in this case we must use PostingsEnum
        posEnum = termsEnum.postings(posEnum, PostingsEnum.PAYLOADS);
        assert posEnum != null; // terms has payloads
        for (int d = posEnum.nextDoc(); d != DocIdSetIterator.NO_MORE_DOCS; d = posEnum.nextDoc()) {
            if (liveDocs != null && liveDocs.get(d) == false) {
                continue;
            }
            // The payload is only available after advancing to a position.
            posEnum.nextPosition();
            final BytesRef payload = posEnum.getPayload();
            // Docs whose payload is missing or is not exactly 8 bytes are skipped.
            if (payload != null && payload.length == 8) {
                // TODO: does this break the nested docs case? we are not returning the last matching docID here?
                return new DocIdAndVersion(d, Numbers.bytesToLong(payload), context);
            }
        }
        return null;
    }
}