/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.action.allterms;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRefBuilder;
import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.action.support.ActionFilters;
import org.elasticsearch.action.support.single.shard.TransportSingleShardAction;
import org.elasticsearch.cluster.ClusterState;
import org.elasticsearch.cluster.metadata.IndexNameExpressionResolver;
import org.elasticsearch.cluster.routing.ShardIterator;
import org.elasticsearch.cluster.service.ClusterService;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.IndexService;
import org.elasticsearch.index.engine.Engine;
import org.elasticsearch.index.shard.IndexShard;
import org.elasticsearch.index.shard.ShardId;
import org.elasticsearch.indices.IndicesService;
import org.elasticsearch.threadpool.ThreadPool;
import org.elasticsearch.transport.TransportService;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
/**
 * Shard-level transport action that collects terms of a single field from one shard.
 * <p>
 * One {@link TermsEnum} is opened per segment (leaf) of the shard. The enums are then merged by
 * repeatedly picking the smallest term any enum is currently positioned on, summing its document
 * frequency over all enums positioned on it, and advancing those enums. An enum at index {@code i}
 * that has run out of terms is flagged with {@code exhausted[i] == 1}; {@code 0} means it is still
 * positioned on a term.
 */
public class TransportAllTermsShardAction extends TransportSingleShardAction<AllTermsShardRequest, AllTermsSingleShardResponse> {

    private static final String ACTION_NAME = AllTermsAction.NAME + "[s]";

    private final IndicesService indicesService;

    @Inject
    public TransportAllTermsShardAction(Settings settings, ClusterService clusterService, TransportService transportService,
                                        IndicesService indicesService, ThreadPool threadPool, ActionFilters actionFilters,
                                        IndexNameExpressionResolver indexNameExpressionResolver) {
        super(settings, ACTION_NAME, threadPool, clusterService, transportService, actionFilters, indexNameExpressionResolver,
                AllTermsShardRequest::new, ThreadPool.Names.GENERIC);
        this.indicesService = indicesService;
    }

    @Override
    protected boolean isSubAction() {
        return true;
    }

    /** Creates an empty response instance carrying no term list. */
    @Override
    protected AllTermsSingleShardResponse newResponse() {
        return new AllTermsSingleShardResponse(null);
    }

    @Override
    protected boolean resolveIndex(AllTermsShardRequest request) {
        return false;
    }

    /** Selects the shard copies to try, based on the shard id and preference carried by the request. */
    @Override
    protected ShardIterator shards(ClusterState state, InternalRequest request) {
        return clusterService.operationRouting()
                .getShards(state, request.concreteIndex(), request.request().shardId(), request.request().preference());
    }

    /**
     * Collects the requested terms from the given shard.
     *
     * @param request the shard request (field, start term, size and minimum document frequency)
     * @param shardId the shard to read from
     * @return a response holding the collected terms; the list is empty when the shard has no segments
     * @throws ElasticsearchException if reading the terms fails
     */
    @Override
    protected AllTermsSingleShardResponse shardOperation(AllTermsShardRequest request, ShardId shardId) throws ElasticsearchException {
        List<String> terms = new ArrayList<>();
        IndexService indexService = indicesService.indexServiceSafe(shardId.getIndex());
        IndexShard indexShard = indexService.getShard(shardId.id());
        // The reader is only touched inside the try block so the searcher is released even if
        // obtaining the reader or its leaves fails.
        try (Engine.Searcher searcher = indexShard.acquireSearcher("all_terms")) {
            List<LeafReaderContext> leaves = searcher.reader().leaves();
            if (!leaves.isEmpty()) {
                getTerms(request, terms, leaves);
            }
            return new AllTermsSingleShardResponse(terms);
        }
    }

    /**
     * Appends terms of {@code request.field()} found in {@code leaves} to {@code terms}.
     *
     * @param request the shard request
     * @param terms   output list the terms are appended to
     * @param leaves  the segments to read from
     * @throws ElasticsearchException if reading from the index fails
     */
    protected static void getTerms(AllTermsShardRequest request, List<String> terms, List<LeafReaderContext> leaves) {
        List<TermsEnum> termIters = getTermsEnums(request, leaves);
        CharsRefBuilder spare = new CharsRefBuilder();
        // Java zero-initialises the array, i.e. no iterator is exhausted at the start.
        int[] exhausted = new int[termIters.size()];
        try {
            BytesRef firstTerm = findSmallestTermAfter(request, termIters, null, exhausted);
            if (firstTerm == null) {
                return;
            }
            findNMoreTerms(request, terms, termIters, spare, firstTerm, exhausted);
        } catch (IOException e) {
            throw new ElasticsearchException("failed to collect terms for field [" + request.field() + "]", e);
        }
    }

    /**
     * Starting at {@code lastTerm}, appends terms to {@code terms} until it holds
     * {@code request.size()} entries or all iterators are exhausted. Only terms whose summed
     * document frequency is at least {@code request.minDocFreq()} are added.
     *
     * @param request   the shard request
     * @param terms     output list the terms are appended to
     * @param termIters the per-segment iterators; every non-exhausted one must be positioned on a term
     * @param spare     scratch builder used to convert term bytes to a string
     * @param lastTerm  the term to start with; may reference an iterator's internal buffer, it is copied here
     * @param exhausted per-iterator flags, {@code 1} for iterators without further terms
     * @throws ElasticsearchException if reading from an iterator fails
     */
    protected static void findNMoreTerms(AllTermsShardRequest request, List<String> terms, List<TermsEnum> termIters,
                                         CharsRefBuilder spare, BytesRef lastTerm, int[] exhausted) {
        // Nothing to start from, or the caller's list is already full: do not add anything.
        if (lastTerm == null || terms.size() >= request.size()) {
            return;
        }
        // Copy the raw bytes; a round trip through a String is not guaranteed to reproduce the
        // same bytes and would break the equality checks against the iterators' terms.
        BytesRef current = BytesRef.deepCopyOf(lastTerm);
        addIfFrequentEnough(request, terms, termIters, spare, current, exhausted);
        while (terms.size() < request.size() && current != null) {
            moveIterators(exhausted, termIters, current);
            current = findMinimum(exhausted, termIters);
            if (current != null) {
                addIfFrequentEnough(request, terms, termIters, spare, current, exhausted);
            }
        }
    }

    /** Appends {@code term} to {@code terms} if its summed document frequency reaches {@code request.minDocFreq()}. */
    private static void addIfFrequentEnough(AllTermsShardRequest request, List<String> terms, List<TermsEnum> termIters,
                                            CharsRefBuilder spare, BytesRef term, int[] exhausted) {
        if (getDocFreq(termIters, term, exhausted) >= request.minDocFreq()) {
            spare.copyUTF8Bytes(term);
            terms.add(spare.toString());
        }
    }

    /**
     * Opens one terms iterator per segment for {@code request.field()}. Segments that do not
     * contain the field are skipped, so the returned list may be shorter than {@code leaves}.
     *
     * @param request the shard request
     * @param leaves  the segments to open iterators on
     * @return the iterators, in segment order
     * @throws ElasticsearchException if opening an iterator fails
     */
    protected static List<TermsEnum> getTermsEnums(AllTermsShardRequest request, List<LeafReaderContext> leaves) {
        List<TermsEnum> termIters = new ArrayList<>(leaves.size());
        try {
            for (LeafReaderContext leaf : leaves) {
                Terms fieldTerms = leaf.reader().terms(request.field());
                // A null result means this segment has no terms for the field.
                if (fieldTerms != null) {
                    termIters.add(fieldTerms.iterator());
                }
            }
        } catch (IOException e) {
            throw new ElasticsearchException("failed to open terms for field [" + request.field() + "]", e);
        }
        return termIters;
    }

    /**
     * Positions every iterator on its first term strictly after {@code request.from()} (or on its
     * very first term when no start term is given) and returns the smallest of those terms.
     *
     * @param request   the shard request
     * @param termIters the per-segment iterators to position
     * @param lastTerm  initial candidate for the minimum, or {@code null} for none
     * @param exhausted per-iterator flags; set to {@code 1} for every iterator that has no matching term
     * @return the smallest term found (possibly a reference into an iterator's buffer), or
     *         {@code lastTerm} unchanged if no iterator yields a smaller one
     * @throws IOException if reading from an iterator fails
     */
    protected static BytesRef findSmallestTermAfter(AllTermsShardRequest request, List<TermsEnum> termIters, BytesRef lastTerm,
                                                    int[] exhausted) throws IOException {
        // The start term is the same for every iterator, so encode it only once.
        final BytesRef from = request.from() == null ? null : new BytesRef(request.from());
        for (int i = 0; i < termIters.size(); i++) {
            TermsEnum termsEnum = termIters.get(i);
            BytesRef curTerm;
            if (from == null) {
                // No start term: begin with the first term; null means the segment has no terms at all.
                curTerm = termsEnum.next();
            } else {
                TermsEnum.SeekStatus seekStatus = termsEnum.seekCeil(from);
                if (seekStatus == TermsEnum.SeekStatus.END) {
                    curTerm = null;
                } else if (seekStatus == TermsEnum.SeekStatus.FOUND) {
                    // Exact hit on the start term: we want the term after it.
                    curTerm = termsEnum.next();
                } else {
                    // Positioned on the first term greater than the start term.
                    curTerm = termsEnum.term();
                }
            }
            if (curTerm == null) {
                exhausted[i] = 1;
            }
            // See if it is the smallest term so far.
            if (exhausted[i] != 1 && (lastTerm == null || curTerm.compareTo(lastTerm) < 0)) {
                lastTerm = curTerm;
            }
        }
        return lastTerm;
    }

    /**
     * Sums the document frequency of {@code lastTerm} over all non-exhausted iterators that are
     * currently positioned on exactly that term.
     *
     * @param termIters the per-segment iterators
     * @param lastTerm  the term to look up
     * @param exhausted per-iterator flags; iterators flagged non-zero are ignored
     * @return the summed document frequency, {@code 0} if no iterator is positioned on the term
     * @throws ElasticsearchException if reading from an iterator fails
     */
    protected static long getDocFreq(List<TermsEnum> termIters, BytesRef lastTerm, int[] exhausted) {
        long docFreq = 0;
        try {
            for (int i = 0; i < termIters.size(); i++) {
                if (exhausted[i] != 0) {
                    continue;
                }
                TermsEnum termsEnum = termIters.get(i);
                if (termsEnum.term().compareTo(lastTerm) == 0) {
                    docFreq += termsEnum.docFreq();
                }
            }
        } catch (IOException e) {
            throw new ElasticsearchException("failed to read document frequency", e);
        }
        return docFreq;
    }

    /**
     * Returns a copy of the smallest term any non-exhausted iterator is currently positioned on.
     *
     * @param exhausted per-iterator flags; iterators flagged {@code 1} are ignored
     * @param termIters the per-segment iterators
     * @return an independent copy of the smallest term, or {@code null} if all iterators are exhausted
     * @throws ElasticsearchException if reading from an iterator fails
     */
    protected static BytesRef findMinimum(int[] exhausted, List<TermsEnum> termIters) {
        BytesRef minTerm = null;
        try {
            for (int i = 0; i < termIters.size(); i++) {
                if (exhausted[i] == 1) {
                    continue;
                }
                BytesRef candidate = termIters.get(i).term();
                if (minTerm == null || minTerm.compareTo(candidate) > 0) {
                    minTerm = candidate;
                }
            }
        } catch (IOException e) {
            throw new ElasticsearchException("failed to read current term", e);
        }
        // The iterator may reuse its buffer once advanced, so hand out a byte-exact copy.
        return minTerm == null ? null : BytesRef.deepCopyOf(minTerm);
    }

    /**
     * Advances every non-exhausted iterator that is currently positioned on {@code lastTerm} and
     * flags those that have no further term.
     *
     * @param exhausted per-iterator flags; set to {@code 1} for iterators that run out of terms
     * @param termIters the per-segment iterators
     * @param lastTerm  the term to move past; expected to be a copy of a term, not a reference
     *                  into a terms iterator
     * @throws ElasticsearchException if reading from an iterator fails
     */
    protected static void moveIterators(int[] exhausted, List<TermsEnum> termIters, BytesRef lastTerm) {
        try {
            for (int i = 0; i < termIters.size(); i++) {
                if (exhausted[i] == 1) {
                    continue;
                }
                TermsEnum termsEnum = termIters.get(i);
                if (termsEnum.term().compareTo(lastTerm) == 0 && termsEnum.next() == null) {
                    exhausted[i] = 1;
                }
            }
        } catch (IOException e) {
            throw new ElasticsearchException("failed to advance terms iterator", e);
        }
    }
}