package com.aconex.scrutineer.elasticsearch;
import java.io.IOException;
import java.io.ObjectOutputStream;
import java.io.OutputStream;
import com.aconex.scrutineer.IdAndVersionFactory;
import com.aconex.scrutineer.LogUtils;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.action.search.SearchType;
import org.elasticsearch.client.Client;
import org.elasticsearch.action.search.SearchRequestBuilder;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.index.query.QueryStringQueryBuilder;
import org.elasticsearch.search.SearchHit;
import org.slf4j.Logger;
public class ElasticSearchDownloader {
private static final Logger LOG = LogUtils.loggerForThisClass();
static final int BATCH_SIZE = 100000;
static final int SCROLL_TIME_IN_MINUTES = 10;
private long numItems = 0;
private final Client client;
private final String indexName;
private final String query;
private final IdAndVersionFactory idAndVersionFactory;
public ElasticSearchDownloader(Client client, String indexName, String query, IdAndVersionFactory idAndVersionFactory) {
this.client = client;
this.indexName = indexName;
this.query = query;
this.idAndVersionFactory = idAndVersionFactory;
}
public void downloadTo(OutputStream outputStream) {
long begin = System.currentTimeMillis();
doDownloadTo(outputStream);
LogUtils.infoTimeTaken(LOG, begin, numItems, "Scan & Download completed");
}
private void doDownloadTo(OutputStream outputStream) {
try {
ObjectOutputStream objectOutputStream = new ObjectOutputStream(outputStream);
consumeBatches(objectOutputStream, startScroll().getScrollId());
objectOutputStream.close();
} catch (IOException e) {
throw new RuntimeException(e);
}
}
void consumeBatches(ObjectOutputStream objectOutputStream, String initialScrollId) throws IOException {
String scrollId = initialScrollId;
SearchResponse batchSearchResponse = null;
do {
batchSearchResponse = client.prepareSearchScroll(scrollId).setScroll(TimeValue.timeValueMinutes(SCROLL_TIME_IN_MINUTES)).execute().actionGet();
scrollId = batchSearchResponse.getScrollId();
} while (writeSearchResponseToOutputStream(objectOutputStream, batchSearchResponse));
}
boolean writeSearchResponseToOutputStream(ObjectOutputStream objectOutputStream, SearchResponse searchResponse) throws IOException {
SearchHit[] hits = searchResponse.getHits().hits();
for (SearchHit hit : hits) {
idAndVersionFactory.create(hit.getId(), hit.getVersion()).writeToStream(objectOutputStream);
numItems++;
}
return hits.length > 0;
}
QueryStringQueryBuilder createQuery() {
return QueryBuilders.queryString(query).defaultOperator(QueryStringQueryBuilder.Operator.AND).defaultField("_all");
}
@SuppressWarnings("PMD.NcssMethodCount")
SearchResponse startScroll() {
SearchRequestBuilder searchRequestBuilder = client.prepareSearch(indexName);
searchRequestBuilder.setSearchType(SearchType.SCAN);
searchRequestBuilder.setQuery(createQuery());
searchRequestBuilder.setSize(BATCH_SIZE);
searchRequestBuilder.setExplain(false);
searchRequestBuilder.setNoFields();
searchRequestBuilder.setVersion(true);
searchRequestBuilder.setScroll(TimeValue.timeValueMinutes(SCROLL_TIME_IN_MINUTES));
return searchRequestBuilder.execute().actionGet();
}
}