/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ package org.apache.streams.elasticsearch; import org.apache.streams.config.ComponentConfigurator; import org.apache.streams.config.StreamsConfigurator; import org.apache.streams.jackson.StreamsJacksonMapper; import com.fasterxml.jackson.core.JsonProcessingException; import org.elasticsearch.action.search.SearchRequestBuilder; import org.elasticsearch.action.search.SearchResponse; import org.elasticsearch.action.search.SearchType; import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.script.Script; import org.elasticsearch.search.SearchHit; import org.elasticsearch.search.sort.SortBuilders; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.Serializable; import java.util.ArrayList; import java.util.Iterator; import java.util.List; /** * Helper for building, querying, and paging an elasticsearch query. */ public class ElasticsearchQuery implements Iterable<SearchHit>, Iterator<SearchHit>, Serializable { private static final Logger LOGGER = LoggerFactory.getLogger(ElasticsearchQuery.class); private static final int SCROLL_POSITION_NOT_INITIALIZED = -3; private ElasticsearchClientManager elasticsearchClientManager; private ElasticsearchReaderConfiguration config; private List<String> indexes = new ArrayList<>(); private List<String> types = new ArrayList<>(); private int limit = 1000 * 1000 * 1000; // we are going to set the default limit very high to 1bil private int batchSize = 100; private String scrollTimeout = "5m"; private org.elasticsearch.index.query.QueryBuilder queryBuilder; private SearchRequestBuilder search; private SearchResponse scrollResp; private int scrollPositionInScroll = SCROLL_POSITION_NOT_INITIALIZED; private SearchHit next = null; private long totalHits = 0; private long totalRead = 0; private StreamsJacksonMapper mapper = StreamsJacksonMapper.getInstance(); /** * ElasticsearchQuery constructor - resolves ElasticsearchReaderConfiguration from JVM 'elasticsearch'. */ public ElasticsearchQuery() { this(new ComponentConfigurator<>(ElasticsearchReaderConfiguration.class) .detectConfiguration(StreamsConfigurator.getConfig().getConfig("elasticsearch"))); } /** * ElasticsearchQuery constructor - uses provided ElasticsearchReaderConfiguration. */ public ElasticsearchQuery(ElasticsearchReaderConfiguration config) { this.config = config; this.elasticsearchClientManager = ElasticsearchClientManager.getInstance(config); this.indexes.addAll(config.getIndexes()); this.types.addAll(config.getTypes()); this.scrollTimeout = config.getScrollTimeout(); } public long getHitCount() { return this.search == null ? 0 : this.totalHits; } public long getReadCount() { return this.totalRead; } public double getReadPercent() { return (double) this.getReadCount() / (double) this.getHitCount(); } public long getRemainingCount() { return this.totalRead - this.totalHits; } public void setBatchSize(int batchSize) { this.batchSize = batchSize; } public void setScrollTimeout(String scrollTimeout) { this.scrollTimeout = scrollTimeout; } public void setQueryBuilder(QueryBuilder queryBuilder) { this.queryBuilder = queryBuilder; } /** * execute ElasticsearchQuery. * @param obj deprecated */ public void execute(Object obj) { // If we haven't already set up the search, then set up the search. if (search == null) { search = elasticsearchClientManager.client() .prepareSearch(indexes.toArray(new String[0])) .setSearchType(SearchType.SCAN) .setExplain(true) .addField("*") .setFetchSource(true) .setSize(batchSize) .setScroll(scrollTimeout) .addField("_timestamp"); LOGGER.debug("Search source: " + search.toString()); String searchJson; if ( config.getSearch() != null ) { LOGGER.debug("Have config in Reader: " + config.getSearch().toString()); try { searchJson = mapper.writeValueAsString(config.getSearch()); LOGGER.debug("Extra source: " + searchJson); search = search.setExtraSource(searchJson); } catch (JsonProcessingException ex) { LOGGER.warn("Could not apply _search supplied by config", ex.getMessage()); } } LOGGER.debug("Final Search: " + search.internalBuilder().toString()); if (this.queryBuilder != null) { search = search.setQuery(this.queryBuilder); } // If the types are null, then don't specify a type if (this.types != null && this.types.size() > 0) { search = search.setTypes(types.toArray(new String[0])); } // TODO: Replace when all clusters are upgraded past 0.90.4 so we can implement a RANDOM scroll. boolean random = false; if (random) { search = search.addSort(SortBuilders.scriptSort(new Script("random()"), "number")); } } // We don't have a scroll, we need to create a scroll if (scrollResp == null) { scrollResp = search.execute().actionGet(); LOGGER.trace(search.toString()); } } //Iterable methods @Override public Iterator<SearchHit> iterator() { return this; } //Iterator methods @Override public SearchHit next() { return this.next; } @Override public boolean hasNext() { calcNext(); return hasRecords(); } /** * shift to next page of scroll. */ public void calcNext() { try { // We have exhausted our scroll create another scroll. if (scrollPositionInScroll == SCROLL_POSITION_NOT_INITIALIZED || scrollPositionInScroll >= scrollResp.getHits().getHits().length) { // reset the scroll position scrollPositionInScroll = 0; // get the next hits of the scroll scrollResp = elasticsearchClientManager.client() .prepareSearchScroll(scrollResp.getScrollId()) .setScroll(scrollTimeout) .execute() .actionGet(); this.totalHits = scrollResp.getHits().getTotalHits(); } // If this scroll has 0 items then we set the scroll position to -1 // letting the iterator know that we are done. if (scrollResp.getHits().getTotalHits() == 0 || scrollResp.getHits().getHits().length == 0) { scrollPositionInScroll = -1; } else { // get the next record next = scrollResp.getHits().getAt(scrollPositionInScroll); // Increment our counters scrollPositionInScroll += 1; totalRead += 1; } } catch (Exception ex) { LOGGER.error("Unexpected scrolling error: {}", ex.getMessage()); scrollPositionInScroll = -1; next = null; } } public void remove() { } public void cleanUp() { } protected boolean isCompleted() { return totalRead >= this.limit && hasRecords(); } protected boolean hasRecords() { return scrollPositionInScroll != -1 && (!(this.totalRead > this.limit)); } }