package org.elasticsearch.plugin.nlpcn; import com.alibaba.druid.sql.ast.statement.SQLJoinTableSource; import org.elasticsearch.action.search.SearchRequestBuilder; import org.elasticsearch.action.search.SearchResponse; import org.elasticsearch.action.search.SearchType; import org.elasticsearch.client.Client; import org.elasticsearch.common.text.Text; import org.elasticsearch.common.unit.TimeValue; import org.elasticsearch.index.query.BoolQueryBuilder; import org.elasticsearch.index.query.QueryBuilders; import org.elasticsearch.search.SearchHit; import org.elasticsearch.search.internal.InternalSearchHit; import org.elasticsearch.search.sort.FieldSortBuilder; import org.elasticsearch.search.sort.SortOrder; import org.nlpcn.es4sql.domain.Field; import org.nlpcn.es4sql.domain.Select; import org.nlpcn.es4sql.domain.Where; import org.nlpcn.es4sql.exception.SqlParseException; import org.nlpcn.es4sql.query.join.HashJoinElasticRequestBuilder; import org.nlpcn.es4sql.query.join.TableInJoinRequestBuilder; import org.nlpcn.es4sql.query.maker.QueryMaker; import java.io.IOException; import java.util.*; /** * Created by Eliran on 22/8/2015. */ public class HashJoinElasticExecutor extends ElasticJoinExecutor { private HashJoinElasticRequestBuilder requestBuilder; private Client client; private boolean useQueryTermsFilterOptimization = false; private final int MAX_RESULTS_FOR_FIRST_TABLE = 100000; HashJoinComparisonStructure hashJoinComparisonStructure; private Set<String> alreadyMatched; public HashJoinElasticExecutor(Client client, HashJoinElasticRequestBuilder requestBuilder) { super(requestBuilder); this.client = client; this.requestBuilder = requestBuilder; this.useQueryTermsFilterOptimization = requestBuilder.isUseTermFiltersOptimization(); this.hashJoinComparisonStructure = new HashJoinComparisonStructure(requestBuilder.getT1ToT2FieldsComparison()); this.alreadyMatched = new HashSet<>(); } public List<InternalSearchHit> innerRun() throws IOException, SqlParseException { Map<String, Map<String, List<Object>>> optimizationTermsFilterStructure = initOptimizationStructure(); updateFirstTableLimitIfNeeded(); TableInJoinRequestBuilder firstTableRequest = requestBuilder.getFirstTable(); createKeyToResultsAndFillOptimizationStructure(optimizationTermsFilterStructure, firstTableRequest); TableInJoinRequestBuilder secondTableRequest = requestBuilder.getSecondTable(); if (needToOptimize(optimizationTermsFilterStructure)) { updateRequestWithTermsFilter(optimizationTermsFilterStructure, secondTableRequest); } List<InternalSearchHit> combinedResult = createCombinedResults(secondTableRequest); int currentNumOfResults = combinedResult.size(); int totalLimit = requestBuilder.getTotalLimit(); if (requestBuilder.getJoinType() == SQLJoinTableSource.JoinType.LEFT_OUTER_JOIN && currentNumOfResults < totalLimit) { String t1Alias = requestBuilder.getFirstTable().getAlias(); String t2Alias = requestBuilder.getSecondTable().getAlias(); //todo: for each till Limit addUnmatchedResults(combinedResult, this.hashJoinComparisonStructure.getAllSearchHits(), requestBuilder.getSecondTable().getReturnedFields(), currentNumOfResults, totalLimit, t1Alias, t2Alias); } if(firstTableRequest.getOriginalSelect().isOrderdSelect()){ Collections.sort(combinedResult,new Comparator<InternalSearchHit>() { @Override public int compare(InternalSearchHit o1, InternalSearchHit o2) { return o1.docId() - o2.docId(); } }); } return combinedResult; } private Map<String, Map<String, List<Object>>> initOptimizationStructure() { Map<String,Map<String, List<Object>>> optimizationTermsFilterStructure = new HashMap<>(); for(String comparisonId: this.hashJoinComparisonStructure.getComparisons().keySet()){ optimizationTermsFilterStructure.put(comparisonId,new HashMap<String, List<Object>>()); } return optimizationTermsFilterStructure; } private void updateFirstTableLimitIfNeeded() { if (requestBuilder.getJoinType() == SQLJoinTableSource.JoinType.LEFT_OUTER_JOIN) { Integer firstTableHintLimit = requestBuilder.getFirstTable().getHintLimit(); int totalLimit = requestBuilder.getTotalLimit(); if (firstTableHintLimit == null || firstTableHintLimit > totalLimit) { requestBuilder.getFirstTable().setHintLimit(totalLimit); } } } private List<InternalSearchHit> createCombinedResults( TableInJoinRequestBuilder secondTableRequest) { List<InternalSearchHit> combinedResult = new ArrayList<>(); int resultIds = 0; int totalLimit = this.requestBuilder.getTotalLimit(); Integer hintLimit = secondTableRequest.getHintLimit(); SearchResponse searchResponse; boolean finishedScrolling; if (hintLimit != null && hintLimit < MAX_RESULTS_ON_ONE_FETCH) { searchResponse = secondTableRequest.getRequestBuilder().setSize(hintLimit).get(); finishedScrolling = true; } else { searchResponse = secondTableRequest.getRequestBuilder() .setScroll(new TimeValue(60000)) .setSize(MAX_RESULTS_ON_ONE_FETCH).get(); //es5.0 no need to scroll again! // searchResponse = client.prepareSearchScroll(searchResponse.getScrollId()).setScroll(new TimeValue(600000)).get(); finishedScrolling = false; } updateMetaSearchResults(searchResponse); boolean limitReached = false; int fetchedSoFarFromSecondTable = 0; while (!limitReached) { SearchHit[] secondTableHits = searchResponse.getHits().getHits(); fetchedSoFarFromSecondTable += secondTableHits.length; for (SearchHit secondTableHit : secondTableHits) { if (limitReached) break; //todo: need to run on comparisons. for each comparison check if exists and add. HashMap<String, List<Map.Entry<Field, Field>>> comparisons = this.hashJoinComparisonStructure.getComparisons(); for (Map.Entry<String, List<Map.Entry<Field, Field>>> comparison : comparisons.entrySet()) { String comparisonID = comparison.getKey(); List<Map.Entry<Field, Field>> t1ToT2FieldsComparison = comparison.getValue(); String key = getComparisonKey(t1ToT2FieldsComparison, secondTableHit, false, null); SearchHitsResult searchHitsResult = this.hashJoinComparisonStructure.searchForMatchingSearchHits(comparisonID, key); if (searchHitsResult != null && searchHitsResult.getSearchHits().size() > 0) { searchHitsResult.setMatchedWithOtherTable(true); List<InternalSearchHit> searchHits = searchHitsResult.getSearchHits(); for (InternalSearchHit matchingHit : searchHits) { String combinedId = matchingHit.id() + "|" + secondTableHit.getId(); //in order to prevent same matching when using OR on hashJoins. if(this.alreadyMatched.contains(combinedId)){ continue; } else { this.alreadyMatched.add(combinedId); } Map<String,Object> copiedSource = new HashMap<String,Object>(); copyMaps(copiedSource,secondTableHit.sourceAsMap()); onlyReturnedFields(copiedSource, secondTableRequest.getReturnedFields(),secondTableRequest.getOriginalSelect().isSelectAll()); InternalSearchHit searchHit = new InternalSearchHit(matchingHit.docId(), combinedId, new Text(matchingHit.getType() + "|" + secondTableHit.getType()), matchingHit.getFields()); searchHit.sourceRef(matchingHit.getSourceRef()); searchHit.sourceAsMap().clear(); searchHit.sourceAsMap().putAll(matchingHit.sourceAsMap()); String t1Alias = requestBuilder.getFirstTable().getAlias(); String t2Alias = requestBuilder.getSecondTable().getAlias(); mergeSourceAndAddAliases(copiedSource, searchHit, t1Alias, t2Alias); combinedResult.add(searchHit); resultIds++; if (resultIds >= totalLimit) { limitReached = true; break; } } } } } if (!finishedScrolling) { if (secondTableHits.length > 0 && (hintLimit == null || fetchedSoFarFromSecondTable >= hintLimit)) { searchResponse = client.prepareSearchScroll(searchResponse.getScrollId()).setScroll(new TimeValue(600000)).execute().actionGet(); } else break; } else { break; } } return combinedResult; } private void copyMaps(Map<String, Object> into, Map<String, Object> from) { for(Map.Entry<String,Object> keyAndValue : from.entrySet()) into.put(keyAndValue.getKey(),keyAndValue.getValue()); } private void createKeyToResultsAndFillOptimizationStructure(Map<String,Map<String, List<Object>>> optimizationTermsFilterStructure, TableInJoinRequestBuilder firstTableRequest) { List<SearchHit> firstTableHits = fetchAllHits(firstTableRequest); int resultIds = 1; for (SearchHit hit : firstTableHits) { HashMap<String, List<Map.Entry<Field, Field>>> comparisons = this.hashJoinComparisonStructure.getComparisons(); for (Map.Entry<String, List<Map.Entry<Field, Field>>> comparison : comparisons.entrySet()) { String comparisonID = comparison.getKey(); List<Map.Entry<Field, Field>> t1ToT2FieldsComparison = comparison.getValue(); String key = getComparisonKey(t1ToT2FieldsComparison, hit, true, optimizationTermsFilterStructure.get(comparisonID)); //int docid , id InternalSearchHit searchHit = new InternalSearchHit(resultIds, hit.id(), new Text(hit.getType()), hit.getFields()); searchHit.sourceRef(hit.getSourceRef()); onlyReturnedFields(searchHit.sourceAsMap(), firstTableRequest.getReturnedFields(),firstTableRequest.getOriginalSelect().isSelectAll()); resultIds++; this.hashJoinComparisonStructure.insertIntoComparisonHash(comparisonID, key, searchHit); } } } private List<SearchHit> fetchAllHits(TableInJoinRequestBuilder tableInJoinRequest) { Integer hintLimit = tableInJoinRequest.getHintLimit(); SearchRequestBuilder requestBuilder = tableInJoinRequest.getRequestBuilder(); if (hintLimit != null && hintLimit < MAX_RESULTS_ON_ONE_FETCH) { requestBuilder.setSize(hintLimit); SearchResponse searchResponse = requestBuilder.get(); updateMetaSearchResults(searchResponse); return Arrays.asList(searchResponse.getHits().getHits()); } return scrollTillLimit(tableInJoinRequest, hintLimit); } private List<SearchHit> scrollTillLimit(TableInJoinRequestBuilder tableInJoinRequest, Integer hintLimit) { SearchResponse scrollResp = scrollOneTimeWithMax(client,tableInJoinRequest); updateMetaSearchResults(scrollResp); List<SearchHit> hitsWithScan = new ArrayList<>(); int curentNumOfResults = 0; SearchHit[] hits = scrollResp.getHits().hits(); if (hintLimit == null) hintLimit = MAX_RESULTS_FOR_FIRST_TABLE; while (hits.length != 0 && curentNumOfResults < hintLimit) { curentNumOfResults += hits.length; Collections.addAll(hitsWithScan, hits); if (curentNumOfResults >= MAX_RESULTS_FOR_FIRST_TABLE) { //todo: log or exception? System.out.println("too many results for first table, stoping at:" + curentNumOfResults); break; } scrollResp = client.prepareSearchScroll(scrollResp.getScrollId()).setScroll(new TimeValue(600000)).execute().actionGet(); hits = scrollResp.getHits().getHits(); } return hitsWithScan; } private boolean needToOptimize(Map<String,Map<String, List<Object>>> optimizationTermsFilterStructure) { if(! useQueryTermsFilterOptimization && optimizationTermsFilterStructure != null && optimizationTermsFilterStructure.size() > 0) return false; boolean allEmpty = true; for(Map<String,List<Object>> optimization : optimizationTermsFilterStructure.values()){ if(optimization.size() > 0){ allEmpty = false; break; } } return !allEmpty; } private void updateRequestWithTermsFilter(Map<String,Map<String, List<Object>>> optimizationTermsFilterStructure, TableInJoinRequestBuilder secondTableRequest) throws SqlParseException { Select select = secondTableRequest.getOriginalSelect(); BoolQueryBuilder orQuery = QueryBuilders.boolQuery(); for(Map<String,List<Object>> optimization : optimizationTermsFilterStructure.values()) { BoolQueryBuilder andQuery = QueryBuilders.boolQuery(); for (Map.Entry<String, List<Object>> keyToValues : optimization.entrySet()) { String fieldName = keyToValues.getKey(); List<Object> values = keyToValues.getValue(); andQuery.must(QueryBuilders.termsQuery(fieldName, values)); } orQuery.should(andQuery); } Where where = select.getWhere(); BoolQueryBuilder boolQuery; if (where != null) { boolQuery = QueryMaker.explan(where,false); boolQuery.must(orQuery); } else boolQuery = orQuery; secondTableRequest.getRequestBuilder().setQuery(boolQuery); } private String getComparisonKey(List<Map.Entry<Field, Field>> t1ToT2FieldsComparison, SearchHit hit, boolean firstTable, Map<String, List<Object>> optimizationTermsFilterStructure) { String key = ""; Map<String, Object> sourceAsMap = hit.sourceAsMap(); for (Map.Entry<Field, Field> t1ToT2 : t1ToT2FieldsComparison) { //todo: change to our function find if key contains '.' String name; if (firstTable) name = t1ToT2.getKey().getName(); else name = t1ToT2.getValue().getName(); Object data = deepSearchInMap(sourceAsMap, name); if (firstTable && useQueryTermsFilterOptimization) { updateOptimizationData(optimizationTermsFilterStructure, data, t1ToT2.getValue().getName()); } if (data == null) key += "|null|"; else key += "|" + data.toString() + "|"; } return key; } private void updateOptimizationData(Map<String, List<Object>> optimizationTermsFilterStructure, Object data, String queryOptimizationKey) { List<Object> values = optimizationTermsFilterStructure.get(queryOptimizationKey); if (values == null) { values = new ArrayList<>(); optimizationTermsFilterStructure.put(queryOptimizationKey, values); } if (data instanceof String) { //todo: analyzed or not analyzed check.. data = ((String) data).toLowerCase(); } if(data!=null) values.add(data); } }