/*
 * Copyright (C) 2014 Indeed Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 * in compliance with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the
 * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied. See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.indeed.flamdex.search;

import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.indeed.flamdex.api.FlamdexOutOfMemoryException;
import com.indeed.flamdex.api.FlamdexReader;
import com.indeed.flamdex.datastruct.FastBitSet;
import com.indeed.flamdex.datastruct.FastBitSetPooler;
import com.indeed.flamdex.datastruct.MockFastBitSetPooler;
import com.indeed.flamdex.query.BooleanOp;
import com.indeed.flamdex.query.Query;
import com.indeed.flamdex.query.QueryType;
import com.indeed.flamdex.query.Term;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;

/**
 * Runs a {@link Query} against a single {@link FlamdexReader}.
 *
 * <p>The query tree is first rewritten into a tree of {@code QueryEvaluator}s; the root
 * evaluator's {@code or} method is then invoked with the reader and a {@link FastBitSet}.
 * OR queries get special treatment: their immediate term operands are grouped by field and
 * handed to a per-field term-set evaluator instead of one evaluator per term.
 *
 * @author jsgroth
 */
public class FlamdexSearcher {
    private final FlamdexReader r;

    /**
     * @param r the reader every search issued through this instance is evaluated against
     */
    public FlamdexSearcher(FlamdexReader r) {
        this.r = r;
    }

    /**
     * Convenience search for callers that do not care about memory safety: allocates a new bit
     * set sized to {@code r.getNumDocs()} and evaluates the query with a
     * {@link MockFastBitSetPooler}.
     *
     * @param query the query to evaluate
     * @return the freshly allocated bit set after the evaluator has been applied to it
     * @throws RuntimeException wrapping a {@link FlamdexOutOfMemoryException}, which is not
     *         expected to happen with the mock pooler
     */
    public FastBitSet search(final Query query) {
        final FastBitSet ret = new FastBitSet(r.getNumDocs());
        final QueryEvaluator evaluator = rewriteQuery(query);
        try {
            evaluator.or(r, ret, new MockFastBitSetPooler());
        } catch (FlamdexOutOfMemoryException e) {
            // The message must name the pooler that is actually used above.
            throw new RuntimeException("wtf, MockFastBitSetPooler doesn't actually throw this exception", e);
        }
        return ret;
    }

    /**
     * Evaluates the query into a caller-supplied bit set, using the caller's pooler for any
     * scratch bit sets the evaluators need.
     *
     * @param query the query to evaluate
     * @param bitSet the bit set handed to the root evaluator's {@code or} method
     *        (NOTE(review): presumably matches are OR-ed into it without clearing - confirm
     *        against the QueryEvaluator implementations)
     * @param bitSetPooler the pooler passed through to the evaluators
     * @throws FlamdexOutOfMemoryException propagated from the evaluator
     */
    public void search(final Query query, final FastBitSet bitSet, final FastBitSetPooler bitSetPooler) throws FlamdexOutOfMemoryException {
        final QueryEvaluator evaluator = rewriteQuery(query);
        evaluator.or(r, bitSet, bitSetPooler);
    }

    /**
     * Translates a query tree into the matching evaluator tree.
     *
     * @param query the query (sub)tree to translate
     * @return the evaluator for {@code query}
     * @throws IllegalArgumentException if the query type is not TERM, BOOLEAN or RANGE
     */
    private static QueryEvaluator rewriteQuery(final Query query) {
        switch (query.getQueryType()) {
            case TERM:
                return new TermQueryEvaluator(query.getStartTerm());
            case BOOLEAN:
                if (query.getOperator() == BooleanOp.OR) {
                    return rewriteOr(query);
                } else {
                    final List<QueryEvaluator> operands = new ArrayList<QueryEvaluator>(query.getOperands().size());
                    for (final Query operand : query.getOperands()) {
                        // Recursive descent: depth is bounded by the nesting depth of the query.
                        operands.add(rewriteQuery(operand));
                    }
                    return new BooleanQueryEvaluator(query.getOperator(), operands);
                }
            case RANGE:
                // The start term decides whether this is an int or a string range.
                if (query.getStartTerm().isIntField()) {
                    return new IntRangeQueryEvaluator(query.getStartTerm(), query.getEndTerm(), query.isMaxInclusive());
                } else {
                    return new StringRangeQueryEvaluator(query.getStartTerm(), query.getEndTerm(), query.isMaxInclusive());
                }
            default:
                throw new IllegalArgumentException("unrecognized query type: " + query.getQueryType());
        }
    }

    /**
     * Rewrites an OR query, separating its operands into two categories:
     * <ul>
     *   <li>term queries, which are grouped by field and turned into one
     *       {@code IntTermSetQueryEvaluator} or {@code StringTermSetQueryEvaluator} per field;</li>
     *   <li>everything else, which goes through {@link #rewriteQuery(Query)} as usual.</li>
     * </ul>
     * The resulting operand order is: non-term operands in query order, then the string term
     * sets, then the int term sets.
     *
     * @param query a BOOLEAN query whose operator is OR
     * @return an OR evaluator over the rewritten operands
     */
    private static QueryEvaluator rewriteOr(Query query) {
        final List<QueryEvaluator> operands = new ArrayList<QueryEvaluator>(query.getOperands().size());
        final Map<String, List<Query>> stringFieldOperandMap = Maps.newHashMap();
        final Map<String, List<Query>> intFieldOperandMap = Maps.newHashMap();

        // Split out all the immediate terms that can be turned into an optimized OR query.
        for (final Query operand : query.getOperands()) {
            if (operand.getQueryType() == QueryType.TERM) {
                final Term term = operand.getStartTerm();
                final Map<String, List<Query>> target = term.isIntField() ? intFieldOperandMap : stringFieldOperandMap;
                addToFieldGroup(target, term.getFieldName(), operand);
            } else {
                // Anything that is not a plain term takes the unoptimized path.
                operands.add(rewriteQuery(operand));
            }
        }

        // NOTE(review): duplicate terms on the same field are passed through to the term-set
        // evaluators as-is (sorted, not de-duplicated) - confirm they tolerate duplicates.
        for (final Map.Entry<String, List<Query>> entry : stringFieldOperandMap.entrySet()) {
            final List<Query> queries = entry.getValue();
            final String[] terms = new String[queries.size()];
            for (int i = 0; i < terms.length; i++) {
                terms[i] = queries.get(i).getStartTerm().getTermStringVal();
            }
            Arrays.sort(terms);
            operands.add(new StringTermSetQueryEvaluator(entry.getKey(), terms));
        }

        for (final Map.Entry<String, List<Query>> entry : intFieldOperandMap.entrySet()) {
            final List<Query> queries = entry.getValue();
            final long[] terms = new long[queries.size()];
            for (int i = 0; i < terms.length; i++) {
                terms[i] = queries.get(i).getStartTerm().getTermIntVal();
            }
            Arrays.sort(terms);
            operands.add(new IntTermSetQueryEvaluator(entry.getKey(), terms));
        }

        return new BooleanQueryEvaluator(BooleanOp.OR, operands);
    }

    /**
     * Appends {@code operand} to the list stored under {@code field}, creating the list on first use.
     */
    private static void addToFieldGroup(final Map<String, List<Query>> groups, final String field, final Query operand) {
        List<Query> group = groups.get(field);
        if (group == null) {
            group = Lists.<Query>newArrayList();
            groups.put(field, group);
        }
        group.add(operand);
    }
}