/*
 * Copyright (C) 2014 Indeed Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 * in compliance with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the
 * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied. See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.indeed.flamdex.lucene;

import com.google.common.collect.Lists;
import com.indeed.flamdex.query.BooleanOp;
import com.indeed.flamdex.query.Query;
import com.indeed.flamdex.query.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.ConstantScoreRangeQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.RangeQuery;
import org.apache.lucene.search.TermQuery;

import java.util.Arrays;
import java.util.List;
import java.util.Set;

/**
 * Static helpers that translate Lucene query objects into flamdex {@link Query} objects.
 *
 * <p>Every method takes the set of field names that hold integer terms; terms on those fields are
 * parsed with {@link Long#parseLong(String)}, terms on any other field are kept as strings.
 *
 * @author jsgroth
 */
public final class LuceneQueryTranslator {
    private LuceneQueryTranslator() {}

    /**
     * Translates an arbitrary Lucene query by dispatching on its runtime type.
     *
     * @param q the Lucene query to translate
     * @param intFields names of the fields whose terms are integers
     * @return the equivalent flamdex query
     * @throws IllegalArgumentException if {@code q} is not a term, boolean, range,
     *         constant-score range, prefix or phrase query
     */
    public static Query rewrite(org.apache.lucene.search.Query q, Set<String> intFields) {
        if (q instanceof TermQuery) {
            return rewrite((TermQuery) q, intFields);
        }
        if (q instanceof BooleanQuery) {
            return rewrite((BooleanQuery) q, intFields);
        }
        if (q instanceof RangeQuery) {
            return rewrite((RangeQuery) q, intFields);
        }
        if (q instanceof ConstantScoreRangeQuery) {
            return rewrite((ConstantScoreRangeQuery) q, intFields);
        }
        if (q instanceof PrefixQuery) {
            return rewrite((PrefixQuery) q, intFields);
        }
        if (q instanceof PhraseQuery) {
            return rewrite((PhraseQuery) q, intFields);
        }
        throw new IllegalArgumentException("unsupported lucene query type: " + q.getClass().getSimpleName());
    }

    /**
     * Translates a prefix query into a range query.
     *
     * <p>On a string field the range runs from the prefix itself up to, but excluding, the
     * smallest string that sorts after every string starting with the prefix. On an int field
     * the prefix text is parsed as a number and used as the lower bound of a range that ends,
     * inclusively, at {@link Long#MAX_VALUE}.
     *
     * @param pq the prefix query to translate
     * @param intFields names of the fields whose terms are integers
     * @return a flamdex range query
     * @throws NumberFormatException if the field is an int field and the prefix is not a number
     * @throws IllegalArgumentException if the field is a string field and the prefix is empty or
     *         consists only of {@code '\uffff'} characters, so that no finite upper bound exists
     */
    public static Query rewrite(PrefixQuery pq, Set<String> intFields) {
        final String field = pq.getPrefix().field();
        final String prefix = pq.getPrefix().text();
        if (intFields.contains(field)) {
            // A prefix has no obvious meaning on a numeric field; for now it is treated as an
            // inequality ("greater than or equal to the given number").
            return Query.newRangeQuery(field, Long.parseLong(prefix), Long.MAX_VALUE, true);
        }
        return Query.newRangeQuery(field, prefix, prefixUpperBound(prefix), false);
    }

    /**
     * Computes the exclusive upper bound of the set of strings that start with {@code prefix}.
     *
     * <p>Trailing {@code '\uffff'} characters cannot be incremented (the increment would wrap
     * around to {@code '\u0000'} and produce a bound that sorts before the prefix), so they are
     * dropped and the last remaining character is incremented instead.
     */
    private static String prefixUpperBound(String prefix) {
        int end = prefix.length();
        while (end > 0 && prefix.charAt(end - 1) == Character.MAX_VALUE) {
            end--;
        }
        if (end == 0) {
            throw new IllegalArgumentException(
                    "cannot translate prefix query without a finite upper bound, prefix: \"" + prefix + "\"");
        }
        return prefix.substring(0, end - 1) + (char) (prefix.charAt(end - 1) + 1);
    }

    /**
     * Translates a boolean query.
     *
     * <p>MUST clauses are combined with AND. SHOULD clauses are combined with OR, but only when
     * the query has no MUST clause at all; otherwise they are dropped. MUST_NOT clauses are
     * collected into a single NOT query which, when there are also positive clauses, is ANDed
     * with them.
     *
     * @param bq the boolean query to translate
     * @param intFields names of the fields whose terms are integers
     * @return the equivalent flamdex boolean query
     */
    public static Query rewrite(BooleanQuery bq, Set<String> intFields) {
        boolean hasMust = false;
        for (final BooleanClause clause : bq.getClauses()) {
            if (clause.getOccur() == BooleanClause.Occur.MUST) {
                hasMust = true;
                break;
            }
        }
        final BooleanOp positiveOp = hasMust ? BooleanOp.AND : BooleanOp.OR;

        final List<Query> positives = Lists.newArrayList();
        final List<Query> negatives = Lists.newArrayList();
        for (final BooleanClause clause : bq.getClauses()) {
            final BooleanClause.Occur occur = clause.getOccur();
            if (occur == BooleanClause.Occur.MUST_NOT) {
                negatives.add(rewrite(clause.getQuery(), intFields));
            } else if (occur == BooleanClause.Occur.MUST
                    || (occur == BooleanClause.Occur.SHOULD && !hasMust)) {
                negatives.size(); // no-op read keeps both lists visibly paired; positives collected below
                positives.add(rewrite(clause.getQuery(), intFields));
            }
        }

        if (negatives.isEmpty()) {
            return Query.newBooleanQuery(positiveOp, positives);
        }

        // NOTE(review): every MUST_NOT clause becomes an operand of one NOT query; confirm that
        // the flamdex NOT operator accepts more than one operand.
        final Query notQuery = Query.newBooleanQuery(BooleanOp.NOT, negatives);
        if (positives.isEmpty()) {
            return notQuery;
        }
        return Query.newBooleanQuery(
                BooleanOp.AND,
                Arrays.asList(Query.newBooleanQuery(positiveOp, positives), notQuery));
    }

    /**
     * Translates a single-term query.
     *
     * @param tq the term query to translate
     * @param intFields names of the fields whose terms are integers
     * @return a flamdex term query on the translated term
     * @throws NumberFormatException if the field is an int field and the term text is not a number
     */
    public static Query rewrite(TermQuery tq, Set<String> intFields) {
        return Query.newTermQuery(rewriteTerm(tq.getTerm(), intFields));
    }

    /**
     * Converts a Lucene term into a flamdex term: an int term (with an empty string value) when
     * the field is listed in {@code intFields}, a string term (with a zero int value) otherwise.
     */
    private static Term rewriteTerm(org.apache.lucene.index.Term lTerm, Set<String> intFields) {
        final String field = lTerm.field();
        if (intFields.contains(field)) {
            return new Term(field, true, Long.parseLong(lTerm.text()), "");
        }
        return new Term(field, false, 0, lTerm.text());
    }

    /**
     * Translates a range query; the query's single inclusive flag is forwarded as the last
     * argument of the flamdex range query.
     *
     * <p>NOTE(review): both the lower and the upper term are dereferenced, so an open-ended
     * range (a missing bound) is presumably not supported here; confirm against callers.
     *
     * @param rq the range query to translate
     * @param intFields names of the fields whose terms are integers
     * @return a flamdex range query between the translated lower and upper terms
     * @throws NumberFormatException if the field is an int field and a bound is not a number
     */
    public static Query rewrite(RangeQuery rq, Set<String> intFields) {
        final Term lower = rewriteTerm(rq.getLowerTerm(), intFields);
        final Term upper = rewriteTerm(rq.getUpperTerm(), intFields);
        return Query.newRangeQuery(lower, upper, rq.isInclusive());
    }

    /**
     * Translates a constant-score range query; only the upper-bound inclusiveness of the query
     * is forwarded to the flamdex range query.
     *
     * <p>NOTE(review): the lower-bound inclusiveness of the Lucene query is not consulted, and
     * the lower and upper values are used as given (an absent bound is presumably not
     * supported); confirm both against the flamdex range semantics.
     *
     * @param rq the constant-score range query to translate
     * @param intFields names of the fields whose terms are integers
     * @return a flamdex range query between the translated lower and upper values
     * @throws NumberFormatException if the field is an int field and a bound is not a number
     */
    public static Query rewrite(ConstantScoreRangeQuery rq, Set<String> intFields) {
        final String field = rq.getField();
        final Term lower = rewriteTerm(new org.apache.lucene.index.Term(field, rq.getLowerVal()), intFields);
        final Term upper = rewriteTerm(new org.apache.lucene.index.Term(field, rq.getUpperVal()), intFields);
        return Query.newRangeQuery(lower, upper, rq.includesUpper());
    }

    /**
     * Translates a phrase query into the AND of one term query per phrase term. The relative
     * positions of the terms are not part of the translated query.
     *
     * @param pq the phrase query to translate
     * @param intFields names of the fields whose terms are integers
     * @return a flamdex AND query over the translated terms
     * @throws NumberFormatException if the field is an int field and a term is not a number
     */
    public static Query rewrite(PhraseQuery pq, Set<String> intFields) {
        final org.apache.lucene.index.Term[] terms = pq.getTerms();
        final List<Query> termQueries = Lists.newArrayListWithCapacity(terms.length);
        for (final org.apache.lucene.index.Term term : terms) {
            termQueries.add(Query.newTermQuery(rewriteTerm(term, intFields)));
        }
        return Query.newBooleanQuery(BooleanOp.AND, termQueries);
    }
}