/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.exoplatform.services.jcr.impl.core.query.lucene;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermDocs;
import org.apache.lucene.index.TermEnum;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Searcher;
import org.apache.lucene.search.Similarity;
import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.search.Weight;

import java.io.IOException;
import java.util.ArrayList;
import java.util.BitSet;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;

/**
 * Implements a lucene range query.
 * <p>
 * {@link #rewrite(IndexReader)} first tries to delegate to a standard
 * {@link TermRangeQuery}. When that is not possible (a term transformation is
 * configured, the range has no upper bound, or the rewrite exceeds the
 * boolean clause limit) the query evaluates itself through
 * {@link RangeQueryWeight} and {@link RangeQueryScorer}.
 */
public class RangeQuery extends Query implements Transformable {

    /**
     * The serial version UID
     */
    private static final long serialVersionUID = 6232103337968115020L;

    /**
     * The lower term. Never <code>null</code> once constructed: when the
     * caller passes <code>null</code> the constructor substitutes a term with
     * empty text on the field of the upper term.
     */
    private final Term lowerTerm;

    /**
     * The upper term. May be <code>null</code> if the lower term passed to
     * the constructor was not <code>null</code>; in that case the range has
     * no upper bound.
     */
    private final Term upperTerm;

    /**
     * If <code>true</code> the range interval is inclusive.
     */
    private final boolean inclusive;

    /**
     * How the term enum is transformed before it is compared to lower and upper
     * term.
     */
    private int transform = TRANSFORM_NONE;

    /**
     * The rewritten range query or <code>null</code> if the range spans more
     * than {@link org.apache.lucene.search.BooleanQuery#maxClauseCount} terms.
     */
    private Query stdRangeQuery;

    /**
     * Creates a new RangeQuery. The lower or the upper term may be
     * <code>null</code>, but not both!
     *
     * @param lowerTerm the lower term of the interval, or <code>null</code>
     * @param upperTerm the upper term of the interval, or <code>null</code>.
     * @param inclusive if <code>true</code> the interval is inclusive.
     */
    public RangeQuery(Term lowerTerm, Term upperTerm, boolean inclusive) {
        this(lowerTerm, upperTerm, inclusive, TRANSFORM_NONE);
    }

    /**
     * Creates a new RangeQuery. The lower or the upper term may be
     * <code>null</code>, but not both!
     *
     * @param lowerTerm the lower term of the interval, or <code>null</code>
     * @param upperTerm the upper term of the interval, or <code>null</code>.
     * @param inclusive if <code>true</code> the interval is inclusive.
     * @param transform how term enums are transformed when read from the index.
     * @throws IllegalArgumentException if both terms are <code>null</code> or
     *                                  if they refer to different fields.
     */
    public RangeQuery(Term lowerTerm, Term upperTerm, boolean inclusive, int transform) {
        if (lowerTerm == null && upperTerm == null) {
            throw new IllegalArgumentException("At least one term must be non-null");
        }
        // compare by value: equals() yields the same answer as an identity
        // check for interned field names and stays correct if they are not
        if (lowerTerm != null && upperTerm != null && !lowerTerm.field().equals(upperTerm.field())) {
            throw new IllegalArgumentException("Both terms must be for the same field");
        }

        // if we have a lowerTerm, start there. otherwise, start at beginning
        if (lowerTerm != null) {
            this.lowerTerm = lowerTerm;
        } else {
            this.lowerTerm = new Term(upperTerm.field(), "");
        }
        this.upperTerm = upperTerm;
        this.inclusive = inclusive;
        this.transform = transform;
    }

    /**
     * {@inheritDoc}
     */
    public void setTransformation(int transformation) {
        this.transform = transformation;
    }

    /**
     * Tries to rewrite this query into a standard lucene range query.
     * This rewrite might fail with a TooManyClauses exception. If that
     * happens, we use our own implementation. Our own implementation is also
     * used when a term transformation is configured or when this range has no
     * upper term.
     *
     * @param reader the index reader.
     * @return the rewritten query or this query if rewriting is not possible.
     * @throws IOException if an error occurs.
     */
    public Query rewrite(IndexReader reader) throws IOException {
        if (transform != TRANSFORM_NONE) {
            // always use our implementation when we need to transform the
            // term enum
            return this;
        }
        if (upperTerm == null) {
            // No upper bound: dereferencing upperTerm below would throw a
            // NullPointerException. The scorer of this class already skips
            // the upper-bound check for a null upper term, so fall back to it.
            return this;
        }

        Query stdRangeQueryImpl =
            new TermRangeQuery(lowerTerm.field(), lowerTerm.text(), upperTerm.text(), inclusive, inclusive);
        try {
            stdRangeQuery = stdRangeQueryImpl.rewrite(reader);
            return stdRangeQuery;
        } catch (BooleanQuery.TooManyClauses e) {
            // failed, use own implementation
            return this;
        }
    }

    /**
     * Creates the <code>Weight</code> for this query.
     *
     * @param searcher the searcher to use for the <code>Weight</code>.
     * @return the <code>Weight</code> for this query.
     */
    public Weight createWeight(Searcher searcher) {
        return new RangeQueryWeight(searcher);
    }

    /**
     * Returns a string representation of this query.
     *
     * @param field the field name for which to create a string representation.
     *              The field prefix is omitted when it equals the field of
     *              this query.
     * @return a string representation of this query.
     */
    public String toString(String field) {
        StringBuilder buffer = new StringBuilder();
        if (!getField().equals(field)) {
            buffer.append(getField());
            buffer.append(":");
        }
        buffer.append(inclusive ? "[" : "{");
        // lowerTerm is never null, see constructor
        buffer.append(lowerTerm.text());
        buffer.append(" TO ");
        buffer.append(upperTerm != null ? upperTerm.text() : "null");
        buffer.append(inclusive ? "]" : "}");
        if (getBoost() != 1.0f) {
            buffer.append("^");
            buffer.append(Float.toString(getBoost()));
        }
        return buffer.toString();
    }

    /**
     * {@inheritDoc}
     * <p>
     * Only contributes terms when a previous call to
     * {@link #rewrite(IndexReader)} produced a standard range query;
     * otherwise <code>terms</code> is left untouched.
     */
    public void extractTerms(Set<Term> terms) {
        if (stdRangeQuery != null) {
            stdRangeQuery.extractTerms(terms);
        }
    }

    /**
     * Returns the field name for this query.
     *
     * @return the field of the lower term, which the constructor guarantees
     *         to be non-null and on the same field as the upper term.
     */
    private String getField() {
        return lowerTerm.field();
    }

    //--------------------------< RangeQueryWeight >----------------------------

    /**
     * The <code>Weight</code> implementation for this <code>RangeQuery</code>.
     * All weighting values are constant: every matching document scores the
     * same.
     */
    private class RangeQueryWeight extends AbstractWeight {

        private static final long serialVersionUID = -3768950544626254226L;

        /**
         * Creates a new <code>RangeQueryWeight</code> instance using
         * <code>searcher</code>.
         *
         * @param searcher a <code>Searcher</code> instance.
         */
        RangeQueryWeight(Searcher searcher) {
            super(searcher);
        }

        /**
         * Creates a {@link RangeQueryScorer} instance.
         *
         * @param reader           index reader
         * @param scoreDocsInOrder not used by this implementation
         * @param topScorer        not used by this implementation
         * @return a {@link RangeQueryScorer} instance
         */
        @Override
        protected Scorer createScorer(IndexReader reader, boolean scoreDocsInOrder, boolean topScorer) {
            return new RangeQueryScorer(searcher.getSimilarity(), reader);
        }

        /**
         * Returns this <code>RangeQuery</code>.
         *
         * @return this <code>RangeQuery</code>.
         */
        @Override
        public Query getQuery() {
            return RangeQuery.this;
        }

        /**
         * {@inheritDoc}
         */
        @Override
        public float getValue() {
            return 1.0f;
        }

        /**
         * {@inheritDoc}
         */
        @Override
        public float sumOfSquaredWeights() throws IOException {
            return 1.0f;
        }

        /**
         * {@inheritDoc}
         */
        @Override
        public void normalize(float norm) {
            // intentionally empty: the weight is constant
        }

        /**
         * {@inheritDoc}
         */
        @Override
        public Explanation explain(IndexReader reader, int doc) throws IOException {
            return new Explanation();
        }
    }

    //------------------------< RangeQueryScorer >------------------------------

    /**
     * Implements a <code>Scorer</code> for this <code>RangeQuery</code>.
     * Matching documents are collected lazily into a {@link BitSet} on the
     * first call to {@link #nextDoc()} or {@link #advance(int)} and are then
     * stored in the {@link PerQueryCache} under a key derived from the range
     * definition.
     */
    private final class RangeQueryScorer extends Scorer {

        /**
         * The index reader to use for calculating the matching documents.
         */
        private final IndexReader reader;

        /**
         * The documents ids that match this range query.
         */
        private final BitSet hits;

        /**
         * Set to <code>true</code> when the hits have been calculated.
         */
        private boolean hitsCalculated = false;

        /**
         * The next document id to return
         */
        private int nextDoc = -1;

        /**
         * The cache key to use to store the results.
         */
        private final String cacheKey;

        /**
         * The map to store the results.
         */
        private final Map<String, BitSet> resultMap;

        /**
         * Creates a new RangeQueryScorer.
         *
         * @param similarity the similarity implementation.
         * @param reader     the index reader to use.
         */
        RangeQueryScorer(Similarity similarity, IndexReader reader) {
            super(similarity);
            this.reader = reader;

            // the key encodes everything that determines the result set:
            // field, lower text, upper text, inclusiveness and transformation
            StringBuilder key = new StringBuilder();
            key.append(getField());
            key.append('\uFFFF');
            key.append(lowerTerm.text());
            key.append('\uFFFF');
            key.append(upperTerm != null ? upperTerm.text() : "");
            key.append('\uFFFF');
            key.append(inclusive);
            key.append('\uFFFF');
            key.append(transform);
            this.cacheKey = key.toString();

            // check cache
            PerQueryCache cache = PerQueryCache.getInstance();
            // only this class stores values under (RangeQueryScorer.class, reader),
            // and it always stores a Map<String, BitSet>
            @SuppressWarnings("unchecked")
            Map<String, BitSet> m = (Map<String, BitSet>) cache.get(RangeQueryScorer.class, reader);
            if (m == null) {
                m = new HashMap<String, BitSet>();
                cache.put(RangeQueryScorer.class, reader, m);
            }
            resultMap = m;

            BitSet result = resultMap.get(cacheKey);
            if (result == null) {
                result = new BitSet(reader.maxDoc());
            } else {
                // reuse the previously calculated hits
                hitsCalculated = true;
            }
            hits = result;
        }

        /**
         * Moves to the next matching document.
         *
         * @return the next matching document id or <code>NO_MORE_DOCS</code>.
         * @throws IOException if an error occurs while reading from the index.
         */
        @Override
        public int nextDoc() throws IOException {
            if (nextDoc == NO_MORE_DOCS) {
                return nextDoc;
            }

            calculateHits();
            nextDoc = hits.nextSetBit(nextDoc + 1);
            if (nextDoc < 0) {
                nextDoc = NO_MORE_DOCS;
            }
            return nextDoc;
        }

        /**
         * @return the current document id; <code>-1</code> before iteration
         *         has started.
         */
        @Override
        public int docID() {
            return nextDoc;
        }

        /**
         * @return always <code>1.0f</code>.
         */
        @Override
        public float score() {
            return 1.0f;
        }

        /**
         * Moves to the first matching document whose id is greater than or
         * equal to <code>target</code>.
         *
         * @param target the minimum document id to move to.
         * @return the matching document id or <code>NO_MORE_DOCS</code>.
         * @throws IOException if an error occurs while reading from the index.
         */
        @Override
        public int advance(int target) throws IOException {
            if (nextDoc == NO_MORE_DOCS) {
                return nextDoc;
            }

            calculateHits();
            nextDoc = hits.nextSetBit(target);
            if (nextDoc < 0) {
                nextDoc = NO_MORE_DOCS;
            }
            return nextDoc;
        }

        /**
         * Calculates the ids of the documents matching this range query and
         * stores the result in the cache. Does nothing if the hits are
         * already available.
         *
         * @throws IOException if an error occurs while reading from the index.
         */
        private void calculateHits() throws IOException {
            if (hitsCalculated) {
                return;
            }

            String testField = getField();

            // The term enum is positioned at the first term >= the start term,
            // which is already correct for an inclusive, untransformed range.
            // In every other case each term must be checked against the lower
            // bound explicitly.
            boolean checkLower = !inclusive || transform != TRANSFORM_NONE;

            // leading part of the term text, as reported by FieldNames, that
            // is shared by all terms enumerated for this range
            int propNameLength = FieldNames.getNameLength(lowerTerm.text());
            String namePrefix = "";
            if (propNameLength > 0) {
                namePrefix = lowerTerm.text().substring(0, propNameLength);
            }

            List<Term> startTerms = new ArrayList<Term>(2);
            if (transform == TRANSFORM_NONE || lowerTerm.text().length() <= propNameLength) {
                // use lowerTerm as is
                startTerms.add(lowerTerm);
            } else {
                // first enumerate terms using lower case start character
                StringBuilder termText = new StringBuilder(propNameLength + 1);
                termText.append(lowerTerm.text().subSequence(0, propNameLength));
                char startCharacter = lowerTerm.text().charAt(propNameLength);
                termText.append(Character.toLowerCase(startCharacter));
                startTerms.add(new Term(lowerTerm.field(), termText.toString()));
                // second enumerate terms using upper case start character
                termText.setCharAt(termText.length() - 1, Character.toUpperCase(startCharacter));
                startTerms.add(new Term(lowerTerm.field(), termText.toString()));
            }

            for (Term startTerm : startTerms) {
                TermEnum terms = reader.terms(startTerm);
                try {
                    TermDocs docs = reader.termDocs();
                    try {
                        do {
                            Term term = terms.term();
                            // compare field names by value; identical result to
                            // an identity check when the names are interned
                            if (term != null && testField.equals(term.field())
                                && term.text().startsWith(namePrefix)) {
                                if (checkLower) {
                                    int compare = termCompare(term.text(), lowerTerm.text(), propNameLength);
                                    if (compare > 0 || compare == 0 && inclusive) {
                                        // do not check lower term anymore if no
                                        // transformation is done on the term enum
                                        checkLower = transform != TRANSFORM_NONE;
                                    } else {
                                        // continue with next term
                                        continue;
                                    }
                                }
                                if (upperTerm != null) {
                                    int compare = termCompare(term.text(), upperTerm.text(), propNameLength);
                                    // if beyond the upper term, or is exclusive and
                                    // this is equal to the upper term
                                    if ((compare > 0) || (!inclusive && compare == 0)) {
                                        // only break out if no transformation
                                        // was done on the term from the enum
                                        if (transform == TRANSFORM_NONE) {
                                            break;
                                        } else {
                                            // because of the transformation
                                            // it is possible that the next
                                            // term will be included again if
                                            // we still enumerate on the same
                                            // property name
                                            if (term.text().startsWith(namePrefix)) {
                                                continue;
                                            } else {
                                                break;
                                            }
                                        }
                                    }
                                }

                                // term is within the range: record all its documents
                                docs.seek(terms);
                                while (docs.next()) {
                                    hits.set(docs.doc());
                                }
                            } else {
                                // enum exhausted, or moved on to another field
                                // or name prefix
                                break;
                            }
                        } while (terms.next());
                    } finally {
                        docs.close();
                    }
                } finally {
                    terms.close();
                }
            }

            hitsCalculated = true;
            // put to cache
            resultMap.put(cacheKey, hits);
        }

        /**
         * Compares the <code>text</code> with the <code>other</code> String. This
         * implementation behaves like {@link String#compareTo(String)} but also
         * respects the {@link RangeQuery#transform} property.
         *
         * @param text   the text to compare to <code>other</code>. The
         *               transformation function is applied to this parameter before
         *               it is compared to <code>other</code>.
         * @param other  the other String.
         * @param offset start comparing the two strings at <code>offset</code>.
         * @return see {@link String#compareTo(String)}. But also respects {@link
         *         RangeQuery#transform}.
         */
        private int termCompare(String text, String other, int offset) {
            OffsetCharSequence seq1 = new OffsetCharSequence(offset, text, transform);
            OffsetCharSequence seq2 = new OffsetCharSequence(offset, other);
            return seq1.compareTo(seq2);
        }
    }
}