/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.exoplatform.services.jcr.impl.core.query.lucene; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; import org.apache.lucene.index.TermDocs; import org.apache.lucene.index.TermEnum; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.Explanation; import org.apache.lucene.search.FilteredTermEnum; import org.apache.lucene.search.MultiTermQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.Scorer; import org.apache.lucene.search.Searcher; import org.apache.lucene.search.Similarity; import org.apache.lucene.search.Weight; import org.apache.lucene.util.ToStringUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.IOException; import java.util.BitSet; import java.util.HashMap; import java.util.Map; import java.util.Set; /** * Implements a wildcard query on a lucene field with an embedded property name * and a pattern. * <br> * Wildcards are: * <ul> * <li><code>%</code> : matches zero or more characters</li> * <li><code>_</code> : matches exactly one character</li> * </ul> */ public class WildcardQuery extends Query implements Transformable { /** * The serial version UID */ private static final long serialVersionUID = -376896975523503868L; /** * Logger instance for this class. */ private static final Logger log = LoggerFactory.getLogger("exo.jcr.component.core.WildcardQuery"); /** * Name of the field to search. */ private final String field; /** * Name of the property to search. */ private final String propName; /** * The wildcard pattern. */ private final String pattern; /** * How property values are transformed before they are matched using the * provided pattern. */ private int transform = TRANSFORM_NONE; /** * The standard multi term query to execute wildcard queries. This is only * set if the pattern matches less than {@link org.apache.lucene.search.BooleanQuery#maxClauseCount}. */ private Query multiTermQuery; /** * Creates a new <code>WildcardQuery</code>. * * @param field the name of the field to search. * @param propName name of the property to search. * @param pattern the wildcard pattern. * @param transform how property values are transformed before they are * matched using the <code>pattern</code>. */ public WildcardQuery(String field, String propName, String pattern, int transform) { this.field = field.intern(); this.propName = propName; this.pattern = pattern; this.transform = transform; } /** * Creates a new <code>WildcardQuery</code>. * * @param field the name of the field to search. * @param propName name of the property to search. * @param pattern the wildcard pattern. */ public WildcardQuery(String field, String propName, String pattern) { this(field, propName, pattern, TRANSFORM_NONE); } /** * {@inheritDoc} */ public void setTransformation(int transformation) { this.transform = transformation; } /** * Either rewrites this query to a lucene MultiTermQuery or in case of * a TooManyClauses exception to a custom jackrabbit query implementation * that uses a BitSet to collect all hits. * * @param reader the index reader to use for the search. * @return the rewritten query. * @throws IOException if an error occurs while reading from the index. */ @Override public Query rewrite(IndexReader reader) throws IOException { @SuppressWarnings("serial") Query stdWildcardQuery = new MultiTermQuery() { @Override protected FilteredTermEnum getEnum(IndexReader reader) throws IOException { return new WildcardTermEnum(reader, field, propName, pattern, transform); } /** Prints a user-readable version of this query. */ @Override public String toString(String field) { StringBuilder buffer = new StringBuilder(); buffer.append(field); buffer.append(':'); buffer.append(ToStringUtils.boost(getBoost())); return buffer.toString(); } }; try { multiTermQuery = stdWildcardQuery.rewrite(reader); return multiTermQuery; } catch (BooleanQuery.TooManyClauses e) { // MultiTermQuery not possible log.debug("Too many terms to enumerate, using custom WildcardQuery."); return this; } } /** * Creates the <code>Weight</code> for this query. * * @param searcher the searcher to use for the <code>Weight</code>. * @return the <code>Weigth</code> for this query. */ @Override public Weight createWeight(Searcher searcher) { return new WildcardQueryWeight(searcher); } /** * Returns a string representation of this query. * * @param field the field name for which to create a string representation. * @return a string representation of this query. */ @Override public String toString(String field) { return propName + ":" + pattern; } /** * {@inheritDoc} */ @Override public void extractTerms(Set<Term> terms) { if (multiTermQuery != null) { multiTermQuery.extractTerms(terms); } } /** * The <code>Weight</code> implementation for this <code>WildcardQuery</code>. */ private class WildcardQueryWeight extends AbstractWeight { private static final long serialVersionUID = 4836918187825730908L; /** * Creates a new <code>WildcardQueryWeight</code> instance using * <code>searcher</code>. * * @param searcher a <code>Searcher</code> instance. */ public WildcardQueryWeight(Searcher searcher) { super(searcher); } /** * Creates a {@link WildcardQueryScorer} instance. * * @param reader index reader * @return a {@link WildcardQueryScorer} instance */ @Override protected Scorer createScorer(IndexReader reader, boolean scoreDocsInOrder, boolean topScorer) { return new WildcardQueryScorer(searcher.getSimilarity(), reader); } /** * Returns this <code>WildcardQuery</code>. * * @return this <code>WildcardQuery</code>. */ @Override public Query getQuery() { return WildcardQuery.this; } /** * {@inheritDoc} */ @Override public float getValue() { return 1.0f; } /** * {@inheritDoc} */ @Override public float sumOfSquaredWeights() throws IOException { return 1.0f; } /** * {@inheritDoc} */ @Override public void normalize(float norm) { } /** * {@inheritDoc} */ @Override public Explanation explain(IndexReader reader, int doc) throws IOException { return new Explanation(); } } /** * Implements a <code>Scorer</code> for this <code>WildcardQuery</code>. */ private final class WildcardQueryScorer extends Scorer { /** * The index reader to use for calculating the matching documents. */ private final IndexReader reader; /** * The documents ids that match this wildcard query. */ private final BitSet hits; /** * Set to <code>true</code> when the hits have been calculated. */ private boolean hitsCalculated = false; /** * The next document id to return */ private int nextDoc = -1; /** * The cache key to use to store the results. */ private final String cacheKey; /** * The map to store the results. */ private final Map<String, BitSet> resultMap; /** * Creates a new WildcardQueryScorer. * * @param similarity the similarity implementation. * @param reader the index reader to use. */ WildcardQueryScorer(Similarity similarity, IndexReader reader) { super(similarity); this.reader = reader; this.cacheKey = field + '\uFFFF' + propName + '\uFFFF' + transform + '\uFFFF' + pattern; // check cache PerQueryCache cache = PerQueryCache.getInstance(); @SuppressWarnings("unchecked") Map<String, BitSet> m = (Map<String, BitSet>)cache.get(WildcardQueryScorer.class, reader); if (m == null) { m = new HashMap<String, BitSet>(); cache.put(WildcardQueryScorer.class, reader, m); } resultMap = m; BitSet result = (BitSet)resultMap.get(cacheKey); if (result == null) { result = new BitSet(reader.maxDoc()); } else { hitsCalculated = true; } hits = result; } /** * {@inheritDoc} */ @Override public int nextDoc() throws IOException { if (nextDoc == NO_MORE_DOCS) { return nextDoc; } calculateHits(); nextDoc = hits.nextSetBit(nextDoc + 1); if (nextDoc < 0) { nextDoc = NO_MORE_DOCS; } return nextDoc; } /** * {@inheritDoc} */ @Override public int docID() { return nextDoc; } /** * {@inheritDoc} */ @Override public float score() { return 1.0f; } /** * {@inheritDoc} */ @Override public int advance(int target) throws IOException { if (nextDoc == NO_MORE_DOCS) { return nextDoc; } calculateHits(); nextDoc = hits.nextSetBit(target); if (nextDoc < 0) { nextDoc = NO_MORE_DOCS; } return nextDoc; } /** * Calculates the ids of the documents matching this wildcard query. * @throws IOException if an error occurs while reading from the index. */ private void calculateHits() throws IOException { if (hitsCalculated) { return; } TermEnum terms = new WildcardTermEnum(reader, field, propName, pattern, transform); try { // use unpositioned TermDocs TermDocs docs = reader.termDocs(); try { while (terms.term() != null) { docs.seek(terms); while (docs.next()) { hits.set(docs.doc()); } if (!terms.next()) { break; } } } finally { docs.close(); } } finally { terms.close(); } hitsCalculated = true; // put to cache resultMap.put(cacheKey, hits); } } }