WildcardQuery.java example

Explorer
jackrabbit-master
- jackrabbit-trunk
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.jackrabbit.core.query.lucene;

import org.apache.jackrabbit.core.query.lucene.WildcardTermEnum.TermValueFactory;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermEnum;
import org.apache.lucene.index.TermDocs;
import org.apache.lucene.search.FilteredTermEnum;
import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Weight;
import org.apache.lucene.search.Searcher;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.Similarity;
import org.apache.lucene.util.ToStringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.util.BitSet;
import java.util.Map;
import java.util.HashMap;
import java.util.Set;

/**
 * Implements a wildcard query on a lucene field with an embedded property name
 * and a pattern.
 * <p>
 * Wildcards are:
 * <ul>
 * <li><code>%</code> : matches zero or more characters</li>
 * <li><code>_</code> : matches exactly one character</li>
 * </ul>
 */
@SuppressWarnings("serial")
public class WildcardQuery extends Query implements Transformable {

    /**
     * Logger instance for this class.
     */
    private static final Logger log = LoggerFactory.getLogger(WildcardQuery.class);

    /**
     * Name of the field to search.
     */
    private final String field;

    /**
     * Creates a term value for a given string.
     */
    private final TermValueFactory tvf;

    /**
     * The wildcard pattern.
     */
    private final String pattern;

    /**
     * How property values are transformed before they are matched using the
     * provided pattern.
     */
    private int transform = TRANSFORM_NONE;

    /**
     * Cache handed on to the weight and scorer created by this query. The
     * scorer stores the calculated hits in it, keyed by index reader.
     */
    private final PerQueryCache cache;

    /**
     * The result of rewriting the standard multi term query for this wildcard
     * query. This is only set if the most recent {@link #rewrite(IndexReader)}
     * succeeded, i.e. the pattern matched less than
     * {@link org.apache.lucene.search.BooleanQuery#getMaxClauseCount()} terms;
     * otherwise it is <code>null</code>.
     */
    private Query multiTermQuery;

    /**
     * Creates a new <code>WildcardQuery</code>.
     *
     * @param field     the name of the field to search.
     * @param propName  name of the property to search. If <code>null</code>,
     *                  a plain {@link TermValueFactory} is used to create the
     *                  term values.
     * @param pattern   the wildcard pattern.
     * @param transform how property values are transformed before they are
     *                  matched using the <code>pattern</code>.
     * @param cache     the cache passed on to the scorer of this query.
     */
    public WildcardQuery(
            String field, final String propName, String pattern, int transform,
            PerQueryCache cache) {
        this.field = field.intern();
        this.pattern = pattern;
        this.transform = transform;
        this.cache = cache;
        if (propName != null) {
            // term values carry the property name
            tvf = new WildcardTermEnum.TermValueFactory() {
                @Override
                public String createValue(String s) {
                    return FieldNames.createNamedValue(propName, s);
                }
            };
        } else {
            tvf = new WildcardTermEnum.TermValueFactory();
        }
    }

    /**
     * Creates a new <code>WildcardQuery</code> that does not transform
     * property values (<code>TRANSFORM_NONE</code>).
     *
     * @param field    the name of the field to search.
     * @param propName name of the property to search.
     * @param pattern  the wildcard pattern.
     * @param cache    the cache passed on to the scorer of this query.
     */
    public WildcardQuery(
            String field, String propName, String pattern, PerQueryCache cache) {
        this(field, propName, pattern, TRANSFORM_NONE, cache);
    }

    /**
     * {@inheritDoc}
     */
    public void setTransformation(int transformation) {
        this.transform = transformation;
    }

    /**
     * Either rewrites this query to a lucene MultiTermQuery or in case of
     * a TooManyClauses exception to a custom jackrabbit query implementation
     * that uses a BitSet to collect all hits.
     *
     * @param reader the index reader to use for the search.
     * @return the rewritten query, or this query if the pattern matches too
     *         many terms.
     * @throws IOException if an error occurs while reading from the index.
     */
    @Override
    public Query rewrite(IndexReader reader) throws IOException {
        try {
            multiTermQuery = new StdWildcardQuery(field, tvf, pattern,
                    transform).rewrite(reader);
            return multiTermQuery;
        } catch (BooleanQuery.TooManyClauses e) {
            // MultiTermQuery not possible. Drop the result of any earlier
            // successful rewrite so that extractTerms() does not report terms
            // of a query that is not used for this rewrite.
            multiTermQuery = null;
            log.debug("Too many terms to enumerate, using custom WildcardQuery.");
            return this;
        }
    }

    /**
     * Creates the <code>Weight</code> for this query.
     *
     * @param searcher the searcher to use for the <code>Weight</code>.
     * @return the <code>Weight</code> for this query.
     */
    @Override
    public Weight createWeight(Searcher searcher) {
        return new WildcardQueryWeight(searcher, cache);
    }

    /**
     * Returns a string representation of this query.
     *
     * @param field the field name for which to create a string representation.
     * @return a string representation of this query.
     */
    @Override
    public String toString(String field) {
        return field + ":" + tvf.createValue(pattern);
    }

    /**
     * Adds the terms of the rewritten multi term query to <code>terms</code>.
     * Nothing is added if this query has not been rewritten to a multi term
     * query.
     *
     * @param terms the set that receives the terms.
     */
    @Override
    public void extractTerms(Set<Term> terms) {
        if (multiTermQuery != null) {
            multiTermQuery.extractTerms(terms);
        }
    }

    /**
     * A <code>MultiTermQuery</code> that enumerates the matching terms with a
     * {@link WildcardTermEnum} and is rewritten using
     * <code>CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE</code>.
     */
    private static class StdWildcardQuery extends MultiTermQuery {

        private final String field;
        private final TermValueFactory tvf;
        private final String pattern;
        private final int transform;

        /**
         * Creates a new <code>StdWildcardQuery</code>.
         *
         * @param field     the name of the field to search.
         * @param tvf       creates the term value for the pattern.
         * @param pattern   the wildcard pattern.
         * @param transform how property values are transformed before they
         *                  are matched.
         */
        public StdWildcardQuery(String field, TermValueFactory tvf,
                String pattern, int transform) {
            this.field = field;
            this.tvf = tvf;
            this.pattern = pattern;
            this.transform = transform;
            setRewriteMethod(CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE);
        }

        @Override
        protected FilteredTermEnum getEnum(IndexReader reader)
                throws IOException {
            return new WildcardTermEnum(reader, field, tvf, pattern, transform);
        }

        /**
         * Prints a user-readable version of this query: the given field name,
         * the term value created for the pattern and the boost (if any).
         *
         * @param field the field name for which to create a string
         *              representation.
         * @return a string representation of this query.
         */
        @Override
        public String toString(String field) {
            StringBuilder buffer = new StringBuilder();
            buffer.append(field);
            buffer.append(':');
            // include the pattern, same format as WildcardQuery.toString()
            buffer.append(tvf.createValue(pattern));
            buffer.append(ToStringUtils.boost(getBoost()));
            return buffer.toString();
        }
    }

    /**
     * The <code>Weight</code> implementation for this <code>WildcardQuery</code>.
     * Value and sum of squared weights are constant (<code>1.0</code>) and
     * normalization is ignored.
     */
    private class WildcardQueryWeight extends AbstractWeight {

        /**
         * The cache passed on to the scorers created by this weight.
         */
        private final PerQueryCache cache;

        /**
         * Creates a new <code>WildcardQueryWeight</code> instance using
         * <code>searcher</code>.
         *
         * @param searcher a <code>Searcher</code> instance.
         * @param cache    the cache passed on to the scorers.
         */
        public WildcardQueryWeight(Searcher searcher, PerQueryCache cache) {
            super(searcher);
            this.cache = cache;
        }

        /**
         * Creates a {@link WildcardQueryScorer} instance.
         *
         * @param reader index reader
         * @param scoreDocsInOrder not used by this implementation
         * @param topScorer        not used by this implementation
         * @return a {@link WildcardQueryScorer} instance
         */
        protected Scorer createScorer(IndexReader reader, boolean scoreDocsInOrder,
                boolean topScorer) {
            return new WildcardQueryScorer(searcher.getSimilarity(), reader, cache);
        }

        /**
         * Returns this <code>WildcardQuery</code>.
         *
         * @return this <code>WildcardQuery</code>.
         */
        @Override
        public Query getQuery() {
            return WildcardQuery.this;
        }

        @Override
        public float getValue() {
            return 1.0f;
        }

        @Override
        public float sumOfSquaredWeights() throws IOException {
            return 1.0f;
        }

        @Override
        public void normalize(float norm) {
            // constant weight, nothing to normalize
        }

        @Override
        public Explanation explain(IndexReader reader, int doc) throws IOException {
            return new Explanation();
        }
    }

    /**
     * Implements a <code>Scorer</code> for this <code>WildcardQuery</code>.
     * The matching documents are calculated lazily on the first call to
     * {@link #nextDoc()} or {@link #advance(int)} and are then stored in the
     * per query cache.
     */
    private final class WildcardQueryScorer extends Scorer {

        /**
         * The index reader to use for calculating the matching documents.
         */
        private final IndexReader reader;

        /**
         * The documents ids that match this wildcard query.
         */
        private final BitSet hits;

        /**
         * Set to <code>true</code> when the hits have been calculated.
         */
        private boolean hitsCalculated = false;

        /**
         * The next document id to return
         */
        private int nextDoc = -1;

        /**
         * The cache key to use to store the results.
         */
        private final String cacheKey;

        /**
         * The map to store the results.
         */
        private final Map<String, BitSet> resultMap;

        /**
         * Creates a new WildcardQueryScorer.
         *
         * @param similarity the similarity implementation.
         * @param reader     the index reader to use.
         * @param cache      the cache that holds the result map for
         *                   <code>reader</code>.
         */
        WildcardQueryScorer(
                Similarity similarity, IndexReader reader,
                PerQueryCache cache) {
            super(similarity);
            this.reader = reader;
            // key is built from field, term value of the pattern and transform
            this.cacheKey = field + '\uFFFF' + tvf.createValue('\uFFFF' + pattern) + '\uFFFF' + transform;
            // check cache; only this class stores entries under this
            // class/reader key and they are always Map<String, BitSet>
            @SuppressWarnings("unchecked")
            Map<String, BitSet> m = (Map<String, BitSet>) cache.get(WildcardQueryScorer.class, reader);
            if (m == null) {
                m = new HashMap<String, BitSet>();
                cache.put(WildcardQueryScorer.class, reader, m);
            }
            resultMap = m;

            BitSet result = resultMap.get(cacheKey);
            if (result == null) {
                result = new BitSet(reader.maxDoc());
            } else {
                // hits were already calculated by an earlier scorer
                hitsCalculated = true;
            }
            hits = result;
        }

        @Override
        public int nextDoc() throws IOException {
            if (nextDoc == NO_MORE_DOCS) {
                return nextDoc;
            }
            return moveToHit(nextDoc + 1);
        }

        @Override
        public int docID() {
            return nextDoc;
        }

        @Override
        public float score() {
            return 1.0f;
        }

        @Override
        public int advance(int target) throws IOException {
            if (nextDoc == NO_MORE_DOCS) {
                return nextDoc;
            }
            // optimize in the case of an advance to finish.
            // see https://issues.apache.org/jira/browse/JCR-3091
            if (target == NO_MORE_DOCS) {
                nextDoc = NO_MORE_DOCS;
                return nextDoc;
            }
            return moveToHit(target);
        }

        /**
         * Positions this scorer on the first hit with a document id greater
         * than or equal to <code>from</code>, calculating the hits first if
         * necessary.
         *
         * @param from the lowest document id to consider.
         * @return the new current document id or <code>NO_MORE_DOCS</code> if
         *         there is no such hit.
         * @throws IOException if an error occurs while reading from the index.
         */
        private int moveToHit(int from) throws IOException {
            calculateHits();
            int doc = hits.nextSetBit(from);
            // BitSet signals "no more bits" with a negative value
            nextDoc = doc < 0 ? NO_MORE_DOCS : doc;
            return nextDoc;
        }

        /**
         * Calculates the ids of the documents matching this wildcard query.
         * Does nothing if the hits have already been calculated.
         *
         * @throws IOException if an error occurs while reading from the index.
         */
        private void calculateHits() throws IOException {
            if (hitsCalculated) {
                return;
            }
            TermEnum terms = new WildcardTermEnum(reader, field, tvf, pattern, transform);
            try {
                // use unpositioned TermDocs
                TermDocs docs = reader.termDocs();
                try {
                    while (terms.term() != null) {
                        docs.seek(terms);
                        while (docs.next()) {
                            hits.set(docs.doc());
                        }
                        if (!terms.next()) {
                            break;
                        }
                    }
                } finally {
                    docs.close();
                }
            } finally {
                terms.close();
            }
            hitsCalculated = true;
            // put to cache
            resultMap.put(cacheKey, hits);
        }

    }
}