WildcardQuery.java example

Explorer
jcr-master
- jcr-develop
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.exoplatform.services.jcr.impl.core.query.lucene;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermDocs;
import org.apache.lucene.index.TermEnum;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.FilteredTermEnum;
import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Searcher;
import org.apache.lucene.search.Similarity;
import org.apache.lucene.search.Weight;
import org.apache.lucene.util.ToStringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.util.BitSet;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;

/**
 * Implements a wildcard query on a lucene field with an embedded property name
 * and a pattern.
 * <br>
 * Wildcards are:
 * <ul>
 * <li><code>%</code> : matches zero or more characters</li>
 * <li><code>_</code> : matches exactly one character</li>
 * </ul>
 */
public class WildcardQuery extends Query implements Transformable
{

   /**
    * The serial version UID.
    */
   private static final long serialVersionUID = -376896975523503868L;

   /**
    * Logger instance for this class. It is obtained by an explicit category
    * name rather than from the class object.
    */
   private static final Logger log = LoggerFactory.getLogger("exo.jcr.component.core.WildcardQuery");

   /**
    * Name of the field to search. The constructor stores the interned form
    * of the name it receives.
    */
   private final String field;

   /**
    * Name of the property to search.
    */
   private final String propName;

   /**
    * The wildcard pattern.
    */
   private final String pattern;

   /**
    * How property values are transformed before they are matched using the
    * provided pattern. Starts as <code>TRANSFORM_NONE</code>; assigned by the
    * constructor and by {@link #setTransformation(int)}.
    */
   private int transform = TRANSFORM_NONE;

   /**
    * The query obtained from the standard multi term rewrite performed in
    * {@link #rewrite(IndexReader)}. It is <code>null</code> until such a
    * rewrite has succeeded, that is, until the matching terms could be
    * enumerated without a {@link BooleanQuery.TooManyClauses} error. When it
    * is set, {@link #extractTerms(Set)} delegates to it.
    */
   private Query multiTermQuery;

   /**
    * Creates a new <code>WildcardQuery</code>.
    *
    * @param field     the name of the field to search.
    * @param propName  name of the property to search.
    * @param pattern   the wildcard pattern.
    * @param transform how property values are transformed before they are
    *                  matched using the <code>pattern</code>.
    */
   public WildcardQuery(String field, String propName, String pattern, int transform)
   {
      this.field = field.intern();
      this.propName = propName;
      this.pattern = pattern;
      this.transform = transform;
   }

   /**
    * Creates a new <code>WildcardQuery</code> that matches property values
    * without transforming them. Delegates to the four argument constructor
    * with <code>TRANSFORM_NONE</code>.
    *
    * @param field    the name of the field to search.
    * @param propName name of the property to search.
    * @param pattern  the wildcard pattern.
    */
   public WildcardQuery(String field, String propName, String pattern)
   {
      this(field, propName, pattern, TRANSFORM_NONE);
   }

   /**
    * {@inheritDoc}
    * <p>
    * This implementation simply stores the value, replacing the
    * transformation that was in effect before the call.
    *
    * @param transformation how property values are transformed before they
    *                       are matched using the pattern.
    */
   public void setTransformation(int transformation)
   {
      this.transform = transformation;
   }

   /**
    * Rewrites this query into a form that can be executed against
    * <code>reader</code>.
    * <p>
    * The pattern is first expanded through a standard lucene
    * {@link MultiTermQuery} whose terms are enumerated by a
    * <code>WildcardTermEnum</code>. If that expansion fails with
    * {@link BooleanQuery.TooManyClauses}, this query returns itself and is
    * then executed by its own weight and scorer, which collect all hits in a
    * <code>BitSet</code>.
    * <p>
    * The outcome is remembered for {@link #extractTerms(Set)}: a successful
    * expansion is stored, while a fallback discards any expansion kept from
    * an earlier call, so that terms enumerated for another reader are never
    * reported for this one.
    *
    * @param reader the index reader to use for the search.
    * @return the expanded multi term query, or this query if there are too
    *         many matching terms to enumerate.
    * @throws IOException if an error occurs while reading from the index.
    */
   @Override
   public Query rewrite(IndexReader reader) throws IOException
   {
      @SuppressWarnings("serial")
      Query stdWildcardQuery = new MultiTermQuery()
      {
         /**
          * Enumerates the terms matching the enclosing query's pattern.
          */
         @Override
         protected FilteredTermEnum getEnum(IndexReader reader) throws IOException
         {
            return new WildcardTermEnum(reader, field, propName, pattern, transform);
         }

         /**
          * Prints a user-readable version of this query: the field name
          * passed in, a colon and the boost as formatted by
          * <code>ToStringUtils</code>. Note that the parameter hides the
          * field of the enclosing query.
          */
         @Override
         public String toString(String field)
         {
            StringBuilder buffer = new StringBuilder();
            buffer.append(field);
            buffer.append(':');
            buffer.append(ToStringUtils.boost(getBoost()));
            return buffer.toString();
         }
      };
      try
      {
         multiTermQuery = stdWildcardQuery.rewrite(reader);
         return multiTermQuery;
      }
      catch (BooleanQuery.TooManyClauses e)
      {
         // MultiTermQuery not possible. Forget an expansion remembered from
         // a previous rewrite: it does not describe this fallback execution
         // and would make extractTerms() report stale terms.
         multiTermQuery = null;
         log.debug("Too many terms to enumerate, using custom WildcardQuery.");
         return this;
      }
   }

   /**
    * Creates the <code>Weight</code> for this query.
    *
    * @param searcher the searcher to use for the <code>Weight</code>.
    * @return a new <code>WildcardQueryWeight</code> created for
    *         <code>searcher</code>.
    */
   @Override
   public Weight createWeight(Searcher searcher)
   {
      return new WildcardQueryWeight(searcher);
   }

   /**
    * Renders this query as the property name, a colon and the wildcard
    * pattern.
    *
    * @param field the field name for which to create a string
    *              representation; it does not influence the result.
    * @return a string representation of this query.
    */
   @Override
   public String toString(String field)
   {
      StringBuilder text = new StringBuilder();
      text.append(propName);
      text.append(':');
      text.append(pattern);
      return text.toString();
   }

   /**
    * {@inheritDoc}
    * <p>
    * Terms are only available once {@link #rewrite(IndexReader)} has stored
    * a multi term query; in that case the call is forwarded to it. Otherwise
    * <code>terms</code> is left unchanged.
    */
   @Override
   public void extractTerms(Set<Term> terms)
   {
      final Query expanded = multiTermQuery;
      if (expanded == null)
      {
         // nothing was enumerated, so there are no terms to report
         return;
      }
      expanded.extractTerms(terms);
   }

   /**
    * The <code>Weight</code> implementation for this <code>WildcardQuery</code>.
    * It reports a fixed value and sum of squared weights of <code>1.0f</code>
    * and ignores normalization.
    */
   private class WildcardQueryWeight extends AbstractWeight
   {

      /**
       * The serial version UID.
       */
      private static final long serialVersionUID = 4836918187825730908L;

      /**
       * Creates a new <code>WildcardQueryWeight</code> instance using
       * <code>searcher</code>.
       *
       * @param searcher a <code>Searcher</code> instance.
       */
      public WildcardQueryWeight(Searcher searcher)
      {
         super(searcher);
      }

      /**
       * Creates a {@link WildcardQueryScorer} instance that uses the
       * similarity obtained from <code>searcher</code>.
       *
       * @param reader           index reader
       * @param scoreDocsInOrder not used by this implementation
       * @param topScorer        not used by this implementation
       * @return a {@link WildcardQueryScorer} instance
       */
      @Override
      protected Scorer createScorer(IndexReader reader, boolean scoreDocsInOrder, boolean topScorer)
      {
         return new WildcardQueryScorer(searcher.getSimilarity(), reader);
      }

      /**
       * Returns this <code>WildcardQuery</code>.
       *
       * @return this <code>WildcardQuery</code>.
       */
      @Override
      public Query getQuery()
      {
         return WildcardQuery.this;
      }

      /**
       * {@inheritDoc}
       *
       * @return always <code>1.0f</code>.
       */
      @Override
      public float getValue()
      {
         return 1.0f;
      }

      /**
       * {@inheritDoc}
       *
       * @return always <code>1.0f</code>.
       */
      @Override
      public float sumOfSquaredWeights() throws IOException
      {
         return 1.0f;
      }

      /**
       * {@inheritDoc}
       * <p>
       * This implementation does nothing; <code>norm</code> is ignored.
       */
      @Override
      public void normalize(float norm)
      {
      }

      /**
       * {@inheritDoc}
       * <p>
       * This implementation does not inspect <code>reader</code> or
       * <code>doc</code>; it returns a newly created <code>Explanation</code>
       * built with the no-argument constructor.
       */
      @Override
      public Explanation explain(IndexReader reader, int doc) throws IOException
      {
         return new Explanation();
      }
   }

   /**
    * The <code>Scorer</code> that executes this <code>WildcardQuery</code>
    * directly. All matching document ids are collected into a
    * <code>BitSet</code> the first time the scorer is positioned, and every
    * match is scored <code>1.0f</code>. Computed bit sets are kept in the
    * <code>PerQueryCache</code> entry of the index reader, keyed by field,
    * property name, transformation and pattern.
    */
   private final class WildcardQueryScorer extends Scorer
   {

      /**
       * The index reader the matching documents are read from.
       */
      private final IndexReader reader;

      /**
       * Ids of the documents matching this wildcard query. Only complete
       * once {@link #hitsCalculated} is <code>true</code>.
       */
      private final BitSet hits;

      /**
       * Whether {@link #hits} already holds the complete result.
       */
      private boolean hitsCalculated = false;

      /**
       * The current document id: <code>-1</code> before the first
       * positioning, <code>NO_MORE_DOCS</code> once exhausted.
       */
      private int nextDoc = -1;

      /**
       * The key under which the result is stored in {@link #resultMap}.
       */
      private final String cacheKey;

      /**
       * The cached results that belong to {@link #reader}.
       */
      private final Map<String, BitSet> resultMap;

      /**
       * Creates a new WildcardQueryScorer and looks up a previously
       * calculated result for the same query parameters and reader.
       *
       * @param similarity the similarity implementation.
       * @param reader     the index reader to use.
       */
      WildcardQueryScorer(Similarity similarity, IndexReader reader)
      {
         super(similarity);
         this.reader = reader;
         this.cacheKey = field + '\uFFFF' + propName + '\uFFFF' + transform + '\uFFFF' + pattern;

         // fetch the result map of this reader, registering one if missing
         PerQueryCache queryCache = PerQueryCache.getInstance();
         @SuppressWarnings("unchecked")
         Map<String, BitSet> perReader = (Map<String, BitSet>)queryCache.get(WildcardQueryScorer.class, reader);
         if (perReader == null)
         {
            perReader = new HashMap<String, BitSet>();
            queryCache.put(WildcardQueryScorer.class, reader, perReader);
         }
         resultMap = perReader;

         // a cached bit set is complete; otherwise start with an empty one
         BitSet cached = resultMap.get(cacheKey);
         hitsCalculated = cached != null;
         hits = hitsCalculated ? cached : new BitSet(reader.maxDoc());
      }

      /**
       * {@inheritDoc}
       */
      @Override
      public int nextDoc() throws IOException
      {
         return nextDoc == NO_MORE_DOCS ? NO_MORE_DOCS : seekHit(nextDoc + 1);
      }

      /**
       * {@inheritDoc}
       */
      @Override
      public int docID()
      {
         return nextDoc;
      }

      /**
       * {@inheritDoc}
       *
       * @return always <code>1.0f</code>.
       */
      @Override
      public float score()
      {
         return 1.0f;
      }

      /**
       * {@inheritDoc}
       */
      @Override
      public int advance(int target) throws IOException
      {
         return nextDoc == NO_MORE_DOCS ? NO_MORE_DOCS : seekHit(target);
      }

      /**
       * Positions this scorer on the first hit whose id is at least
       * <code>fromIndex</code>, calculating the hits first if necessary.
       *
       * @param fromIndex the lowest document id to consider.
       * @return the new current document id, or <code>NO_MORE_DOCS</code>
       *         if no hit is left.
       * @throws IOException if an error occurs while reading from the index.
       */
      private int seekHit(int fromIndex) throws IOException
      {
         calculateHits();
         int candidate = hits.nextSetBit(fromIndex);
         nextDoc = candidate < 0 ? NO_MORE_DOCS : candidate;
         return nextDoc;
      }

      /**
       * Collects the ids of all documents matching this wildcard query into
       * {@link #hits} and publishes the result in the cache. Does nothing if
       * the hits are already available.
       *
       * @throws IOException if an error occurs while reading from the index.
       */
      private void calculateHits() throws IOException
      {
         if (!hitsCalculated)
         {
            TermEnum matching = new WildcardTermEnum(reader, field, propName, pattern, transform);
            try
            {
               // unpositioned TermDocs, re-positioned for every matching term
               TermDocs postings = reader.termDocs();
               try
               {
                  if (matching.term() != null)
                  {
                     do
                     {
                        postings.seek(matching);
                        while (postings.next())
                        {
                           hits.set(postings.doc());
                        }
                     }
                     while (matching.next() && matching.term() != null);
                  }
               }
               finally
               {
                  postings.close();
               }
            }
            finally
            {
               matching.close();
            }
            hitsCalculated = true;
            // make the complete result available to later scorers
            resultMap.put(cacheKey, hits);
         }
      }

   }
}