WildcardTermEnum.java example

Explorer
jcr-master
- jcr-develop
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.exoplatform.services.jcr.impl.core.query.lucene;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermEnum;
import org.apache.lucene.search.FilteredTermEnum;
import org.exoplatform.services.log.ExoLogger;
import org.exoplatform.services.log.Log;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.regex.Matcher;

/**
 * Implements a wildcard term enum that optionally supports embedded property
 * names in lucene term texts.
 */
class WildcardTermEnum extends FilteredTermEnum implements TransformConstants
{

   private static final Log LOG = ExoLogger.getLogger("exo.jcr.component.core.WildcardTermEnum");

   /**
     * The pattern matcher.
     */
    private final Matcher pattern;

    /**
     * The lucene field to search.
     */
    private final String field;

    /**
     * The term prefix without wildcards
     */
    private final String prefix;

    /**
     * Flag that indicates the end of the term enum.
     */
    private boolean endEnum;

    /**
     * The input for the pattern matcher.
     */
    private final OffsetCharSequence input;

    /**
     * How terms from the index are transformed.
     */
    private final int transform;

    /**
     * Creates a new <code>WildcardTermEnum</code>.
     *
     * @param reader    the index reader.
     * @param field     the lucene field to search.
     * @param propName  the embedded jcr property name or <code>null</code> if
     *                  there is not embedded property name.
     * @param pattern   the pattern to match the values.
     * @param transform the transformation that should be applied to the term
     *                  enum from the index reader.
     * @throws IOException              if an error occurs while reading from
     *                                  the index.
     * @throws IllegalArgumentException if <code>transform</code> is not a valid
     *                                  value.
     */
    public WildcardTermEnum(IndexReader reader,
                            String field,
                            String propName,
                            String pattern,
                            int transform) throws IOException {
        if (transform < TRANSFORM_NONE || transform > TRANSFORM_UPPER_CASE) {
            throw new IllegalArgumentException("invalid transform parameter");
        }
        this.field = field;
        this.transform = transform;

        int idx = 0;

        if (transform == TRANSFORM_NONE) {
            // optimize the term comparison by removing the prefix from the pattern
            // and therefore use a more precise range scan
            while (idx < pattern.length()
                    && Character.isLetterOrDigit(pattern.charAt(idx))) {
                idx++;
            }

            if (propName == null) {
                prefix = pattern.substring(0, idx);
            } else {
                prefix = FieldNames.createNamedValue(propName, pattern.substring(0, idx));
            }
        } else {
            prefix = FieldNames.createNamedValue(propName, "");
        }

        // initialize with prefix as dummy value
        input = new OffsetCharSequence(prefix.length(), prefix, transform);
        this.pattern = Util.createRegexp(pattern.substring(idx)).matcher(input);

        if (transform == TRANSFORM_NONE) {
            setEnum(reader.terms(new Term(field, prefix)));
        } else {
            setEnum(new LowerUpperCaseTermEnum(reader, field, propName, pattern, transform));
        }
    }

    /**
     * {@inheritDoc}
     */
    protected boolean termCompare(Term term) {
        if (transform == TRANSFORM_NONE) {
            if (term.field() == field && term.text().startsWith(prefix)) {
                input.setBase(term.text());
                return pattern.reset().matches();
            }
            endEnum = true;
            return false;
        } else {
            // pre filtered, no need to check
            return true;
        }
    }

    /**
     * {@inheritDoc}
     */
    public float difference() {
        return 1.0f;
    }

    /**
     * {@inheritDoc}
     */
    protected boolean endEnum() {
        return endEnum;
    }

    //--------------------------< internal >------------------------------------

    /**
     * Implements a term enum which respects the transformation flag and
     * matches a pattern on the enumerated terms.
     */
    private class LowerUpperCaseTermEnum extends TermEnum {

        /**
         * The matching terms
         */
        private final Map<Term, Integer> orderedTerms = new LinkedHashMap<Term, Integer>();

        /**
         * Iterator over all matching terms
         */
        private final Iterator<Term> it;

        public LowerUpperCaseTermEnum(IndexReader reader,
                                      String field,
                                      String propName,
                                      String pattern,
                                      int transform) throws IOException {
            if (transform != TRANSFORM_LOWER_CASE && transform != TRANSFORM_UPPER_CASE) {
                throw new IllegalArgumentException("transform");
            }

            // check if pattern never matches
            boolean neverMatches = false;
            for (int i = 0; i < pattern.length() && !neverMatches; i++) {
                if (transform == TRANSFORM_LOWER_CASE) {
                    neverMatches = Character.isUpperCase(pattern.charAt(i));
                } else if (transform == TRANSFORM_UPPER_CASE) {
                    neverMatches = Character.isLowerCase(pattern.charAt(i));
                }
            }

            if (!neverMatches) {
                // create range scans
                List<RangeScan> rangeScans = new ArrayList<RangeScan>(2);
                try {
                    int idx = 0;
                    while (idx < pattern.length()
                            && Character.isLetterOrDigit(pattern.charAt(idx))) {
                        idx++;
                    }
                    String patternPrefix = pattern.substring(0, idx);
                    if (patternPrefix.length() == 0) {
                        // scan full property range
                        String prefix = FieldNames.createNamedValue(propName, "");
                        String limit = FieldNames.createNamedValue(propName, "\uFFFF");
                        rangeScans.add(new RangeScan(reader,
                                new Term(field, prefix), new Term(field, limit)));
                    } else {
                        // start with initial lower case
                        StringBuilder lowerLimit = new StringBuilder(patternPrefix.toUpperCase());
                        lowerLimit.setCharAt(0, Character.toLowerCase(lowerLimit.charAt(0)));
                        String prefix = FieldNames.createNamedValue(propName, lowerLimit.toString());

                        StringBuilder upperLimit = new StringBuilder(patternPrefix.toLowerCase());
                        upperLimit.append('\uFFFF');
                        String limit = FieldNames.createNamedValue(propName, upperLimit.toString());
                        rangeScans.add(new RangeScan(reader,
                                new Term(field, prefix), new Term(field, limit)));

                        // second scan with upper case start
                        prefix = FieldNames.createNamedValue(propName, patternPrefix.toUpperCase());
                        upperLimit = new StringBuilder(patternPrefix.toLowerCase());
                        upperLimit.setCharAt(0, Character.toUpperCase(upperLimit.charAt(0)));
                        upperLimit.append('\uFFFF');
                        limit = FieldNames.createNamedValue(propName, upperLimit.toString());
                        rangeScans.add(new RangeScan(reader,
                                new Term(field, prefix), new Term(field, limit)));
                    }

                    // do range scans with pattern matcher
                    for (Iterator<RangeScan> it = rangeScans.iterator(); it.hasNext(); ) {
                        RangeScan scan = it.next();
                        do {
                            Term t = scan.term();
                            if (t != null) {
                                input.setBase(t.text());
                                if (WildcardTermEnum.this.pattern.reset().matches()) {
                                    orderedTerms.put(t, new Integer(scan.docFreq()));
                                }
                            }
                        } while (scan.next());
                    }

                } finally {
                    // close range scans
                    Iterator<RangeScan> it = rangeScans.iterator();
                    while (it.hasNext()) {
                        RangeScan scan = it.next();
                        try 
                        {
                            scan.close();
                        } 
                        catch (IOException e) 
                        {
                           if (LOG.isTraceEnabled())
                           {
                              LOG.trace("An exception occurred: " + e.getMessage());
                           }
                        }
                    }
                }
            }

            it = orderedTerms.keySet().iterator();
            getNext();
        }

        /**
         * The current term in this enum.
         */
        private Term current;

        /**
         * {@inheritDoc}
         */
        public boolean next() {
            getNext();
            return current != null;
        }

        /**
         * {@inheritDoc}
         */
        public Term term() {
            return current;
        }

        /**
         * {@inheritDoc}
         */
        public int docFreq() {
            Integer docFreq = (Integer) orderedTerms.get(current);
            return docFreq != null ? docFreq.intValue() : 0;
        }

        /**
         * {@inheritDoc}
         */
        public void close() {
            // nothing to do here
        }

        /**
         * Sets the current field to the next term in this enum or to
         * <code>null</code> if there is no next.
         */
        private void getNext() {
            current = it.hasNext() ? it.next() : null;
        }
    }
}