/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.exoplatform.services.jcr.impl.core.query.lucene;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermEnum;
import org.apache.lucene.search.FilteredTermEnum;
import org.exoplatform.services.log.ExoLogger;
import org.exoplatform.services.log.Log;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;

/**
 * Implements a wildcard term enum that optionally supports embedded property
 * names in lucene term texts.
 * <p>
 * When no transformation is requested the enum walks the index starting at the
 * literal prefix of the pattern and filters each term with a regular
 * expression. When a lower/upper case transformation is requested, all matching
 * terms are collected up front by {@link LowerUpperCaseTermEnum} and this enum
 * merely passes them through.
 */
class WildcardTermEnum extends FilteredTermEnum implements TransformConstants
{

   private static final Log LOG = ExoLogger.getLogger("exo.jcr.component.core.WildcardTermEnum");

   /**
    * The pattern matcher. It is bound to {@link #input}, so re-basing the
    * input and calling <code>reset()</code> re-evaluates the pattern against
    * the new term text.
    */
   private final Matcher pattern;

   /**
    * The lucene field to search.
    */
   private final String field;

   /**
    * The term prefix without wildcards.
    */
   private final String prefix;

   /**
    * Flag that indicates the end of the term enum.
    */
   private boolean endEnum;

   /**
    * The input for the pattern matcher.
    */
   private final OffsetCharSequence input;

   /**
    * How terms from the index are transformed.
    */
   private final int transform;

   /**
    * Creates a new <code>WildcardTermEnum</code>.
    *
    * @param reader the index reader.
    * @param field the lucene field to search.
    * @param propName the embedded jcr property name or <code>null</code> if
    *                 there is not embedded property name.
    * @param pattern the pattern to match the values.
    * @param transform the transformation that should be applied to the term
    *                  enum from the index reader.
    * @throws IOException if an error occurs while reading from
    *                     the index.
    * @throws IllegalArgumentException if <code>transform</code> is not a valid
    *                                  value.
    */
   public WildcardTermEnum(IndexReader reader, String field, String propName, String pattern, int transform)
      throws IOException
   {
      if (transform < TRANSFORM_NONE || transform > TRANSFORM_UPPER_CASE)
      {
         throw new IllegalArgumentException("invalid transform parameter");
      }
      this.field = field;
      this.transform = transform;

      // number of leading pattern characters that are plain letters/digits;
      // stays 0 when a transformation is requested
      int idx = 0;

      if (transform == TRANSFORM_NONE)
      {
         // optimize the term comparison by removing the prefix from the pattern
         // and therefore use a more precise range scan
         while (idx < pattern.length() && Character.isLetterOrDigit(pattern.charAt(idx)))
         {
            idx++;
         }

         String literalPrefix = pattern.substring(0, idx);
         if (propName == null)
         {
            prefix = literalPrefix;
         }
         else
         {
            prefix = FieldNames.createNamedValue(propName, literalPrefix);
         }
      }
      else
      {
         prefix = FieldNames.createNamedValue(propName, "");
      }

      // initialize with prefix as dummy value
      input = new OffsetCharSequence(prefix.length(), prefix, transform);
      // only the part of the pattern after the literal prefix is compiled
      this.pattern = Util.createRegexp(pattern.substring(idx)).matcher(input);

      if (transform == TRANSFORM_NONE)
      {
         setEnum(reader.terms(new Term(field, prefix)));
      }
      else
      {
         setEnum(new LowerUpperCaseTermEnum(reader, field, propName, pattern, transform));
      }
   }

   /**
    * {@inheritDoc}
    * <p>
    * Without a transformation the term must belong to the searched field,
    * start with the literal prefix and match the pattern; the first term that
    * is outside the field or prefix ends the enumeration. With a
    * transformation the underlying enum is already filtered, so every term is
    * accepted.
    */
   protected boolean termCompare(Term term)
   {
      if (transform != TRANSFORM_NONE)
      {
         // pre filtered, no need to check
         return true;
      }

      // compare field names by value: an identity comparison silently fails
      // when the caller handed in a non-interned field name
      if (term.field().equals(field) && term.text().startsWith(prefix))
      {
         input.setBase(term.text());
         return pattern.reset().matches();
      }

      endEnum = true;
      return false;
   }

   /**
    * {@inheritDoc}
    *
    * @return always <code>1.0f</code>.
    */
   public float difference()
   {
      return 1.0f;
   }

   /**
    * {@inheritDoc}
    */
   protected boolean endEnum()
   {
      return endEnum;
   }

   //--------------------------< internal >------------------------------------

   /**
    * Implements a term enum which respects the transformation flag and
    * matches a pattern on the enumerated terms. All matching terms are
    * collected in the constructor; afterwards the enum only iterates over the
    * collected terms.
    */
   private class LowerUpperCaseTermEnum extends TermEnum
   {

      /**
       * The matching terms mapped to their document frequency, in the order
       * in which they were found.
       */
      private final Map<Term, Integer> orderedTerms = new LinkedHashMap<Term, Integer>();

      /**
       * Iterator over all matching terms
       */
      private final Iterator<Term> it;

      /**
       * The current term in this enum.
       */
      private Term current;

      /**
       * Collects all terms that match the pattern of the enclosing enum.
       *
       * @param reader the index reader.
       * @param field the lucene field to search.
       * @param propName the embedded jcr property name.
       * @param pattern the pattern to match the values.
       * @param transform either <code>TRANSFORM_LOWER_CASE</code> or
       *                  <code>TRANSFORM_UPPER_CASE</code>.
       * @throws IOException if an error occurs while reading from the index.
       * @throws IllegalArgumentException if <code>transform</code> is neither
       *                  <code>TRANSFORM_LOWER_CASE</code> nor
       *                  <code>TRANSFORM_UPPER_CASE</code>.
       */
      public LowerUpperCaseTermEnum(IndexReader reader, String field, String propName, String pattern,
         int transform) throws IOException
      {
         if (transform != TRANSFORM_LOWER_CASE && transform != TRANSFORM_UPPER_CASE)
         {
            throw new IllegalArgumentException("transform");
         }

         // check if pattern never matches: a pattern containing a character of
         // the opposite case cannot match a value transformed to the other case
         boolean neverMatches = false;
         for (int i = 0; i < pattern.length() && !neverMatches; i++)
         {
            char c = pattern.charAt(i);
            if (transform == TRANSFORM_LOWER_CASE)
            {
               neverMatches = Character.isUpperCase(c);
            }
            else if (transform == TRANSFORM_UPPER_CASE)
            {
               neverMatches = Character.isLowerCase(c);
            }
         }

         if (!neverMatches)
         {
            // create range scans
            List<RangeScan> rangeScans = new ArrayList<RangeScan>(2);
            try
            {
               // literal (letter/digit only) prefix of the pattern
               int idx = 0;
               while (idx < pattern.length() && Character.isLetterOrDigit(pattern.charAt(idx)))
               {
                  idx++;
               }
               String patternPrefix = pattern.substring(0, idx);

               if (patternPrefix.length() == 0)
               {
                  // scan full property range
                  String lower = FieldNames.createNamedValue(propName, "");
                  String upper = FieldNames.createNamedValue(propName, "\uFFFF");
                  rangeScans.add(new RangeScan(reader, new Term(field, lower), new Term(field, upper)));
               }
               else
               {
                  // start with initial lower case:
                  // from <lower-case first char + upper-case rest> to <all lower case>\uFFFF
                  StringBuilder lowerLimit = new StringBuilder(patternPrefix.toUpperCase());
                  lowerLimit.setCharAt(0, Character.toLowerCase(lowerLimit.charAt(0)));
                  String lower = FieldNames.createNamedValue(propName, lowerLimit.toString());

                  StringBuilder upperLimit = new StringBuilder(patternPrefix.toLowerCase());
                  upperLimit.append('\uFFFF');
                  String upper = FieldNames.createNamedValue(propName, upperLimit.toString());
                  rangeScans.add(new RangeScan(reader, new Term(field, lower), new Term(field, upper)));

                  // second scan with upper case start:
                  // from <all upper case> to <upper-case first char + lower-case rest>\uFFFF
                  lower = FieldNames.createNamedValue(propName, patternPrefix.toUpperCase());
                  upperLimit = new StringBuilder(patternPrefix.toLowerCase());
                  upperLimit.setCharAt(0, Character.toUpperCase(upperLimit.charAt(0)));
                  upperLimit.append('\uFFFF');
                  upper = FieldNames.createNamedValue(propName, upperLimit.toString());
                  rangeScans.add(new RangeScan(reader, new Term(field, lower), new Term(field, upper)));
               }

               // do range scans with pattern matcher
               for (RangeScan scan : rangeScans)
               {
                  do
                  {
                     Term t = scan.term();
                     if (t != null)
                     {
                        input.setBase(t.text());
                        if (WildcardTermEnum.this.pattern.reset().matches())
                        {
                           orderedTerms.put(t, Integer.valueOf(scan.docFreq()));
                        }
                     }
                  }
                  while (scan.next());
               }
            }
            finally
            {
               // close range scans; a failure to close one scan must not
               // prevent the remaining scans from being closed
               for (RangeScan scan : rangeScans)
               {
                  try
                  {
                     scan.close();
                  }
                  catch (IOException e)
                  {
                     if (LOG.isTraceEnabled())
                     {
                        LOG.trace("An exception occurred: " + e.getMessage());
                     }
                  }
               }
            }
         }

         it = orderedTerms.keySet().iterator();
         // position the enum on the first matching term, if any
         getNext();
      }

      /**
       * {@inheritDoc}
       */
      public boolean next()
      {
         getNext();
         return current != null;
      }

      /**
       * {@inheritDoc}
       */
      public Term term()
      {
         return current;
      }

      /**
       * {@inheritDoc}
       *
       * @return the document frequency recorded for the current term, or
       *         <code>0</code> if there is no entry for the current term.
       */
      public int docFreq()
      {
         Integer docFreq = orderedTerms.get(current);
         return docFreq != null ? docFreq.intValue() : 0;
      }

      /**
       * {@inheritDoc}
       */
      public void close()
      {
         // nothing to do here
      }

      /**
       * Sets the current field to the next term in this enum or to
       * <code>null</code> if there is no next.
       */
      private void getNext()
      {
         current = it.hasNext() ? it.next() : null;
      }
   }
}