/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.exoplatform.services.jcr.impl.core.query.lucene;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.Version;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Pattern;
/**
* <code>JcrQueryParser</code> extends the standard Lucene query parser
* and adds JCR specific customizations.
*/
public class JcrQueryParser extends QueryParser
{
   /**
    * Matches a stand-alone <code>AND</code>. An occurrence that is glued to a
    * preceding letter/underscore or to a following letter/digit/underscore is
    * part of a larger term (e.g. <code>BRAND</code>) and is left untouched.
    */
   private static final Pattern AND_KEYWORD = Pattern.compile("(?<![\\p{L}_])AND(?![\\p{L}\\p{N}_])");

   /**
    * Matches a stand-alone <code>NOT</code>, using the same boundary rules as
    * {@link #AND_KEYWORD}.
    */
   private static final Pattern NOT_KEYWORD = Pattern.compile("(?<![\\p{L}_])NOT(?![\\p{L}\\p{N}_])");

   /**
    * The Jackrabbit synonym provider or <code>null</code> if there is none.
    */
   private final SynonymProvider synonymProvider;

   /**
    * Creates a new query parser instance. Leading wildcards are allowed and
    * the default operator is set to <code>AND</code>.
    *
    * @param fieldName the field name.
    * @param analyzer the analyzer.
    * @param synonymProvider the synonym provider or <code>null</code> if none
    *                        is available.
    */
   public JcrQueryParser(String fieldName, Analyzer analyzer, SynonymProvider synonymProvider)
   {
      super(Version.LUCENE_36, fieldName, analyzer);
      this.synonymProvider = synonymProvider;
      setAllowLeadingWildcard(true);
      setDefaultOperator(Operator.AND);
   }

   /**
    * Rewrites the search text before handing it to the Lucene parser:
    * <ul>
    * <li>stand-alone <code>AND</code> / <code>NOT</code> are lower-cased so
    * that Lucene does not recognize them as keywords;</li>
    * <li>an escaped single quote (<code>\'</code>) is replaced by a plain
    * single quote;</li>
    * <li>a tilde at the start of the text or directly after whitespace is
    * escaped, so that it reaches {@link #getFieldQuery(String, String, boolean)}
    * as part of the term;</li>
    * <li>every other escape sequence is passed through unchanged. A single
    * trailing backslash with nothing after it is dropped.</li>
    * </ul>
    *
    * @param textsearch the search text; must not be <code>null</code>.
    * @return the parsed query.
    * @throws ParseException if the rewritten text cannot be parsed.
    */
   @Override
   public Query parse(String textsearch) throws ParseException
   {
      // the default lucene query parser recognizes 'AND' and 'NOT' as
      // keywords; only whole-word occurrences need to be neutralized
      textsearch = AND_KEYWORD.matcher(textsearch).replaceAll("and");
      textsearch = NOT_KEYWORD.matcher(textsearch).replaceAll("not");

      StringBuilder rewritten = new StringBuilder(textsearch.length() + 2);
      // true while a single backslash has been read but not yet emitted
      boolean escaped = false;
      for (int i = 0; i < textsearch.length(); i++)
      {
         char c = textsearch.charAt(i);
         if (c == '\\')
         {
            if (escaped)
            {
               // escaped backslash: keep both characters
               rewritten.append("\\\\");
               escaped = false;
            }
            else
            {
               escaped = true;
            }
         }
         else if (c == '\'')
         {
            // replace escaped ' with just '
            escaped = false;
            rewritten.append(c);
         }
         else if (c == '~')
         {
            if (escaped)
            {
               // keep the user supplied escape attached to the tilde instead
               // of letting it slip onto the following character
               rewritten.append('\\');
               escaped = false;
            }
            else if (i == 0 || Character.isWhitespace(textsearch.charAt(i - 1)))
            {
               // escape tilde so we can use it for similarity query
               rewritten.append('\\');
            }
            rewritten.append('~');
         }
         else
         {
            if (escaped)
            {
               rewritten.append('\\');
               escaped = false;
            }
            rewritten.append(c);
         }
      }
      return super.parse(rewritten.toString());
   }

   /**
    * Factory method for generating a synonym query.
    * Called when parser parses an input term token that has the synonym
    * prefix (~term) prepended.
    * <p>
    * The term itself and each synonym returned by the synonym provider (if
    * one is set) are combined as optional (<code>SHOULD</code>) clauses.
    * Terms for which no query could be built are skipped.
    *
    * @param field Name of the field query will use.
    * @param termStr Term token to use for building term for the query
    * @param quoted whether the term was quoted; passed on unchanged to
    *               {@link #getFieldQuery(String, String, boolean)}.
    *
    * @return Resulting {@link Query} built for the term, or <code>null</code>
    *         if neither the term nor any synonym produced a query.
    * @throws ParseException throw in overridden method to disallow
    */
   protected Query getSynonymQuery(String field, String termStr, boolean quoted) throws ParseException
   {
      List<BooleanClause> synonyms = new ArrayList<BooleanClause>();
      addOptionalClause(synonyms, getFieldQuery(field, termStr, quoted));
      if (synonymProvider != null)
      {
         for (String synonym : synonymProvider.getSynonyms(termStr))
         {
            addOptionalClause(synonyms, getFieldQuery(field, synonym, quoted));
         }
      }

      if (synonyms.isEmpty())
      {
         return null;
      }
      if (synonyms.size() == 1)
      {
         // no need to wrap a single clause into a boolean query
         return synonyms.get(0).getQuery();
      }
      return getBooleanQuery(synonyms);
   }

   /**
    * Appends <code>query</code> as a <code>SHOULD</code> clause, ignoring
    * <code>null</code> queries (a field query can be <code>null</code> when
    * the analyzer leaves no tokens for the term).
    *
    * @param clauses the clause list to append to.
    * @param query the query to add or <code>null</code>.
    */
   private static void addOptionalClause(List<BooleanClause> clauses, Query query)
   {
      if (query != null)
      {
         clauses.add(new BooleanClause(query, BooleanClause.Occur.SHOULD));
      }
   }

   /**
    * Builds a synonym query when the term starts with a tilde, otherwise
    * delegates to the default field query.
    *
    * {@inheritDoc}
    */
   @Override
   protected Query getFieldQuery(String field, String queryText, boolean quoted) throws ParseException
   {
      if (queryText.startsWith("~"))
      {
         // synonym query: strip the prefix and expand the remaining term
         return getSynonymQuery(field, queryText.substring(1), quoted);
      }
      return super.getFieldQuery(field, queryText, quoted);
   }

   /**
    * Handles a prefix query as a wildcard query with a trailing
    * <code>*</code>.
    *
    * {@inheritDoc}
    */
   @Override
   protected Query getPrefixQuery(String field, String termStr) throws ParseException
   {
      return getWildcardQuery(field, termStr + "*");
   }

   /**
    * Creates a {@link WildcardQuery} from the term after translating its
    * wildcards with {@link #translateWildcards(String)}.
    *
    * {@inheritDoc}
    */
   @Override
   protected Query getWildcardQuery(String field, String termStr) throws ParseException
   {
      if (getLowercaseExpandedTerms())
      {
         // NOTE(review): lower-casing uses the JVM default locale - confirm
         // this matches the way terms are lower-cased at index time
         termStr = termStr.toLowerCase();
      }
      return new WildcardQuery(field, null, translateWildcards(termStr));
   }

   /**
    * Translates unescaped wildcards '*' and '?' into '%' and '_'.
    * Escaped '*' and '?' are emitted as plain characters, every '%' and '_'
    * of the input is emitted with a preceding backslash, and any other
    * escape sequence is copied unchanged. A single trailing backslash with
    * nothing after it is dropped.
    *
    * @param input the input String.
    * @return the translated String.
    */
   private String translateWildcards(String input)
   {
      StringBuilder translated = new StringBuilder(input.length());
      // true while a single backslash has been read but not yet emitted
      boolean escaped = false;
      for (int i = 0; i < input.length(); i++)
      {
         char c = input.charAt(i);
         switch (c)
         {
            case '\\' :
               if (escaped)
               {
                  translated.append("\\\\");
               }
               escaped = !escaped;
               break;
            case '*' :
               translated.append(escaped ? '*' : '%');
               escaped = false;
               break;
            case '?' :
               translated.append(escaped ? '?' : '_');
               escaped = false;
               break;
            case '%' :
            case '_' :
               // escape every occurrence of '%' and '_'
               translated.append('\\').append(c);
               escaped = false;
               break;
            default :
               if (escaped)
               {
                  translated.append('\\');
                  escaped = false;
               }
               translated.append(c);
               break;
         }
      }
      return translated.toString();
   }
}