/* Copyright (2005-2012) Schibsted ASA
* This file is part of Possom.
*
* Possom is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Possom is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with Possom. If not, see <http://www.gnu.org/licenses/>.
*
* AbstractQueryParser.java
*
* Created on 12 January 2006, 12:33
*
*/
package no.sesat.search.query.parser;
import java.io.Reader;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Stack;
import java.util.regex.Matcher;
import no.sesat.commons.ioc.BaseContext;
import no.sesat.commons.ioc.ContextWrapper;
import no.sesat.search.query.Clause;
import no.sesat.search.query.Query;
import no.sesat.search.query.QueryStringContext;
import no.sesat.search.query.finder.ParentFinder;
import no.sesat.search.query.parser.alt.Alternation;
import no.sesat.search.query.parser.alt.RotationAlternation;
import org.apache.commons.lang.StringEscapeUtils;
import org.apache.log4j.Logger;
/** Abstract helper for implementing a QueryParser
* Provides default implementation to get the query object.
*
* @version $Id$
*
*/
public abstract class AbstractQueryParser implements QueryParser {
// Constants -----------------------------------------------------
public static final String SKIP_REGEX;
public static final String OPERATOR_REGEX;
/** Protected so an .jj file implementing this class can reuse.
**/
protected static final Logger LOG = Logger.getLogger(AbstractQueryParser.class);
private static final Logger PRODUCT_LOG = Logger.getLogger("no.sesat.Product");
/** Error message when the parser tries to parse an empty query string.
***/
protected static final String ERR_EMPTY_CONTEXT
= "The \"QueryParser(QueryParser.Context)\" constructor must be used!";
private static final String ERR_PARSING = "Unable to create RunningQuery's query due to ParseException of ";
// Attributes ----------------------------------------------------
private final Stack<String> methodStack = new Stack<String>();
/** the context this query parser implementation must work against.
***/
protected transient Context context;
/** the resulting query object.
***/
private Query query;
// Static --------------------------------------------------------
static{
// build our skip regular expression
final StringBuilder builder = new StringBuilder();
for(char[] range : QueryParser.SKIP_CHARACTER_RANGES){
builder.append("[\\" + range[0] + "-\\" + range[1] + "]|");
}
// remove trailing '|'
builder.setLength(builder.length() - 1);
// our skip regular expression
SKIP_REGEX = '(' + builder.toString() + ')';
// build our operator regular expression
final StringBuilder operatorRegexpBuilder = new StringBuilder();
operatorRegexpBuilder.append('(');
for (String c : QueryParser.OPERATORS) {
operatorRegexpBuilder.append('"' + Matcher.quoteReplacement(c) + "\"|");
}
operatorRegexpBuilder.setLength(operatorRegexpBuilder.length() - 1);
operatorRegexpBuilder.append(')');
OPERATOR_REGEX = operatorRegexpBuilder.toString();
}
// Constructors --------------------------------------------------
// Public --------------------------------------------------------
/**
* do the actual parsing.
* This method shouldn't be public but that's the way javacc creates it unfortunately.
* @throws ParseException when parsing the inputted query string.
* @return the clause hierarchy ready to wrap a Query around.
*/
public abstract Clause parse() throws ParseException;
/**
* Get the query object.
* A call to this method initiates the parse() method if the query hasn't already been built.
* @return the Query object, ready to use.
*/
public Query getQuery(){
if( query == null ){
if( null == context ){
throw new IllegalStateException(ERR_EMPTY_CONTEXT);
}
final String q = context.getQueryString();
final ParentFinder parentFinder = new ParentFinder();
try{
if( null != q && 0 < q.replaceAll(SKIP_REGEX, "").trim().length() && !"*".equals(q) ){
// Uncomment the following line, and comment the line after than, to disable RotationAlternation.
//final Clause root = parse();
final Clause root = alternate( parse(), parentFinder );
query = createQuery(context.getQueryString(), false, root, parentFinder);
}
}catch(ParseException pe){
LOG.warn(ERR_PARSING + q, pe);
// also let product department know these queries are not working
PRODUCT_LOG.info("<invalid-query type=\"ParseException\">"
+ StringEscapeUtils.escapeXml(q) + "</invalid-query>");
} catch (TokenMgrError tme) {
LOG.error(ERR_PARSING + q, tme);
// also let product department know these queries are not working
PRODUCT_LOG.info("<invalid-query type=\"TokenMgrError\">"
+ StringEscapeUtils.escapeXml(q) + "</invalid-query>");
}
if( query == null ){
final Clause empty = context.createWordClause("",null);
final String qStr = context.getQueryString();
// common post-exception handling.
// * is a special query to search for everything
// and should be treated as a non-blank query despite having crashed the parser.
query = createQuery(qStr, !"*".equals(qStr), empty, parentFinder);
}
}
return query;
}
// Package protected ---------------------------------------------
// Protected -----------------------------------------------------
/** Create a new context the return the argument on any call to its getQueryString() method.
*
* @param input the query string returned from the created context's getQueryString() method.
* @return new content supplying access to query string "input"
*/
protected final Context createContext(final String input){
return ContextWrapper.wrap(
QueryParser.Context.class,
new QueryStringContext(){
public String getQueryString(){
return input;
}
},
context
);
}
/** Debugging method for tracing a method entry.
*
* @param method the name of the method
*/
protected final void enterMethod(final String method, final Token token){
if( LOG.isTraceEnabled() ){
methodStack.push(method);
final StringBuilder sb = new StringBuilder();
for( Iterator it = methodStack.iterator(); it.hasNext(); ){
final String m = (String)it.next();
sb.append("." + m );
}
Token t = token;
while (t != null) {
sb.append(" " + QueryParserImplConstants.tokenImage[t.kind]);
if (token.image != null) {
sb.append("(" + token.image + ")");
}
t = t.next;
}
LOG.trace(sb.toString());
}
}
/**
* Debugging method for tracing a method exit.
*/
protected final void exitMethod(){
if( LOG.isTraceEnabled() ){
methodStack.pop();
}
}
/** Ensure that for every leftChar there is a matching rightChar.
* Otherwise remove all occurences of both leftChar and rightChar.
*
* @param query
* @param leftChar
* @param rightChar
* @return
*/
protected final String balance(String query, final char leftChar, final char rightChar){
int left = 0, right = 0;
final char[] chars = query.toCharArray();
for( int i = 0; i < chars.length; ++i ){
if( chars[i] == leftChar ){ ++left; }
if( chars[i] == rightChar ){ ++right; }
}
if( left != right ){
// uneven amount of (). Ignore all of them then.
query = query.replaceAll("\\" + leftChar, "").replaceAll("\\" + rightChar, "");
ReInit(new StringReader(query));
}
return query;
}
/** Ensure that there are a even number of c characters in the phrase, otherwise remove all occurences of c.
*
* @param query
* @param c the character to ensure has an even occurence count.
* @return unchanged or changes string.
*/
protected final String even(String query, final char c){
int count = 0;
final char[] chars = query.toCharArray();
for( int i = 0; i < chars.length; ++i ){
if( chars[i] == c ){ ++count; }
}
if( count % 2 >0 ){
// uneven amount of (). Ignore all of them then.
query = query.replaceAll("\\" + c, "");
ReInit(new StringReader(query));
}
return query;
}
/** HACK because phone numbers and organisation numbers need to finish
* with a space. SEARCH-672
*
* @param query
* @return
*/
protected final String numberNeedsTrailingSpace(String query){
if( query.length() > 0 && Character.isDigit( query.charAt(query.length()-1) ) ){
query = query + ' ';
ReInit(new StringReader(query));
}
return query;
}
/** HACK because a floating hyphon is interpretted as a NotClause.
* Sesam syntax requires the hyphon to be adjacent, without whitespace, to the next term. SEARCH-3390
*
* @param query
* @return
*/
protected final String fixFloatingHyphon(String query){
if(0 <= query.indexOf(" - ")){
query = query.replaceAll("( )+-( )+", "- ");
ReInit(new StringReader(query));
}
return query;
}
protected abstract void ReInit(Reader reader);
// Private -------------------------------------------------------
private Clause alternate(final Clause original, final ParentFinder parentFinder){
Clause result = original;
for(Alternation alternation : getAlternations(parentFinder)){
result = alternation.alternate(result);
}
return result;
}
private List<Alternation> getAlternations(final ParentFinder parentFinder){
// the list we'll return
final List<Alternation> alternations = new ArrayList<Alternation>();
// the context each alternation will work with
final Alternation.Context cxt = ContextWrapper.wrap(
Alternation.Context.class,
new BaseContext(){
public ParentFinder getParentFinder(){
return parentFinder;
}
},
context);
// create and add each alternation
alternations.add(new RotationAlternation(cxt));
//alternations.add(new FullnameAlternation(cxt)); // disable. see SEARCH-2269
return alternations;
}
private static Query createQuery(
final String string,
final boolean blank,
final Clause root,
final ParentFinder parentFinder){
return AbstractQuery.createQuery(string, blank, root, parentFinder);
/*new AbstractQuery(string){
public Clause getRootClause(){
return root;
}
public ParentFinder getParentFinder(){
return parentFinder;
}
public boolean isBlank(){
return blank;
}
};*/
}
}