/* Copyright (2006-2012) Schibsted ASA
* This file is part of Possom.
*
* Possom is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Possom is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with Possom. If not, see <http://www.gnu.org/licenses/>.
*/
package no.sesat.search.query.transform;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import no.sesat.search.query.Clause;
import no.sesat.search.query.DefaultOperatorClause;
import no.sesat.search.query.BinaryClause;
import no.sesat.search.query.LeafClause;
import no.sesat.search.query.PhraseClause;
import no.sesat.search.query.token.EvaluationException;
import no.sesat.search.query.token.TokenEvaluationEngine;
import no.sesat.search.query.token.TokenPredicate;
import no.sesat.search.query.transform.TokenMaskQueryTransformerConfig.Mask;
import no.sesat.search.query.transform.TokenMaskQueryTransformerConfig.Position;
import org.apache.log4j.Logger;
/**
* Masks (inclusively or exclusively) the terms in the query that,
* at the configured position (prefix or anywhere), match the configured TokenPredicates.
*
* <b>Note</b> Using <code>position="prefix" predicates="*_MAGIC"</code> is largely pointless but is often done anyway.
* <b>Note</b> position="prefix" currently only works with single terms. XXX
*
*
*
* @version <tt>$Id$</tt>
*/
public final class TokenMaskQueryTransformer extends AbstractQueryTransformer {

    private static final Logger LOG = Logger.getLogger(TokenMaskQueryTransformer.class);

    /** Replacement used when a term is masked out of the transformed query. */
    private static final String BLANK = "";

    private final TokenMaskQueryTransformerConfig config;

    /**
     * True while the children of a DefaultOperatorClause that matched one of the configured
     * predicates are being visited. Leaf clauses visited during that time are treated as matching.
     * Visibility is intentionally left package-private, as it was originally.
     */
    boolean insideMaskClause = false;

    /**
     * Creates the transformer.
     *
     * @param config the configuration; must be a {@link TokenMaskQueryTransformerConfig},
     *        otherwise the cast fails with a ClassCastException.
     */
    public TokenMaskQueryTransformer(final QueryTransformerConfig config){
        this.config = (TokenMaskQueryTransformerConfig) config;
    }

    /**
     * Visits the first child, and the second child only when the position is ANY or the mask is INCLUDE.
     *
     * @param clause the binary clause whose children are visited
     */
    protected void visitImpl(final BinaryClause clause) {
        clause.getFirstClause().accept(this);
        if (visitsSecondClause()) {
            clause.getSecondClause().accept(this);
        }
    }

    /**
     * If the clause as a whole matches a configured predicate, both children are visited with
     * {@link #insideMaskClause} raised. Otherwise the children are visited in the same way as
     * for any other binary clause.
     *
     * @param clause the default-operator clause whose children are visited
     */
    protected void visitImpl(final DefaultOperatorClause clause) {
        if (maskClause(clause)) { // XXX must ensure that this won't ignore children's fields
            // Remember the surrounding state: a matching clause nested as the first child of
            // another matching clause must not clear the flag for the outer clause's second child.
            final boolean outerInsideMaskClause = insideMaskClause;
            insideMaskClause = true;
            try {
                clause.getFirstClause().accept(this);
                clause.getSecondClause().accept(this);
            } finally {
                // Restore even when a child visit fails, so the flag never sticks.
                insideMaskClause = outerInsideMaskClause;
            }
        } else {
            clause.getFirstClause().accept(this);
            if (visitsSecondClause()) {
                clause.getSecondClause().accept(this);
            }
        }
    }

    /**
     * Phrases are left untouched: don't remove prefix if it is in fact a phrase.
     *
     * @param clause the phrase clause (ignored)
     */
    protected void visitImpl(final PhraseClause clause) {}

    /**
     * Applies the mask to a single leaf clause by rewriting its entry in the context's transformed terms.
     * <ul>
     * <li>Mask.INCLUDE: the term is first blanked. If the clause's field matches a predicate the term
     *     becomes the clause's field; otherwise, if the clause matches (or we are inside a matching
     *     clause), the previously transformed term is restored.</li>
     * <li>Any other mask: nothing happens unless the query has more than one term. If the clause's field
     *     matches a predicate the term is reset to the clause's term; otherwise, if the clause matches
     *     (or we are inside a matching clause), the term is blanked.</li>
     * </ul>
     *
     * @param clause the leaf clause to mask
     */
    protected void visitImpl(final LeafClause clause) {
        // Read the current value before it is possibly blanked below.
        final String transformedTerm = getContext().getTransformedTerms().get(clause);
        final boolean include = Mask.INCLUDE == config.getMask();

        // Mask.INCLUDE masks out everything by default
        if (include) {
            getContext().getTransformedTerms().put(clause, BLANK);
        }

        // Do not remove if the query is just the prefix and we're in prefix exclude mode.
        if (include || getContext().getQuery().getTermCount() > 1) {
            if (maskField(clause)) {
                // this resets the term to the clause's field or term
                getContext().getTransformedTerms().put(clause, include ? clause.getField() : clause.getTerm());
            } else if (insideMaskClause || maskClause(clause)) {
                getContext().getTransformedTerms().put(clause, include ? transformedTerm : BLANK);
            }
        }
    }

    /**
     * Tells whether the clause is at a position to be checked and matches any of the configured predicates.
     * A predicate whose evaluation fails is logged and skipped.
     *
     * @param clause the clause to evaluate
     * @return true if at least one configured predicate evaluates to true for the clause
     */
    protected boolean maskClause(final Clause clause){
        if (!isAtConfiguredPosition(clause)) {
            return false;
        }
        final TokenEvaluationEngine engine = getContext().getTokenEvaluationEngine();
        for (TokenPredicate predicate : config.getPredicates()) {
            try {
                if (engine.evaluateClause(predicate, clause)) {
                    return true;
                }
            } catch (EvaluationException ie) {
                // Best effort: keep the cause in the log and carry on with the next predicate.
                LOG.error("failed to check predicate" + predicate +" with evaluateClause " + clause, ie);
            }
        }
        return false;
    }

    /**
     * Tells whether the clause is at a position to be checked, has a field, and that field matches
     * any of the configured predicates. A predicate whose evaluation fails is logged and skipped.
     *
     * @param clause the leaf clause whose field is evaluated
     * @return true if the clause has a non-null field for which at least one configured predicate is true
     */
    protected boolean maskField(final LeafClause clause){
        if (!isAtConfiguredPosition(clause)) {
            return false;
        }
        // if the field is the token then mask the field and include the term.
        final String field = clause.getField();
        if (null == field) {
            // No field, nothing to evaluate; this check does not depend on the predicate.
            return false;
        }
        final TokenEvaluationEngine engine = getContext().getTokenEvaluationEngine();
        for (TokenPredicate predicate : config.getPredicates()) {
            try {
                if (engine.evaluateTerm(predicate, field)) {
                    return true;
                }
            } catch (EvaluationException ie) {
                // Best effort: keep the cause in the log and carry on with the next predicate.
                LOG.error("failed to check predicate" + predicate +" with evaluateTerm " + field, ie);
            }
        }
        return false;
    }

    /** Whether the second child of a (non-matching) binary clause is visited: position ANY or mask INCLUDE. */
    private boolean visitsSecondClause() {
        return Position.ANY == config.getPosition() || Mask.INCLUDE == config.getMask();
    }

    /** Whether the clause is eligible for predicate checks: position ANY, or PREFIX and it is the query's first leaf. */
    private boolean isAtConfiguredPosition(final Clause clause) {
        return Position.ANY == config.getPosition()
                || (Position.PREFIX == config.getPosition()
                        && clause == getContext().getQuery().getFirstLeafClause());
    }
}