/* Copyright (2005-2012) Schibsted ASA
 * This file is part of Possom.
 *
 *   Possom is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU Lesser General Public License as published by
 *   the Free Software Foundation, either version 3 of the License, or
 *   (at your option) any later version.
 *
 *   Possom is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU Lesser General Public License for more details.
 *
 *   You should have received a copy of the GNU Lesser General Public License
 *   along with Possom.  If not, see <http://www.gnu.org/licenses/>.
 */
package no.sesat.search.query.token;

import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import no.sesat.commons.ioc.ContextWrapper;
import no.sesat.search.site.config.DocumentLoader;
import no.sesat.search.site.Site;
import no.sesat.search.site.SiteContext;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.locks.ReentrantReadWriteLock;
import java.util.regex.Pattern;
import javax.xml.parsers.DocumentBuilder;
import no.sesat.search.site.SiteKeyedFactoryInstantiationException;
import org.apache.log4j.Logger;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;

/** Responsible for loading and serving all the Regular Expression Token Evaluators.
 * The regular expression patterns come from the configuration file named by
 * {@link #REGEXP_EVALUATOR_XMLFILE}.
 *
 * RegExpTokenEvaluators are kept in a static per-site map and re-used across queries,
 * so that the compiled patterns are cached rather than recompiled.
 *
 * All access to the static per-site map is guarded by a read/write lock.
 *
 * @version <tt>$Id$</tt>
 */
public final class RegExpEvaluatorFactory extends AbstractEvaluatorFactory{

    private static final Logger LOG = Logger.getLogger(RegExpEvaluatorFactory.class);

    private static final String ERR_DOC_BUILDER_CREATION
            = "Failed to DocumentBuilderFactory.newInstance().newDocumentBuilder()";

    /** General properties to all regular expressions configured. **/
    private static final int REG_EXP_OPTIONS = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE;

    /** The name of the file where regular expressions for each TokenPredicate will be configured. **/
    public static final String REGEXP_EVALUATOR_XMLFILE = "RegularExpressionEvaluators.xml";

    // TODO this will leak when sites are redeployed without Possom being restarted.
    /** Evaluators per site. Every read and write must hold EVALUATORS_LOCK. **/
    private static final Map<Site,Map<TokenPredicate,RegExpTokenEvaluator>> EVALUATORS
            = new HashMap<Site,Map<TokenPredicate,RegExpTokenEvaluator>>();

    private static final ReentrantReadWriteLock EVALUATORS_LOCK = new ReentrantReadWriteLock();

    /** Creates the factory, loading the site's (and its ancestors') evaluators if not already loaded.
     *
     * @param cxt the context providing the site and the document loader.
     * @throws SiteKeyedFactoryInstantiationException when a DocumentBuilder could not be created.
     */
    public RegExpEvaluatorFactory(final Context cxt) throws SiteKeyedFactoryInstantiationException {

        super(cxt);
        try{
            init(cxt);

        }catch(ParserConfigurationException pce){
            throw new SiteKeyedFactoryInstantiationException(ERR_DOC_BUILDER_CREATION, pce);
        }
    }

    /** Loads the resource {@link #REGEXP_EVALUATOR_XMLFILE} containing all regular expression patterns
     * for all the RegExpTokenEvaluators we will be using.
     * The parent site (if any) is initialised first.
     *
     * The site's evaluators are only registered once the configuration has been parsed completely,
     * so a failed load does not leave an empty or half-populated entry behind and the next
     * construction will attempt the load again.
     *
     * @param cxt the context providing the site and the document loader.
     * @throws ParserConfigurationException when a DocumentBuilder could not be created.
     */
    private static void init(final Context cxt) throws ParserConfigurationException {

        final Site site = cxt.getSite();
        final Site parent = site.getParent();

        // initialise the parent site's configuration
        if(null != parent && !isInitialised(parent)){
            init(ContextWrapper.wrap(
                    AbstractEvaluatorFactory.Context.class,
                    parent.getSiteContext(),
                    cxt
                ));
        }

        if(!isInitialised(site)){

            // A read lock cannot be upgraded to a write lock, hence release-then-acquire and re-check.
            EVALUATORS_LOCK.writeLock().lock();
            try{
                // another thread may have loaded this site between the check above and taking the write lock
                if(null == EVALUATORS.get(site)){
                    EVALUATORS.put(site, loadEvaluators(cxt));
                }
            }finally{
                EVALUATORS_LOCK.writeLock().unlock();
            }
        }
    }

    /** Whether evaluators have already been registered for the given site. Takes the read lock. **/
    private static boolean isInitialised(final Site site){

        EVALUATORS_LOCK.readLock().lock();
        try{
            return null != EVALUATORS.get(site);

        }finally{
            EVALUATORS_LOCK.readLock().unlock();
        }
    }

    /** Parses the site's {@link #REGEXP_EVALUATOR_XMLFILE} into a fresh token-to-evaluator map.
     * Does not touch the static per-site map.
     */
    private static Map<TokenPredicate,RegExpTokenEvaluator> loadEvaluators(final Context cxt)
            throws ParserConfigurationException {

        final Map<TokenPredicate,RegExpTokenEvaluator> result
                = new HashMap<TokenPredicate,RegExpTokenEvaluator>();

        final DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        factory.setValidating(false);
        final DocumentBuilder builder = factory.newDocumentBuilder();
        final DocumentLoader loader = cxt.newDocumentLoader(cxt, REGEXP_EVALUATOR_XMLFILE, builder);
        // NOTE(review): presumably blocks until the resource has been loaded -- confirm against DocumentLoader
        loader.abut();

        LOG.info("Parsing " + REGEXP_EVALUATOR_XMLFILE + " started");
        final Document doc = loader.getDocument();

        assert null != doc : "No document loaded for " + cxt.getSite().getName();

        final Element root = doc.getDocumentElement();
        if(null != root){
            final NodeList evaluators = root.getElementsByTagName("evaluator");
            for (int i = 0; i < evaluators.getLength(); ++i) {

                final Element evaluator = (Element) evaluators.item(i);

                final String tokenName = evaluator.getAttribute("token");
                LOG.info(" ->evaluator@token: " + tokenName);
                final TokenPredicate token = resolveToken(tokenName);

                final boolean queryDep = Boolean.parseBoolean(evaluator.getAttribute("query-dependant"));
                LOG.info(" ->evaluator@query-dependant: " + queryDep);

                result.put(token, new RegExpTokenEvaluator(compilePatterns(evaluator), queryDep));
            }
        }
        LOG.info("Parsing " + REGEXP_EVALUATOR_XMLFILE + " finished");

        return result;
    }

    /** Looks up the named TokenPredicate, creating an anonymous one when no such predicate exists. **/
    private static TokenPredicate resolveToken(final String tokenName){

        try{
            return TokenPredicateUtility.getTokenPredicate(tokenName);

        }catch(IllegalArgumentException iae){
            LOG.debug(tokenName + " does not exist. Will create it. Underlying exception was " + iae);
            return TokenPredicateUtility.createAnonymousTokenPredicate(tokenName);
        }
    }

    /** Compiles every pattern element found beneath the given evaluator element.
     * Pattern elements without any content are skipped with a warning.
     */
    private static Collection<Pattern> compilePatterns(final Element evaluator){

        final Collection<Pattern> compiled = new ArrayList<Pattern>();

        final NodeList patterns = evaluator.getElementsByTagName("pattern");
        for (int j = 0; j < patterns.getLength(); ++j) {

            final Element pattern = (Element) patterns.item(j);

            if(null == pattern.getFirstChild()){
                // an empty <pattern/> has no child node to read the expression from
                LOG.warn("Ignoring empty pattern for evaluator@token: " + evaluator.getAttribute("token"));
                continue;
            }

            final String expression = pattern.getFirstChild().getNodeValue();
            LOG.info(" --->pattern: " + expression);

            // (^|\s) or ($|\s) is necessary to avoid matching fragments of words.
            final String prefix = expression.startsWith("^") ? "" : "(^|\\s)";
            final String suffix = expression.endsWith("$") ? "" : "(\\:|$|\\s)";

            // compile pattern
            compiled.add(Pattern.compile(prefix + expression + suffix, REG_EXP_OPTIONS));
        }
        return compiled;
    }

    /** Finds the evaluator for the given token, looking first at this factory's site and then,
     * through the parent site's factory, at its ancestors.
     *
     * @param token the predicate to find the evaluator for.
     * @return the matching evaluator, or TokenEvaluationEngineImpl.ALWAYS_FALSE_EVALUATOR when none is found.
     *          Never null.
     * @throws EvaluationException when the parent site's factory fails to provide the evaluator.
     */
    public TokenEvaluator getEvaluator(final TokenPredicate token) throws EvaluationException {

        final Context cxt = getContext();
        final Site site = cxt.getSite();
        TokenEvaluator result = null;

        EVALUATORS_LOCK.readLock().lock();
        try{
            final Map<TokenPredicate,RegExpTokenEvaluator> siteEvaluators = EVALUATORS.get(site);
            if(null != siteEvaluators){
                result = siteEvaluators.get(token);
            }
        }finally{
            EVALUATORS_LOCK.readLock().unlock();
        }

        if(null == result && null != site.getParent()){

            result = instanceOf(ContextWrapper.wrap(
                    Context.class,
                    site.getParent().getSiteContext(),
                    cxt
                )).getEvaluator(token);
        }

        if(null == result){
            // if we cannot find an evaluator, then always fail evaluation.
            //  Rather than encourage a NullPointerException
            result = TokenEvaluationEngineImpl.ALWAYS_FALSE_EVALUATOR;
        }
        return result;
    }
}