/* Copyright (2005-2012) Schibsted ASA * This file is part of Possom. * * Possom is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * Possom is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with Possom. If not, see <http://www.gnu.org/licenses/>. */ package no.sesat.search.query.analyser; import java.lang.reflect.InvocationTargetException; import java.lang.reflect.Method; import java.util.Collection; import java.util.Collections; import java.util.LinkedList; import java.util.List; import java.util.concurrent.locks.ReentrantReadWriteLock; import javax.xml.parsers.DocumentBuilderFactory; import no.sesat.commons.ioc.BaseContext; import no.sesat.commons.ioc.ContextWrapper; import no.sesat.search.site.config.DocumentLoader; import no.sesat.search.query.token.TokenPredicate; import no.sesat.search.site.SiteKeyedFactory; import org.apache.commons.collections.Predicate; import org.apache.commons.collections.PredicateUtils; import java.util.HashMap; import java.util.Map; import java.util.Properties; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.ParserConfigurationException; import no.sesat.search.query.QueryStringContext; import no.sesat.search.query.token.AbstractEvaluatorFactory; import no.sesat.search.query.token.EvaluatorType; import no.sesat.search.query.token.TokenPredicateUtility; import no.sesat.search.site.config.PropertiesLoader; import no.sesat.search.site.config.ResourceContext; import no.sesat.search.site.config.UrlResourceLoader; import no.sesat.search.site.Site; import no.sesat.search.site.SiteContext; import org.apache.log4j.Logger; import org.w3c.dom.Document; import org.w3c.dom.Element; import org.w3c.dom.Node; import org.w3c.dom.NodeList; /** Responsible for loading and serving all the AnalysisRule instances. * These rules consisting of score sets come from the configuration file SearchConstants.ANALYSIS_RULES_XMLFILE. * Rules are inherited on a per-rule basis. Global predicates are inherited as well. Inherited global predicates can * only be overidden by global predicates. Private predicates does no * * * * @version <tt>$Revision$</tt> */ public final class AnalysisRuleFactory implements SiteKeyedFactory{ /** * The context the AnalysisRuleFactory must work against. * */ public interface Context extends BaseContext, ResourceContext, SiteContext { String getUniqueId(); } private static final Logger LOG = Logger.getLogger(AnalysisRuleFactory.class); private static final String ERR_DOC_BUILDER_CREATION = "Failed to DocumentBuilderFactory.newInstance().newDocumentBuilder()"; private static final String ERR_UNABLE_TO_FIND_PREDICATE = "Unable to find predicate with id "; private static final String ERR_UNABLE_TO_FIND_PREDICATE_UTILS_METHOD = "Unable to find method PredicateUtils."; private static final String ERR_UNABLE_TO_USE_PREDICATE_UTILS_METHOD = "Unable to use method PredicateUtils."; private static final String ERR_WHILE_READING_ELEMENT = "Error while reading element "; private static final String ERR_TOO_MANY_PREDICATES_IN_NOT = "Illegal to have more than one predicate inside a <not> element. Occurred under "; private static final String WARN_RULE_NOT_FOUND = "Unable to find rule: "; private static final String DEBUG_CREATED_PREDICATE = "Parsed predicate "; private static final String DEBUG_STARTING_RULE = "Parsing rule "; private static final String DEBUG_FINISHED_RULE = "Parsed rule "; private static final AnalysisRule DUMB_RULE = new AnalysisRule(); /** * */ private static final Map<Site,AnalysisRuleFactory> INSTANCES = new HashMap<Site,AnalysisRuleFactory>(); private static final ReentrantReadWriteLock INSTANCES_LOCK = new ReentrantReadWriteLock(); /** Name of the configuration file. **/ public static final String ANALYSIS_RULES_XMLFILE = "AnalysisRules.xml"; private final Map<String, Predicate> globalPredicates = new HashMap<String, Predicate>(); private final Map<String,AnalysisRule> rules = new HashMap<String,AnalysisRule>(); private final ReentrantReadWriteLock rulesLock = new ReentrantReadWriteLock(); private final Context context; private final DocumentLoader loader; private volatile boolean init = false; private AnalysisRuleFactory(final Context cxt) throws ParserConfigurationException { context = cxt; try{ INSTANCES_LOCK.writeLock().lock(); final DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); factory.setValidating(false); final DocumentBuilder builder = factory.newDocumentBuilder(); loader = context.newDocumentLoader(cxt, ANALYSIS_RULES_XMLFILE, builder); INSTANCES.put(context.getSite(), this); }finally{ INSTANCES_LOCK.writeLock().unlock(); } } private void init() { if (!init) { loader.abut(); LOG.info("Parsing " + ANALYSIS_RULES_XMLFILE + " started for " + context.getSite()); final Document doc = loader.getDocument(); assert null != doc : "No document loaded for " + context.getSite().getName(); final Element root = doc.getDocumentElement(); final Map<String, Predicate> inheritedPredicates = getInheritedPredicates(); if( null != root) { // initialise anonymous predicate final String evaluatorTypes = root.getAttribute("evaluators"); if(null != evaluatorTypes && 0 < evaluatorTypes.length()){ for(String name : evaluatorTypes.split(",")){ final String factoryName = name; AbstractEvaluatorFactory.instanceOf( ContextWrapper.wrap( AbstractEvaluatorFactory.Context.class, context, new BaseContext() { public String getEvaluatorFactoryClassName() { return factoryName; } public String getUniqueId(){ return context.getUniqueId(); } }, new QueryStringContext() { public String getQueryString() { return "*"; } } )); } } readPredicates(root, globalPredicates, inheritedPredicates); // ruleList final NodeList ruleList = root.getElementsByTagName("rule"); for (int i = 0; i < ruleList.getLength(); ++i) { final Element rule = (Element) ruleList.item(i); final String id = rule.getAttribute("id"); final AnalysisRule analysisRule = new AnalysisRule(); LOG.info(DEBUG_STARTING_RULE + id + " " + analysisRule); // private predicates final Map<String, Predicate> privatePredicates = new HashMap<String, Predicate>(globalPredicates); readPredicates(rule, privatePredicates, inheritedPredicates); // scores final NodeList scores = rule.getElementsByTagName("score"); for (int j = 0; j < scores.getLength(); ++j) { final Element score = (Element) scores.item(j); final String predicateName = score.getAttribute("predicate"); final Predicate predicate = findPredicate(predicateName, privatePredicates, inheritedPredicates); final int scoreValue = Integer.parseInt(score.getFirstChild().getNodeValue()); analysisRule.addPredicateScore(predicate, scoreValue); final Map<Predicate,String> predicateToNameMap = new HashMap<Predicate,String>(); for( String key : inheritedPredicates.keySet()){ predicateToNameMap.put(inheritedPredicates.get(key), key); } for( String key : privatePredicates.keySet()){ predicateToNameMap.put(privatePredicates.get(key), key); } analysisRule.setPredicateNameMap(Collections.unmodifiableMap(predicateToNameMap)); } try{ rulesLock.writeLock().lock(); rules.put(id, analysisRule); }finally{ rulesLock.writeLock().unlock(); } LOG.info(DEBUG_FINISHED_RULE + id + " " + analysisRule); } } LOG.info("Parsing " + ANALYSIS_RULES_XMLFILE + " finished"); } init = true; } private AnalysisRuleFactory getParentFactory() { if (null != context.getSite().getParent()) { return instanceOf(ContextWrapper.wrap( Context.class, new SiteContext() { public Site getSite() { return context.getSite().getParent(); } }, context)); } return null; } private Map<String, Predicate> readPredicates( final Element element, final Map<String, Predicate> predicateMap, final Map<String, Predicate> inheritedPredicates){ final NodeList predicates = element.getChildNodes(); for (int i = 0; i < predicates.getLength(); ++i) { final Node node = predicates.item(i); if (node instanceof Element) { final Element e = (Element) node; if ("predicate".equals(e.getTagName())) { readPredicate(e, predicateMap, inheritedPredicates); } } } return predicateMap; } private Predicate readPredicate( final Element element, final Map<String, Predicate> predicateMap, final Map<String, Predicate> inheritedPredicates) { Predicate result = null; final boolean hasId = element.hasAttribute("id"); final boolean hasContent = element.hasChildNodes(); if (hasId && !hasContent) { // it's an already defined predicate final String id = element.getAttribute("id"); result = findPredicate(id, predicateMap, inheritedPredicates); } else { // we must create it final NodeList operators = element.getChildNodes(); for (int i = 0; i < operators.getLength(); ++i) { final Node operator = operators.item(i); if (operator != null && operator instanceof Element) { result = createPredicate((Element) operator, predicateMap, inheritedPredicates); break; } } if (hasId) { // its got an ID so we must remember it. final String id = element.getAttribute("id"); predicateMap.put(id, result); LOG.debug(DEBUG_CREATED_PREDICATE + id + " " + result); } } return result; } private Predicate findPredicate( final String name, final Map<String, Predicate> predicateMap, final Map<String, Predicate> parentPredicateMap) { Predicate result = null; // first check our predicateMap if (predicateMap.containsKey(name)) { result = predicateMap.get(name); } else if (parentPredicateMap.containsKey(name)) { result = parentPredicateMap.get(name); } else { // second check TokenPredicate's Categories, anonymous predicates, and exact peers. if(name.startsWith(TokenPredicate.EXACT_PREFIX)){ result = TokenPredicateUtility .getTokenPredicate(name.replaceFirst(TokenPredicate.EXACT_PREFIX, "")) .exactPeer(); }else{ result = TokenPredicateUtility.getTokenPredicate(name); } } return result; } private Predicate createPredicate(final Element element, final Map predicateMap, final Map inheritedPredicates) { Predicate result = null; // The operator to use from PredicateUtils. // The replaceAll's are so we end up with a method with one Predicate[] argument. final String methodName = element.getTagName() .replaceAll("and", "all") .replaceAll("or", "any") .replaceAll("either", "one") .replaceAll("neither", "none") + "Predicate"; // because we can't use the above operator methods with only one child predicate // the not operator must be a special case. final boolean notPredicate = "not".equals(element.getTagName()); try { // Find PredicateUtils static method through reflection final Method method = notPredicate ? null : PredicateUtils.class.getMethod(methodName, new Class[]{Collection.class}); // load all the predicates it will apply to final List childPredicates = new LinkedList(); final NodeList predicates = element.getChildNodes(); for (int i = 0; i < predicates.getLength(); ++i) { final Node node = predicates.item(i); if (node instanceof Element) { final Element e = (Element) node; if ("predicate".equals(e.getTagName())) { childPredicates.add(readPredicate(e, predicateMap, inheritedPredicates)); } } } if (notPredicate) { // there should only be one in the list if (childPredicates.size() > 1) { throw new IllegalStateException(ERR_TOO_MANY_PREDICATES_IN_NOT + element.getParentNode()); } result = PredicateUtils.notPredicate((Predicate) childPredicates.get(0)); } else { // use the operator through reflection result = (Predicate) method.invoke(null, new Object[]{childPredicates}); } } catch (SecurityException ex) { LOG.error(ERR_WHILE_READING_ELEMENT + element); LOG.error(ERR_UNABLE_TO_FIND_PREDICATE_UTILS_METHOD + methodName, ex); } catch (NoSuchMethodException ex) { LOG.error(ERR_WHILE_READING_ELEMENT + element); LOG.error(ERR_UNABLE_TO_FIND_PREDICATE_UTILS_METHOD + methodName, ex); } catch (IllegalAccessException ex) { LOG.error(ERR_WHILE_READING_ELEMENT + element); LOG.error(ERR_UNABLE_TO_USE_PREDICATE_UTILS_METHOD + methodName, ex); } catch (InvocationTargetException ex) { LOG.error(ERR_WHILE_READING_ELEMENT + element); LOG.error(ERR_UNABLE_TO_USE_PREDICATE_UTILS_METHOD + methodName, ex); } catch (IllegalArgumentException ex) { LOG.error(ERR_WHILE_READING_ELEMENT + element); LOG.error(ERR_UNABLE_TO_USE_PREDICATE_UTILS_METHOD + methodName, ex); } return result; } public Map<String,AnalysisRule> getRulesMap(){ LOG.trace("getRulesMap()"); init(); final Map<String,AnalysisRule> result = new HashMap<String,AnalysisRule>(); try{ rulesLock.readLock().lock(); result.putAll(rules); }finally{ rulesLock.readLock().unlock(); } if(null != context.getSite().getParent()) { result.putAll(instanceOf(ContextWrapper.wrap( Context.class, new SiteContext() { public Site getSite() { return context.getSite().getParent(); } }, context )).getRulesMap()); } return result; } /** * * Returns the rule with the name <code>ruleName</code>. * * @param ruleName the name of the rule * @return the rule. */ public AnalysisRule getRule(final String ruleName) { LOG.trace("getRule(" + ruleName + ')'); init(); AnalysisRule rule = null; try{ rulesLock.readLock().lock(); rule = rules.get(ruleName); }finally{ rulesLock.readLock().unlock(); } if(rule == null && null != context.getSite().getParent()) { rule = instanceOf(ContextWrapper.wrap( Context.class, new SiteContext() { public Site getSite() { return context.getSite().getParent(); } }, context )).getRule(ruleName); if (rule == null) { // if we cannot find an rule, then use the dumb guy that never scores. // Rather than encourage a NullPointerException LOG.warn(WARN_RULE_NOT_FOUND + ruleName); rule = DUMB_RULE; } } return rule; } /** * Main method to retrieve the correct AnalysisRuleFactory to further obtain * AnalysisRule. * * @param cxt the contextual needs this factory must use to operate. * @return AnalysisRuleFactory for this site. */ public static AnalysisRuleFactory instanceOf(final Context cxt) { final Site site = cxt.getSite(); assert null != site : "valueOf(cxt) got null site"; AnalysisRuleFactory instance = null; try{ INSTANCES_LOCK.readLock().lock(); instance = INSTANCES.get(site); }finally{ INSTANCES_LOCK.readLock().unlock(); } if (instance == null) { try { instance = new AnalysisRuleFactory(cxt); } catch (ParserConfigurationException ex) { LOG.error(ERR_DOC_BUILDER_CREATION, ex); } } return instance; } public boolean remove(final Site site){ try{ INSTANCES_LOCK.writeLock().lock(); return null != INSTANCES.remove(site); }finally{ INSTANCES_LOCK.writeLock().unlock(); } } /** Get all inherited globalPredicates. **/ private Map<String, Predicate> getInheritedPredicates() { final AnalysisRuleFactory parentFactory = getParentFactory(); return parentFactory != null ? parentFactory.getGlobalPredicates() : Collections.<String, Predicate>emptyMap(); } /** Returns this site's and all parent site's global predicates in one map. **/ private Map<String, Predicate> getGlobalPredicates() { init(); final Map<String, Predicate> result = new HashMap<String, Predicate>(globalPredicates); result.putAll(getInheritedPredicates()); return Collections.unmodifiableMap(result); } }