/* * eXist Open Source Native XML Database * Copyright (C) 2008-2015 The eXist-db Project * http://exist-db.org * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * */ package org.exist.indexing.lucene; import java.util.Collection; import java.util.Map; import java.util.TreeMap; import org.apache.commons.collections.MultiMap; import org.apache.commons.collections.map.MultiValueMap; import org.apache.commons.lang3.StringUtils; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.apache.lucene.analysis.Analyzer; import org.exist.dom.QName; import org.exist.dom.persistent.AttrImpl; import org.exist.storage.ElementValue; import org.exist.storage.NodePath; import org.exist.util.DatabaseConfigurationException; import org.w3c.dom.Attr; import org.w3c.dom.Element; import org.w3c.dom.Node; public class LuceneIndexConfig { private final static String N_INLINE = "inline"; private final static String N_IGNORE = "ignore"; private final static String IGNORE_ELEMENT = "ignore"; private final static String INLINE_ELEMENT = "inline"; private final static String MATCH_ATTR_ELEMENT = "match-attribute"; private final static String HAS_ATTR_ELEMENT = "has-attribute"; private final static String MATCH_SIBLING_ATTR_ELEMENT = "match-sibling-attribute"; private final static String HAS_SIBLING_ATTR_ELEMENT = "has-sibling-attribute"; public static final String QNAME_ATTR = "qname"; public static final String MATCH_ATTR = "match"; public static final String FIELD_ATTR = "field"; public static final String TYPE_ATTR = "type"; private String name = null; private NodePath path = null; private boolean isQNameIndex = false; private Map<QName, String> specialNodes = null; private LuceneIndexConfig nextConfig = null; private FieldType type = null; // This is for the @attr match boosting // and the intention is to do a proper predicate check instead in the future. /ljo private MultiMap matchAttrs; protected final static Logger LOG = LogManager.getLogger(LuceneIndexConfig.class); public LuceneIndexConfig(Element config, Map<String, String> namespaces, AnalyzerConfig analyzers, Map<String, FieldType> fieldTypes) throws DatabaseConfigurationException { if (config.hasAttribute(QNAME_ATTR)) { QName qname = parseQName(config, namespaces); path = new NodePath(qname); isQNameIndex = true; } else { String matchPath = config.getAttribute(MATCH_ATTR); try { path = new NodePath(namespaces, matchPath); if (path.length() == 0) throw new DatabaseConfigurationException("Lucene module: Invalid match path in collection config: " + matchPath); } catch (IllegalArgumentException e) { throw new DatabaseConfigurationException("Lucene module: invalid qname in configuration: " + e.getMessage()); } } String name = config.getAttribute(FIELD_ATTR); if (name != null && name.length() > 0) setName(name); String fieldType = config.getAttribute(TYPE_ATTR); if (fieldType != null && fieldType.length() > 0) type = fieldTypes.get(fieldType); if (type == null) type = new FieldType(config, analyzers); parse(config, namespaces); } private void parse(Element root, Map<String, String> namespaces) throws DatabaseConfigurationException { Node child = root.getFirstChild(); while (child != null) { if (child.getNodeType() == Node.ELEMENT_NODE) { final String localName = child.getLocalName(); if (null != localName) { Element configElement = (Element) child; switch (localName) { case IGNORE_ELEMENT: { String qnameAttr = configElement.getAttribute(QNAME_ATTR); if (StringUtils.isEmpty(qnameAttr)) { throw new DatabaseConfigurationException("Lucene configuration element 'ignore' needs an attribute 'qname'"); } if (specialNodes == null) { specialNodes = new TreeMap<>(); } specialNodes.put(parseQName(qnameAttr, namespaces), N_IGNORE); break; } case INLINE_ELEMENT: { String qnameAttr = configElement.getAttribute(QNAME_ATTR); if (StringUtils.isEmpty(qnameAttr)) { throw new DatabaseConfigurationException("Lucene configuration element 'inline' needs an attribute 'qname'"); } if (specialNodes == null) { specialNodes = new TreeMap<>(); } specialNodes.put(parseQName(qnameAttr, namespaces), N_INLINE); break; } case MATCH_SIBLING_ATTR_ELEMENT: case HAS_SIBLING_ATTR_ELEMENT: case HAS_ATTR_ELEMENT: case MATCH_ATTR_ELEMENT: { final boolean doMatch = localName.equals(MATCH_ATTR_ELEMENT) || localName.equals(MATCH_SIBLING_ATTR_ELEMENT); final boolean onSibling = localName.equals(HAS_SIBLING_ATTR_ELEMENT) || localName.equals(MATCH_SIBLING_ATTR_ELEMENT); if (onSibling && !isAttributeNode()) { throw new DatabaseConfigurationException( "Lucene module: " + localName + " can only be used on attribute"); } else if (!onSibling && isAttributeNode()) { throw new DatabaseConfigurationException( "Lucene module: " + localName + " can not be used on attribute"); } final String qname = configElement.getAttribute("qname"); if (StringUtils.isEmpty(qname)) { throw new DatabaseConfigurationException("Lucene configuration element '" + localName + " needs an attribute 'qname'"); } float boost; final String boostStr = configElement.getAttribute("boost"); try { boost = Float.parseFloat(boostStr); } catch (NumberFormatException e) { throw new DatabaseConfigurationException( "Invalid value for attribute 'boost'. " + "Expected float, got: " + boostStr); } String value = null; if (doMatch) { value = configElement.getAttribute("value"); if (StringUtils.isEmpty(value)) { throw new DatabaseConfigurationException("Lucene configuration element '" + localName + " needs an attribute 'value'"); } } if (matchAttrs == null) matchAttrs = new MultiValueMap(); matchAttrs.put(qname, new MatchAttrData(qname, value, boost, onSibling)); break; } } } } child = child.getNextSibling(); } } // return saved Analyzer for use in LuceneMatchListener public Analyzer getAnalyzer() { return type.getAnalyzer(); } public String getAnalyzerId() { return type.getAnalyzerId(); } public QName getQName() { return path.getLastComponent(); } public NodePath getNodePath() { return path; } public float getBoost() { return type.getBoost(); } /** * Get boost by matching the config with given attributes * (e.g. sibling or child atributes) * if no match, the value from getBoost() is returned */ public float getAttrBoost(Collection<AttrImpl> attributes) { float boost = 0; boolean hasBoost = false; for (Attr attr : attributes) { Collection<MatchAttrData> matchAttrData = (Collection<MatchAttrData>) matchAttrs.get(attr.getName()); if (matchAttrData == null) { continue; } for (MatchAttrData matchAttrDatum : matchAttrData) { // if matchAttr value is null we don't care about the value if (matchAttrDatum.value == null || matchAttrDatum.value.equals(attr.getValue())) { hasBoost = true; boost += matchAttrDatum.boost; // we matched the attribute already, but since we allow // further boost on the attribute, e g // both from "has-attribute" and "match-attribute" // there is no break here } } } if (hasBoost) { return boost; } else { return getBoost(); } } public void setName(String name) { this.name = name; } public String getName() { return name; } public void add(LuceneIndexConfig config) { if (nextConfig == null) nextConfig = config; else nextConfig.add(config); } public LuceneIndexConfig getNext() { return nextConfig; } private boolean isAttributeNode() { return path.getLastComponent().getNameType() == ElementValue.ATTRIBUTE; } /** * @return true if this index can be queried by name */ public boolean isNamed() { return name != null; } public boolean isIgnoredNode(QName qname) { return specialNodes != null && specialNodes.get(qname) == N_IGNORE; } public boolean isInlineNode(QName qname) { return specialNodes != null && specialNodes.get(qname) == N_INLINE; } public static QName parseQName(Element config, Map<String, String> namespaces) throws DatabaseConfigurationException { String name = config.getAttribute(QNAME_ATTR); if (StringUtils.isEmpty(name)) throw new DatabaseConfigurationException("Lucene index configuration error: element " + config.getNodeName() + " must have an attribute " + QNAME_ATTR); return parseQName(name, namespaces); } protected static QName parseQName(String name, Map<String, String> namespaces) throws DatabaseConfigurationException { boolean isAttribute = false; if (name.startsWith("@")) { isAttribute = true; name = name.substring(1); } try { String prefix = QName.extractPrefix(name); String localName = QName.extractLocalName(name); String namespaceURI = ""; if (prefix != null) { namespaceURI = namespaces.get(prefix); if(namespaceURI == null) { throw new DatabaseConfigurationException("No namespace defined for prefix: " + prefix + " in index definition"); } } final QName qname; if (isAttribute) { qname = new QName(localName, namespaceURI, prefix, ElementValue.ATTRIBUTE); } else { qname = new QName(localName, namespaceURI, prefix); } return qname; } catch (IllegalArgumentException e) { throw new DatabaseConfigurationException("Lucene index configuration error: " + e.getMessage(), e); } } public boolean match(NodePath other) { if (isQNameIndex) { final QName qn1 = path.getLastComponent(); final QName qn2 = other.getLastComponent(); return qn1.getNameType() == qn2.getNameType() && qn2.equals(qn1); } return path.match(other); } @Override public String toString() { return path.toString(); } boolean shouldReindexOnAttributeChange() { return matchAttrs != null; } private static class MatchAttrData { final String qname; final String value; final float boost; final boolean onSibling; MatchAttrData(String qname, String value, float boost, boolean onSibling) { this.qname = qname; this.value = value; this.boost = boost; this.onSibling = onSibling; } } }