package org.exist.indexing.lucene; import java.util.*; import org.apache.log4j.Logger; import org.apache.lucene.analysis.Analyzer; import org.exist.dom.QName; import org.exist.storage.NodePath; import org.exist.util.DatabaseConfigurationException; import org.w3c.dom.Element; import org.w3c.dom.Node; import org.w3c.dom.NodeList; public class LuceneConfig { private final static Logger LOG = Logger.getLogger(LuceneConfig.class); private final static String CONFIG_ROOT = "lucene"; private final static String INDEX_ELEMENT = "text"; private final static String ANALYZER_ELEMENT = "analyzer"; private static final String INLINE_ELEMENT = "inline"; private static final String IGNORE_ELEMENT = "ignore"; private final static String BOOST_ATTRIB = "boost"; private Map<QName, LuceneIndexConfig> paths = new TreeMap<QName, LuceneIndexConfig>(); private List<LuceneIndexConfig> wildcardPaths = new ArrayList<LuceneIndexConfig>(); private Map<String, LuceneIndexConfig> namedIndexes = new TreeMap<String, LuceneIndexConfig>(); private Set<QName> inlineNodes = null; private Set<QName> ignoreNodes = null; private PathIterator iterator = new PathIterator(); private float boost = -1; private AnalyzerConfig analyzers = new AnalyzerConfig(); public LuceneConfig(NodeList configNodes, Map<String, String> namespaces) throws DatabaseConfigurationException { parseConfig(configNodes, namespaces); } /** * Copy constructor. LuceneConfig is only configured once by database instance, * so to avoid concurrency issues when using e.g. iterator, we create a copy. * * @param other */ public LuceneConfig(LuceneConfig other) { this.paths = other.paths; this.wildcardPaths = other.wildcardPaths; this.namedIndexes = other.namedIndexes; this.inlineNodes = other.inlineNodes; this.ignoreNodes = other.ignoreNodes; this.boost = other.boost; this.analyzers = other.analyzers; } public boolean matches(NodePath path) { LuceneIndexConfig idxConf = paths.get(path.getLastComponent()); while (idxConf != null) { if (idxConf.match(path)) return true; idxConf = idxConf.getNext(); } for (LuceneIndexConfig config : wildcardPaths) { if (config.match(path)) return true; } return false; } public Iterator<LuceneIndexConfig> getConfig(NodePath path) { iterator.reset(path); return iterator; } protected LuceneIndexConfig getWildcardConfig(NodePath path) { LuceneIndexConfig config; for (int i = 0; i < wildcardPaths.size(); i++) { config = wildcardPaths.get(i); if (config.match(path)) return config; } return null; } public Analyzer getAnalyzer(QName qname) { LuceneIndexConfig idxConf = paths.get(qname); while (idxConf != null) { if (!idxConf.isNamed() && idxConf.getNodePath().match(qname)) break; idxConf = idxConf.getNext(); } if (idxConf != null) { String id = idxConf.getAnalyzerId(); if (id != null) return analyzers.getAnalyzerById(idxConf.getAnalyzerId()); } return analyzers.getDefaultAnalyzer(); } public Analyzer getAnalyzer(NodePath nodePath) { if (nodePath.length() == 0) throw new RuntimeException(); LuceneIndexConfig idxConf = paths.get(nodePath.getLastComponent()); while (idxConf != null) { if (!idxConf.isNamed() && idxConf.match(nodePath)) break; idxConf = idxConf.getNext(); } if (idxConf == null) { for (LuceneIndexConfig config : wildcardPaths) { if (config.match(nodePath)) return config.getAnalyzer(); } } if (idxConf != null) { String id = idxConf.getAnalyzerId(); if (id != null) return analyzers.getAnalyzerById(idxConf.getAnalyzerId()); } return analyzers.getDefaultAnalyzer(); } public Analyzer getAnalyzer(String field) { LuceneIndexConfig config = namedIndexes.get(field); if (config != null) { String id = config.getAnalyzerId(); if (id != null) return analyzers.getAnalyzerById(config.getAnalyzerId()); } return analyzers.getDefaultAnalyzer(); } public Analyzer getAnalyzerById(String id) { return analyzers.getAnalyzerById(id); } public boolean isInlineNode(QName qname) { return inlineNodes != null && inlineNodes.contains(qname); } public boolean isIgnoredNode(QName qname) { return ignoreNodes != null && ignoreNodes.contains(qname); } public float getBoost() { return boost; } /** * Parse a configuration entry. The main configuration entries for this index * are the <text> elements. They may be enclosed by a <lucene> element. * * @param configNodes * @param namespaces * @throws org.exist.util.DatabaseConfigurationException */ protected void parseConfig(NodeList configNodes, Map<String, String> namespaces) throws DatabaseConfigurationException { Node node; for(int i = 0; i < configNodes.getLength(); i++) { node = configNodes.item(i); if(node.getNodeType() == Node.ELEMENT_NODE) { if (CONFIG_ROOT.equals(node.getLocalName())) { Element elem = (Element) node; if (elem.hasAttribute(BOOST_ATTRIB)) { String value = elem.getAttribute(BOOST_ATTRIB); try { boost = Float.parseFloat(value); } catch (NumberFormatException e) { throw new DatabaseConfigurationException("Invalid value for 'boost' attribute in " + "lucene index config: float expected, got " + value); } } parseConfig(node.getChildNodes(), namespaces); } else if (ANALYZER_ELEMENT.equals(node.getLocalName())) { analyzers.addAnalyzer((Element) node); } else if (INDEX_ELEMENT.equals(node.getLocalName())) { // found an index definition Element elem = (Element) node; try { LuceneIndexConfig config = new LuceneIndexConfig(elem, namespaces, analyzers); // if it is a named index, add it to the namedIndexes map if (config.getName() != null) namedIndexes.put(config.getName(), config); // register index either by QName or path if (config.getNodePath().hasWildcard()) { wildcardPaths.add(config); } else { LuceneIndexConfig idxConf = paths.get(config.getNodePath().getLastComponent()); if (idxConf == null) paths.put(config.getNodePath().getLastComponent(), config); else idxConf.add(config); } } catch (DatabaseConfigurationException e) { LOG.warn("Invalid lucene configuration element: " + e.getMessage()); } } else if (INLINE_ELEMENT.equals(node.getLocalName())) { Element elem = (Element) node; QName qname = LuceneIndexConfig.parseQName(elem, namespaces); if (inlineNodes == null) inlineNodes = new TreeSet<QName>(); inlineNodes.add(qname); } else if (IGNORE_ELEMENT.equals(node.getLocalName())) { Element elem = (Element) node; QName qname = LuceneIndexConfig.parseQName(elem, namespaces); if (ignoreNodes == null) ignoreNodes = new TreeSet<QName>(); ignoreNodes.add(qname); } } } } private class PathIterator implements Iterator<LuceneIndexConfig> { private LuceneIndexConfig nextConfig; private NodePath path; private boolean atLast = false; protected void reset(NodePath path) { this.atLast = false; this.path = path; nextConfig = paths.get(path.getLastComponent()); if (nextConfig == null) { nextConfig = getWildcardConfig(path); atLast = true; } } //@Override public boolean hasNext() { return (nextConfig != null); } //@Override public LuceneIndexConfig next() { if (nextConfig == null) return null; LuceneIndexConfig currentConfig = nextConfig; nextConfig = nextConfig.getNext(); if (nextConfig == null && !atLast) { nextConfig = getWildcardConfig(path); atLast = true; } return currentConfig; } //@Override public void remove() { //Nothing to do } } }