package lux; import java.net.URI; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; import javax.xml.transform.ErrorListener; import lux.compiler.EXPathSupport; import lux.compiler.PathOptimizer; import lux.compiler.SaxonTranslator; import lux.exception.LuxException; import lux.functions.ExtensionFunctions; import lux.functions.LuxFunctionLibrary; import lux.functions.file.FileExtensions; import lux.index.IndexConfiguration; import lux.index.field.FieldDefinition; import lux.index.field.XPathField; import lux.xml.GentleXmlReader; import lux.xpath.AbstractExpression; import lux.xpath.FunCall; import lux.xpath.NodeTest; import lux.xpath.PathStep; import lux.xpath.PathStep.Axis; import lux.xpath.PropEquiv; import lux.xquery.XQuery; import net.sf.saxon.Configuration; import net.sf.saxon.Configuration.LicenseFeature; import net.sf.saxon.lib.CollectionURIResolver; import net.sf.saxon.lib.FeatureKeys; import net.sf.saxon.s9api.Processor; import net.sf.saxon.s9api.SaxonApiException; import net.sf.saxon.s9api.XPathCompiler; import net.sf.saxon.s9api.XPathExecutable; import net.sf.saxon.s9api.XQueryCompiler; import net.sf.saxon.s9api.XQueryExecutable; import net.sf.saxon.s9api.XsltCompiler; import org.apache.commons.lang.StringUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** * Compiles XQuery using Saxon's compiler and optimizes it for use with a Lucene index. * This class is thread-safe, and should be re-used for multiple queries. */ public class Compiler { private final Logger logger; private final Processor processor; private final CollectionURIResolver defaultCollectionURIResolver; private final String uriFieldName; private final IndexConfiguration indexConfig; private final boolean isSaxonLicensed; private final HashMap<PropEquiv,ArrayList<AbstractExpression>> fieldLeaves; private final HashMap<AbstractExpression, XPathField> fieldExpressions; private final HashMap<String,String> namespaceBindings; private final PropEquiv tempEquiv; public enum SearchStrategy { NONE, // the query is evaluated without any modification LUX_UNOPTIMIZED, // collection() is inserted for Root() LUX_SEARCH, // full suite of Lux optimizations are applied, consistent with available indexes SAXON_LICENSE, // Only optimizations compatible with Saxon-PE/EE are applied } private SearchStrategy searchStrategy; /** Creates a Compiler configured according to the given {@link IndexConfiguration}. * A Saxon Processor is generated using the installed version of Saxon. If a licensed version of Saxon * (PE or EE) is installed, the presence of a license is asserted so as to enable the use of licensed Saxon features. * @param config the index configuration */ public Compiler (IndexConfiguration config) { this (makeProcessor(), config); } /** Creates a Compiler using the provided {@link Processor} and {@link IndexConfiguration}. * @param processor the Saxon Processor * @param indexConfig the index configuration */ public Compiler(Processor processor, IndexConfiguration indexConfig) { this.indexConfig = indexConfig; // indexGeneration = new AtomicInteger(0); this.processor = processor; Configuration config = processor.getUnderlyingConfiguration(); config.setDocumentNumberAllocator(new DocIDNumberAllocator()); config.setConfigurationProperty(FeatureKeys.XQUERY_PRESERVE_NAMESPACES, false); namespaceBindings = new HashMap<String, String>(); namespaceBindings.put ("lux", Evaluator.LUX_NAMESPACE); GentleXmlReader parser = new GentleXmlReader(); config.getParseOptions().setEntityResolver(parser.getEntityResolver()); // tried this, but it seems to lead to concurrent usage of the same parser: //config.getParseOptions().setXMLReader(parser); // the question is: does Saxon re-use a single instance of this parser?? config.setSourceParserClass("lux.xml.GentleXmlReader"); isSaxonLicensed = config.isLicensedFeature(LicenseFeature.PROFESSIONAL_EDITION) || config.isLicensedFeature(LicenseFeature.ENTERPRISE_XQUERY); if (indexConfig == null || !indexConfig.isIndexingEnabled()) { searchStrategy = SearchStrategy.NONE; } else if (isSaxonLicensed) { searchStrategy = SearchStrategy.SAXON_LICENSE; } else { searchStrategy = SearchStrategy.LUX_SEARCH; } defaultCollectionURIResolver = config.getCollectionURIResolver(); registerExtensionFunctions(); uriFieldName = indexConfig.getUriFieldName(); //this.dialect = dialect; logger = LoggerFactory.getLogger(getClass()); fieldLeaves = new HashMap<PropEquiv, ArrayList<AbstractExpression>>(); fieldExpressions = new HashMap<AbstractExpression, XPathField>(); tempEquiv = new PropEquiv(null); compileFieldExpressions (); } /** * Compiles the XQuery expression (main module) using a Saxon {@link XQueryCompiler}, then translates it into a mutable {@link AbstractExpression} * tree using a {@link SaxonTranslator}, optimizes it with a {@link PathOptimizer}, and then re-serializes and re-compiles. * @param exprString the XQuery source * @return the compiled XQuery expression * @throws LuxException if any error occurs while compiling, such as a static XQuery error or syntax error. */ public XQueryExecutable compile(String exprString) throws LuxException { return compile (exprString, null, null, null); } public XQueryExecutable compile(String exprString, ErrorListener errorListener) throws LuxException { return compile (exprString, errorListener, null, null); } public XQueryExecutable compile(String exprString, ErrorListener errorListener, QueryStats stats) throws LuxException { return compile (exprString, errorListener, null, stats); } /** * Compiles an XQuery expression, returning a Saxon XQueryExecutable. * @param exprString the expression to compile * @param errorListener receives any errors generated while compiling; may be null, in which case * any errors generated will be lost * @param baseURI the base URI of the compiled query * @param stats accumulates statistics about the query execution for debugging and logging (if not null) * @return the compiled, executable query object * @throws LuxException when a compilation error occurs. The message is typically unhelpful; meaningful errors * are stored in the errorListener */ public XQueryExecutable compile(String exprString, ErrorListener errorListener, URI baseURI, QueryStats stats) throws LuxException { XQueryExecutable xquery; XQueryCompiler xQueryCompiler = getXQueryCompiler(); if (errorListener != null) { xQueryCompiler.setErrorListener(errorListener); } if (baseURI != null) { xQueryCompiler.setBaseURI(baseURI); } try { xquery = xQueryCompiler.compile(exprString); } catch (SaxonApiException e) { throw new LuxException (e); } SaxonTranslator translator = makeTranslator(); if (searchStrategy == SearchStrategy.NONE) { return xquery; } XQuery abstractQuery = translator.queryFor (xquery); PathOptimizer optimizer = new PathOptimizer(this); optimizer.setSearchStrategy(searchStrategy); XQuery optimizedQuery = null; try { optimizedQuery = optimizer.optimize(abstractQuery); if (stats != null) { stats.optimizedXQuery = optimizedQuery; } } catch (LuxException e) { if (logger.isDebugEnabled()) { logger.debug ("An error occurred while optimizing: " + abstractQuery.toString()); } throw (e); } String queryString = optimizedQuery.toString(); if (logger.isDebugEnabled()) { logger.debug("optimized xquery: " + queryString); } try { xquery = xQueryCompiler.compile(queryString); } catch (SaxonApiException e) { throw new LuxException (e); } return xquery; } private static Processor makeProcessor () { try { if (Class.forName("com.saxonica.config.EnterpriseConfiguration") != null) { return new Processor (true); } } catch (ClassNotFoundException e) { } try { if (Class.forName("com.saxonica.config.ProfessionalConfiguration") != null) { //return new Processor (new Config()); return new Processor (true); } } catch (ClassNotFoundException e) { } Processor p = new Processor (new Config()); if (! StringUtils.isEmpty(System.getProperty("org.expath.pkg.saxon.repo"))) { EXPathSupport.initializeEXPath(p); } return p; } private void registerExtensionFunctions() { LuxFunctionLibrary.registerFunctions(processor); FileExtensions.registerFunctions(processor); ExtensionFunctions.registerFunctions(processor); } public XsltCompiler getXsltCompiler () { return processor.newXsltCompiler(); } public XQueryCompiler getXQueryCompiler () { XQueryCompiler xqueryCompiler = processor.newXQueryCompiler(); for (java.util.Map.Entry<String, String> binding : namespaceBindings.entrySet()) { xqueryCompiler.declareNamespace(binding.getKey(), binding.getValue()); } xqueryCompiler.declareNamespace("lux", FunCall.LUX_NAMESPACE); return xqueryCompiler; } public XPathCompiler getXPathCompiler () { XPathCompiler xpathCompiler = processor.newXPathCompiler(); xpathCompiler.declareNamespace("lux", FunCall.LUX_NAMESPACE); return xpathCompiler; } public IndexConfiguration getIndexConfiguration () { return indexConfig; } public Processor getProcessor() { return processor; } public SaxonTranslator makeTranslator () { return new SaxonTranslator(processor.getUnderlyingConfiguration()); } public CollectionURIResolver getDefaultCollectionURIResolver() { return defaultCollectionURIResolver; } public String getUriFieldName() { return uriFieldName; } /** * @return the strategy that defines the way in which optimizer-generated searches are to be encoded: * either as calls to lux:search(), or as calls to collection() with a uri beginning "lux:". */ public SearchStrategy getSearchStrategy() { return searchStrategy; } public void setSearchStrategy(SearchStrategy searchStrategy) { this.searchStrategy = searchStrategy; } public boolean isSaxonLicensed() { return isSaxonLicensed; } public List<AbstractExpression> getFieldLeaves(AbstractExpression leafExpr) { List<AbstractExpression> allLeaves = new ArrayList<AbstractExpression>(); // get leaves that are equivalent to leafExpr addMatchingLeaves (leafExpr, allLeaves); if (leafExpr instanceof PathStep) { // also get leaves that are geq leafExpr PathStep.Axis axis = ((PathStep) leafExpr).getAxis(); NodeTest nodeTest = ((PathStep) leafExpr).getNodeTest(); PathStep step; for (Axis extAxis : axis.extensions) { // try various generalizations: self->ancestor-or-self, etc step = new PathStep (extAxis, nodeTest); addMatchingLeaves (step, allLeaves); } if (! nodeTest.isWild()) { // try matching indexes with "*" nodeTest = new NodeTest (nodeTest.getType()); step = new PathStep (axis, nodeTest); addMatchingLeaves (step, allLeaves); for (Axis extAxis : axis.extensions) { step = new PathStep (extAxis, nodeTest); addMatchingLeaves (step, allLeaves); } } } return allLeaves; } private void addMatchingLeaves (AbstractExpression expr, List<AbstractExpression> allLeaves) { tempEquiv.setExpression(expr); ArrayList<AbstractExpression> leaves = fieldLeaves.get(tempEquiv); if (leaves != null) { allLeaves.addAll (leaves); } } public FieldDefinition getFieldForExpr(AbstractExpression fieldExpr) { return fieldExpressions.get(fieldExpr); } /** * bind the prefix to the namespace, making the binding available to compiled expressions * @param prefix if empty, the default namespace is bound * @param namespace if empty or null, any existing binding for the prefix is removed */ public void bindNamespacePrefix (String prefix, String namespace) { if (StringUtils.isEmpty(namespace)) { namespaceBindings.remove(prefix); } else { namespaceBindings.put(prefix, namespace); } } /** * Save an AbstractExpression version of each XPathField's xpath, for use when optimizing. * This must be called whenever the underlying indexConfiguration's collection of XPath fields * changes. TODO: consider moving the addField method to Compiler? */ public void compileFieldExpressions () { SaxonTranslator translator = new SaxonTranslator(processor.getUnderlyingConfiguration()); XPathCompiler xPathCompiler = getXPathCompiler(); for (Map.Entry<String,String> e : indexConfig.getNamespaceMap().entrySet()) { xPathCompiler.declareNamespace(e.getKey(), e.getValue()); } for (FieldDefinition field : indexConfig.getFields()) { if (field instanceof XPathField) { String xpath = ((XPathField) field).getXPath(); XPathExecutable xpathExec; try { xpathExec = xPathCompiler.compile(xpath); } catch (SaxonApiException e) { throw new LuxException("Error compiling index expression " + xpath + " for field " + field.getName()); } AbstractExpression xpathExpr = translator.exprFor(xpathExec.getUnderlyingExpression().getInternalExpression()); AbstractExpression leaf = xpathExpr.getLastContextStep(); PropEquiv leafEquiv = new PropEquiv(leaf); if (fieldLeaves.containsKey(leaf)) { fieldLeaves.get(leafEquiv).add(leaf); } else { ArrayList<AbstractExpression> leaves = new ArrayList<AbstractExpression>(); leaves.add (leaf); fieldLeaves.put(leafEquiv, leaves); } fieldExpressions.put(xpathExpr, (XPathField) field); } } } } /* This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this file, * You can obtain one at http://mozilla.org/MPL/2.0/. */