package org.cdlib.xtf.textEngine; /* * Copyright (c) 2004, Regents of the University of California * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of the University of California nor the names of its * contributors may be used to endorse or promote products derived from this * software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * * Acknowledgements: * * A significant amount of new and/or modified code in this module * was made possible by a grant from the Andrew W. Mellon Foundation, * as part of the Melvyl Recommender Project. 
*/ import java.io.File; import java.io.StringReader; import java.util.ArrayList; import java.util.HashMap; import java.util.HashSet; import java.util.StringTokenizer; import java.util.Vector; import javax.xml.transform.Source; import net.sf.saxon.Configuration; import net.sf.saxon.om.NodeInfo; import net.sf.saxon.trans.XPathException; import net.sf.saxon.tree.TreeBuilder; import org.apache.lucene.chunk.SpanChunkedNotQuery; import org.apache.lucene.chunk.SpanDechunkingQuery; import org.apache.lucene.index.Term; import org.apache.lucene.mark.ContextMarker; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.spans.SpanNearQuery; import org.apache.lucene.search.spans.SpanNotNearQuery; import org.apache.lucene.search.spans.SpanOrNearQuery; import org.apache.lucene.search.spans.SpanOrQuery; import org.apache.lucene.search.spans.SpanQuery; import org.apache.lucene.search.spans.SpanTermQuery; import org.cdlib.xtf.textEngine.facet.FacetSpec; import org.cdlib.xtf.textEngine.facet.GroupSelector; import org.cdlib.xtf.textEngine.facet.MarkSelector; import org.cdlib.xtf.textEngine.facet.RootSelector; import org.cdlib.xtf.textEngine.facet.SelectorParser; import org.cdlib.xtf.util.EasyNode; import org.cdlib.xtf.util.FloatList; import org.cdlib.xtf.util.GeneralException; import org.cdlib.xtf.util.Path; import org.cdlib.xtf.util.Trace; /** * Processes URL parameters into a Lucene query, using a stylesheet to perform * the heavy lifting. * * @author Martin Haye */ public class QueryRequestParser { /** Partially parsed request in progress */ private QueryRequest req; /** * Keeps track of the servlet base directory, used to map relative * file paths. */ private File baseDir; /** * Configuration object used when building trees (only created if * necessary.) 
*/ private Configuration config; /** The top-level source node */ private NodeInfo topNode; /** Global attributes that were actually specified in the query */ private HashSet specifiedGlobalAttrs = new HashSet(); /** Accumulated list of grouping specifications */ private Vector groupSpecs = new Vector(); /** Default value for maxSnippets, so we can recognize difference between * the default and a user-specified value. */ private static final int DEFAULT_MAX_SNIPPETS = 888888888; /** * Produce a Lucene query from the intermediate format that is normally * produced by the formatting stylesheet. Includes setting a default * indexPath, so the query doesn't have to contain one internally. * * @param queryDoc A document containing the query. */ public QueryRequest parseRequest(Source queryDoc, File baseDir, String defaultIndexPath) throws QueryGenException, QueryFormatError { // Make a new request to start filling in. req = new QueryRequest(); // Record the parameters this.baseDir = baseDir; req.indexPath = defaultIndexPath; // Now parse it, according to the kind of Source. if (queryDoc instanceof NodeInfo) parseOutputTop(new EasyNode((NodeInfo)queryDoc)); else { if (config == null) config = new Configuration(); try { NodeInfo top = TreeBuilder.build(queryDoc, null, config); parseOutputTop(new EasyNode(top)); } catch (XPathException e) { throw new RuntimeException(e); } } // Convert the grouping specifications to an easy-to-use array. if (groupSpecs.size() > 0) { req.facetSpecs = (FacetSpec[])groupSpecs.toArray( new FacetSpec[groupSpecs.size()]); } // And we're done. return req; } // parseRequest /** * Produce a Lucene query from the intermediate format that is normally * produced by the formatting stylesheet. * * @param queryDoc A document containing the query. 
*/ public QueryRequest parseRequest(Source queryDoc, File baseDir) throws QueryGenException, QueryFormatError { return parseRequest(queryDoc, baseDir, null); } // constructor /** Get an XML source suitable for re-creating this query */ public Source getSource() { return topNode; } // getSource() /** Get the base directory from which relative paths are resolved */ public File getBaseDir() { return baseDir; } // getBaseDir() /** * Convenience function to throw a {@link QueryGenException} with the * given message. */ private void error(String message) throws QueryGenException { throw new QueryGenException(message); } // error() /** * Processes the output of the generator stylesheet, turning it into a * Lucene query. * * @param output The stylesheet output, whose first (and only) child * should be a 'query' element. */ private void parseOutputTop(EasyNode output) throws QueryGenException, QueryFormatError { if ("query".equals(output.name()) || "error".equals(output.name())) { parseOutput(output); return; } topNode = output.getWrappedNode(); for (int i = 0; i < output.nChildren(); i++) { EasyNode main = output.child(i); String name = main.name(); if (!main.isElement()) continue; if (!name.equals("query") && !name.equals("error")) error( "Expected 'query' or 'error' element at " + "top level; found '" + name + "'"); parseOutput(main); } } // parseOutput() /** * Processes the main query node, turning it into a Lucene query. * * @param main The 'query' element */ private void parseOutput(EasyNode main) { if (main.name().equals("error")) throw new QueryFormatError(main.attrValue("message")); // Process all the top-level attributes. int maxSnippets = DEFAULT_MAX_SNIPPETS; for (int i = 0; i < main.nAttrs(); i++) { String name = main.attrName(i); String val = main.attrValue(i); if (name.equals("maxSnippets")) { maxSnippets = parseIntAttrib(main, name); if (maxSnippets < 0) maxSnippets = 999999999; } else parseMainAttrib(main, name, val); } // Process the children. 
If we find an old <combine> element, // traverse it just like a top-level query. // int nChildQueries = 0; for (int i = 0; i < main.nChildren(); i++) { EasyNode el = main.child(i); if (!el.isElement()) continue; else if ("facet".equalsIgnoreCase(el.name())) parseFacetSpec(el); else if ("spellcheck".equalsIgnoreCase(el.name())) parseSpellcheck(el); else if ("resultData".equalsIgnoreCase(el.name())) continue; else { req.query = deChunk(parseQuery(el, null, DEFAULT_MAX_SNIPPETS)); nChildQueries++; } } if (nChildQueries > 1) { error( "<" + main.name() + "> element must have " + " at most one child query"); } if (main.name().equals("query") && Trace.getOutputLevel() >= Trace.debug) { Trace.debug("Lucene query as parsed: " + req.query); } // Check that we got the required parameters. if (main.name().equals("query")) { if (req.indexPath == null) error("'indexPath' attribute missing from <query> element"); } } // parseOutput() /** * Parses a 'facet' element and adds a FacetSpec to the query. * * @param el The 'facet' element to parse */ void parseFacetSpec(EasyNode el) { // Process all the attributes. 
FacetSpec fs = new FacetSpec(); for (int i = 0; i < el.nAttrs(); i++) { if (el.attrName(i).equalsIgnoreCase("field")) fs.field = el.attrValue(i); else if (el.attrName(i).equalsIgnoreCase("sortGroupsBy")) { if (el.attrValue(i).matches("^(totalDocs|value|reverseValue|maxDocScore)$")) fs.sortGroupsBy = el.attrValue(i); else { error( "Expected 'totalDocs', 'maxDocScore', or 'value' for '" + el.attrName(i) + "' attribute, but found '" + el.attrValue(i) + "' (on '" + el.name() + " element)"); } } else if (el.attrName(i).equalsIgnoreCase("sortDocsBy")) fs.sortDocsBy = el.attrValue(i); else if (el.attrName(i).equalsIgnoreCase("includeEmptyGroups")) { if (el.attrValue(i).matches("^(true|yes)$")) fs.includeEmptyGroups = true; else if (el.attrValue(i).matches("^(false|no)$")) fs.includeEmptyGroups = false; else { error( "Expected 'yes', 'no', 'true', or 'false' for '" + el.attrName(i) + "' attribute, but found '" + el.attrValue(i) + "' (on '" + el.name() + " element)"); } } else if (el.attrName(i).equalsIgnoreCase("select")) { try { SelectorParser parser = new SelectorParser( new StringReader(el.attrValue(i))); fs.groupSelector = parser.parse(); } catch (org.cdlib.xtf.textEngine.facet.ParseException e) { error( "Error parsing '" + el.attrName(i) + "' expression: " + e.getMessage()); } // catch } // else } // for i // Make sure a field name was specified. if (fs.field == null || fs.field.length() == 0) error("'" + el.name() + "' element requires 'field' attribute"); // If no group selection, put in the default. if (fs.groupSelector == null) { GroupSelector root = new RootSelector(); GroupSelector mark = new MarkSelector(); root.setNext(mark); fs.groupSelector = root; } // Make sure there is only one groupField element per field. 
for (int i = 0; i < groupSpecs.size(); i++) { FacetSpec other = ((FacetSpec)groupSpecs.elementAt(i)); if (other.field.equalsIgnoreCase(fs.field)) error( "Specifying two '" + el.name() + "' elements for the " + "same field is illegal"); } // Finally, add the new group spec to the query. groupSpecs.add(fs); } // parseFacetSpec /** * Parses a 'spellcheck' element and adds a SpellcheckParams to the query. * * @param el The 'spellcheck' element to parse */ void parseSpellcheck(EasyNode el) { SpellcheckParams params = new SpellcheckParams(); // Process all the attributes. for (int i = 0; i < el.nAttrs(); i++) { String attr = el.attrName(i); if (attr.equalsIgnoreCase("fields") || attr.equalsIgnoreCase("field")) { String fieldsStr = parseStringAttrib(el, attr); if (!fieldsStr.equals("#all")) { params.fields = new HashSet(); StringTokenizer st = new StringTokenizer(fieldsStr, ";,| \t"); while (st.hasMoreTokens()) params.fields.add(st.nextToken()); } } else if (attr.equalsIgnoreCase("docScoreCutoff")) params.docScoreCutoff = parseFloatAttrib(el, attr); else if (attr.equalsIgnoreCase("totalDocsCutoff")) params.totalDocsCutoff = parseIntAttrib(el, attr); else if (attr.equalsIgnoreCase("suggestionsPerTerm")) ; // for backward compatibility else error("Unknown attribute '" + attr + "' on '" + el.name() + "' element"); } // for i // Finally, add the new params to the query. req.spellcheckParams = params; } // parseSpellcheck /** * Recursively parse a query. */ private Query parseQuery(EasyNode parent, String field, int maxSnippets) throws QueryGenException { String name = parent.name(); if (!name.matches( "^(" + "query|term|all|range|phrase|exact|near" + "|and|or|not|orNear|allDocs" + "|moreLike" + "|orderedNear" + // experimental "|combine|meta|text)$")) // old stuff, for compatability { error( "Expected one of: (query term all allDocs range phrase " + "exact near orNear and or not moreLike orderedNear); " + "found '" + name + "'"); } // Old stuff, for compatability. 
if (name.equals("text")) field = "text"; else field = parseField(parent, field); // 'not' queries are handled at the level above. assert (!name.equals("not")); // Default to no boost. float boost = 1.0f; // Validate all attributes. for (int i = 0; i < parent.nAttrs(); i++) { String attrName = parent.attrName(i); String attrVal = parent.attrValue(i); if (attrName.equals("boost")) boost = parseFloatAttrib(parent, attrName); else if (attrName.equals("maxSnippets")) { int oldVal = maxSnippets; maxSnippets = parseIntAttrib(parent, attrName); if (maxSnippets < 0) maxSnippets = 999999999; if (oldVal != DEFAULT_MAX_SNIPPETS && maxSnippets != oldVal) { error( "Value specified for 'maxSnippets' attribute " + "differs from that of an ancestor element."); } } else parseMainAttrib(parent, attrName, attrVal); } // Make sure boostSet and boostSetField are specified together if (req.boostSetParams != null) { if (req.boostSetParams.field != null && req.boostSetParams.path == null) error("'boostSetField' specified without 'boostSet'"); if (req.boostSetParams.field == null && req.boostSetParams.path != null) error("'boostSet' specified without 'boostSetField'"); if (req.boostSetParams.exponent != 1.0f && req.boostSetParams.path == null) error("'boostSetExponent' specified without 'boostSet'"); if (req.boostSetParams.defaultBoost != 1.0f && req.boostSetParams.path == null) error("'boostSetDefault' specified without 'boostSet'"); } // Do the bulk of the parsing below... Query result = parseQuery2(parent, name, field, maxSnippets); if (result == null) return null; // And set any boost that was specified. if (boost != 1.0f) result.setBoost(boost); // If a sectionType query was specified, add that to the mix. 
SpanQuery secType = parseSectionType(parent, field, maxSnippets); if (secType != null) { SpanQuery combo = new SpanSectionTypeQuery((SpanQuery)result, secType); combo.setSpanRecording(((SpanQuery)result).getSpanRecording()); result = combo; } // If a subDocument query was specified, add that to the mix. SpanQuery subDoc = parseSubDocument(parent, field, maxSnippets); if (subDoc != null) { SpanQuery combo = new SpanSectionTypeQuery((SpanQuery)result, subDoc); combo.setSpanRecording(((SpanQuery)result).getSpanRecording()); result = combo; } // All done! return result; } // parseQuery() /** * Main work of recursively parsing a query. */ private Query parseQuery2(EasyNode parent, String name, String field, int maxSnippets) throws QueryGenException { // All docs query is the simplest kind if (name.equals("allDocs")) return new TermQuery(new Term("docInfo", "1")); // Term query is the next-simplest. if (name.equals("term")) { Term term = parseTerm(parent, field, "term"); SpanQuery q = isWildcardTerm(term) ? new XtfSpanWildcardQuery(term, req.termLimit) : new SpanTermQuery(term); q.setSpanRecording(maxSnippets); return q; } // Get field name if specified. field = parseField(parent, field); // Range queries are also pretty simple. if (name.equals("range")) return parseRange(parent, field, maxSnippets); // Handle 'moreLike' queries separately. if (name.equals("moreLike")) return parseMoreLike(parent, field, maxSnippets); // Multi-field queries are a bit tricky, and therefore handled separately. if (parent.hasAttr("fields")) return parseMultiFieldQuery(parent, field, maxSnippets); // For text queries, 'all', 'phrase', 'exact', and 'near' can be viewed // as phrase queries with different slop values. // // 'all' means essentially infinite slop (limited to the actual // chunk overlap at runtime.) // 'phrase' means zero slop // 'exact' means -1 slop (meaning use a SpanExactQuery) // 'near' allows specifying the slop (again limited to the actual // chunk overlap at runtime.) 
// 'orNear' is a special case which also allows specifying slop, but // activates a different query. // if (name.matches("^(all|phrase|exact|near|orNear|orderedNear)$")) { int slop = name.equals("all") ? 999999999 : name.equals("phrase") ? 0 : name.equals("exact") ? -1 : parseIntAttrib(parent, "slop"); return makeProxQuery(parent, slop, field, maxSnippets); } // All other cases fall through to here: and, or. Generally we try // to convert these to span queries when possible. However, this // behavior can be turned off by setting the "useProximity" attribute // to false. // boolean useProximity = parseBooleanAttrib(parent, "useProximity", true); if (!useProximity && !name.equals("and")) error("The 'useProximity' attribute is only applicable to 'and' queries"); // Use our special de-duplicating span logic. Get all the sub-queries // (including nots). As we go along, group them by field, and maintain // a list of the unique field names in the order the fields // were encountered. // HashMap subMap = new HashMap(); Vector fields = new Vector(); BooleanQuery bq = new BooleanQuery(); boolean require = !name.equals("or"); for (int i = 0; i < parent.nChildren(); i++) { EasyNode el = parent.child(i); if (!el.isElement()) continue; if (el.name().matches("^(sectionType|subDocument)$")) continue; // handled elsewhere else if (el.name().equalsIgnoreCase("resultData")) continue; // ignore, handled by client's resultFormatter.xsl Query q; boolean isNot = false; if (el.name().equals("not")) { q = parseQuery2(el, name, field, 0); isNot = true; } else q = parseQuery(el, field, maxSnippets); if (q == null) continue; if (useProximity && q instanceof SpanQuery) { String queryField = ((SpanQuery)q).getField(); QueryEntry ent = (QueryEntry)subMap.get(queryField); if (ent == null) { fields.add(queryField); ent = new QueryEntry(queryField); subMap.put(queryField, ent); } if (isNot) ent.nots.add(q); else ent.queries.add(q); } else { BooleanClause.Occur occur; if (isNot) occur = 
BooleanClause.Occur.MUST_NOT; else if (require) occur = BooleanClause.Occur.MUST; else occur = BooleanClause.Occur.SHOULD; bq.add(deChunk(q), occur); } } // If there are no generic clauses (that is, clauses that span fields), // we can optimize. // BooleanClause[] genericClauses = bq.getClauses(); if (genericClauses.length == 0) { // If no sub-queries or not queries, return an empty query. if (subMap.isEmpty()) return null; // If there's only one field, we don't need (or want) to do dechunking // at this level. Simply make a span query for this field. // if (fields.size() == 1) { QueryEntry ent = (QueryEntry)subMap.get(fields.get(0)); if (ent.nots.isEmpty()) { return processSpanJoin(name, ent.queries, ent.nots, maxSnippets); } } } // Process each field in turn, grouping queries into SpanQueries if // possible. // for (int i = 0; i < fields.size(); i++) { QueryEntry ent = (QueryEntry)subMap.get(fields.get(i)); int nQueries = ent.queries.size(); int nNots = ent.nots.size(); // If there's more than one query for this field, or if there is one // query and one or more nots, group them together with a span query. // if (nQueries > 1 || (nQueries == 1 && nNots > 0)) { SpanQuery sq = processSpanJoin(name, ent.queries, ent.nots, maxSnippets); bq.add(deChunk(sq), require ? BooleanClause.Occur.MUST : BooleanClause.Occur.SHOULD); continue; } // Otherwise, simply add these as normal boolean clauses (of course // applying dechunking if necessary.) // for (int j = 0; j < ent.queries.size(); j++) { bq.add(deChunk((Query)ent.queries.get(j)), require ? BooleanClause.Occur.MUST : BooleanClause.Occur.SHOULD); } for (int j = 0; j < ent.nots.size(); j++) bq.add(deChunk((Query)ent.nots.get(j)), BooleanClause.Occur.MUST_NOT); } // for i // If we ended up with only one clause, we may have more to do... BooleanClause[] clauses = bq.getClauses(); if (clauses.length == 1) { // If the clause is required, just return it. 
if (clauses[0].getOccur() == BooleanClause.Occur.MUST) return clauses[0].getQuery(); // If the clause is a 'not', it needs something to 'not' against. // Add another clause that just returns all valid documents. // else if (clauses[0].getOccur() == BooleanClause.Occur.MUST_NOT) { Query allDocsQuery = new TermQuery(new Term("docInfo", "1")); bq.add(allDocsQuery, BooleanClause.Occur.MUST); } } // Simplify the BooleanQuery (if possible), for instance collapsing // an AND query inside another AND query. // return simplifyBooleanQuery(bq); } // parseQuery2() /** * Parse a 'keyword' query, known internally as a multi-field AND. */ private Query parseMultiFieldQuery(EasyNode parent, String field, int maxSnippets) { // At the moment, only <and> and <or> are allowed to have multiple fields. String name = parent.name(); if (!name.matches("^(and|or)$")) error("multiple fields only supported for 'and' or 'or' queries"); // First, check that no regular 'field' has been specified. if (field != null) error("multi-field query requires 'fields' attribute, not 'field'"); // Make sure 'fields' is present. String fieldsStr = parseStringAttrib(parent, "fields"); // Parse that into an array of fields. ArrayList<String> fields = new ArrayList<String>(); StringTokenizer st = new StringTokenizer(fieldsStr, ";,| \t"); while (st.hasMoreTokens()) fields.add(st.nextToken()); // Make sure slop has been specified int slop = parseIntAttrib(parent, "slop"); // Optionally, the user can specify separate maxSnippets for text vs. // meta-data. 
// int maxMetaSnippets = parseIntAttrib(parent, "maxMetaSnippets", maxSnippets); int maxTextSnippets = parseIntAttrib(parent, "maxTextSnippets", maxSnippets); // Also, the user can specify a boost factor per field float[] boosts = null; if (parent.hasAttr("boosts")) { boosts = parseFieldBoosts(parent, "boosts"); if (boosts != null && boosts.length > fields.size()) error("'boosts' attribute may not contain more values than 'fields'"); } // Now parse all the sub-queries. ArrayList queryList = new ArrayList(); Vector<Query> notVec = new Vector<Query>(); for (int i = 0; i < parent.nChildren(); i++) { EasyNode el = parent.child(i); if (!el.isElement()) continue; else if (el.name().equalsIgnoreCase("resultData")) continue; // ignore, handled by client's resultFormatter.xsl else if (el.name().equalsIgnoreCase("not")) notVec.add(parseQuery2(el, "not", fieldsStr, maxSnippets)); else { Query q = parseQuery(el, fieldsStr, maxSnippets); if (q == null) continue; if (!(q instanceof SpanQuery)) error("Internal error: sub-queries of multi-field query must be span queries"); queryList.add(q); } } // If no sub-queries, there's nothing to do. if (queryList.isEmpty()) return null; // Form the final query. SpanQuery[] subQueries = (SpanQuery[])queryList.toArray( new SpanQuery[queryList.size()]); return createMultiFieldQuery(parent, fields.toArray(new String[fields.size()]), boosts, subQueries, notVec, slop, maxMetaSnippets, maxTextSnippets); } // parseMultiFieldQuery() /** * Does the work of creating the guts of a keyword query. */ private Query createMultiFieldQuery(EasyNode parent, String[] fields, float[] boosts, SpanQuery[] spanQueries, Vector<Query> notVec, int slop, int maxMetaSnippets, int maxTextSnippets) { BooleanQuery mainQuery = new BooleanQuery(true /* disable coord */); // We'll be changing the field names a lot. RefieldingQueryRewriter refielder = new RefieldingQueryRewriter(); // If it's an AND (as opposed to OR)... 
if (parent.name().equals("and")) { // Form a clause for each term, across all fields. This implements: // // And( // term1 in field1 or field2 or field3... // term2 in field1 or field2 or field3... // .. // ) // for (int i = 0; i < spanQueries.length; i++) { BooleanQuery termOrQuery = new BooleanQuery(); for (int j = 0; j < fields.length; j++) { Query tq = refielder.refield(spanQueries[i], fields[j]); if (!notVec.isEmpty()) { Vector refieldedNotVec = new Vector(); for (Query nq : notVec) refieldedNotVec.add(refielder.refield(nq, fields[j])); tq = processSpanNots((SpanQuery)tq, refieldedNotVec, 0); } tq = deChunk(tq); if (tq instanceof SpanQuery) ((SpanQuery)tq).setSpanRecording(0); termOrQuery.add(tq, BooleanClause.Occur.SHOULD); } // Make sure these don't contribute to the overall score, but each // term must match in at least one field. // termOrQuery.setBoost(0.0f); mainQuery.add(termOrQuery, BooleanClause.Occur.MUST); } } // For highlighting and scoring computations, make a clause for // each field, searching for all terms if present. This implements: // // Or( // OrNear(field1: term1,term2,...) // OrNear(field2: term1,term2,...) // .. // ) // for (int i = 0; i < fields.length; i++) { SpanQuery[] termQueries = new SpanQuery[spanQueries.length]; for (int j = 0; j < spanQueries.length; j++) { termQueries[j] = (SpanQuery)refielder.refield(spanQueries[j], fields[i]); if (!notVec.isEmpty()) { Vector refieldedNotVec = new Vector(); for (Query nq : notVec) refieldedNotVec.add(refielder.refield(nq, fields[i])); termQueries[j] = processSpanNots(termQueries[j], refieldedNotVec, 0); } } SpanQuery fieldOrQuery = (SpanQuery)deChunk(new SpanOrNearQuery(termQueries, slop, true)); int maxSnippets = (fields[i].equals("text")) ? maxTextSnippets : maxMetaSnippets; fieldOrQuery.setSpanRecording(maxSnippets); if (boosts != null && i < boosts.length) fieldOrQuery.setBoost(boosts[i]); mainQuery.add(fieldOrQuery, BooleanClause.Occur.SHOULD); } // All done. 
return simplifyBooleanQuery(mainQuery); } // createMultiFieldQuery() /** * Simplify a BooleanQuery that contains other BooleanQuery/ies with the * same type of clauses. If there's any boosting involved, don't do * the optimization. */ private Query simplifyBooleanQuery(BooleanQuery bq) { boolean anyBoosting = false; boolean anyBoolSubs = false; boolean allSame = true; boolean first = true; boolean prevRequired = true; boolean prevProhibited = true; // Scan each clause. BooleanClause[] clauses = bq.getClauses(); for (int i = 0; i < clauses.length; i++) { // See if this clause is the same as the previous one. if (!first && (prevRequired != (clauses[i].getOccur() == BooleanClause.Occur.MUST) || prevProhibited != (clauses[i].getOccur() == BooleanClause.Occur.MUST_NOT))) allSame = false; prevRequired = clauses[i].getOccur() == BooleanClause.Occur.MUST; prevProhibited = clauses[i].getOccur() == BooleanClause.Occur.MUST_NOT; first = false; // Detect any boosting if (clauses[i].getQuery().getBoost() != 1.0f) anyBoosting = true; // If the clause is a BooleanQuery, check the sub-clauses... if (clauses[i].getQuery() instanceof BooleanQuery) { BooleanQuery subQuery = (BooleanQuery)clauses[i].getQuery(); BooleanClause[] subClauses = subQuery.getClauses(); // Scan each sub-clause for (int j = 0; j < subClauses.length; j++) { // Make sure it's the same as the previous clause. if (prevRequired != (subClauses[j].getOccur() == BooleanClause.Occur.MUST) || prevProhibited != (subClauses[j].getOccur() == BooleanClause.Occur.MUST_NOT)) allSame = false; prevRequired = (subClauses[j].getOccur() == BooleanClause.Occur.MUST); prevProhibited = (subClauses[j].getOccur() == BooleanClause.Occur.MUST_NOT); // Detect any boosting. if (subClauses[j].getQuery().getBoost() != 1.0f) anyBoosting = true; } // for j // Note that we found at least one BooleanQuery clause. 
anyBoolSubs = true; } } // for i // If the main BooleanQuery doesn't meet all of our criteria for // simplification, simply return it unmodified. // if (!anyBoolSubs || !allSame || anyBoosting) return bq; // Create a new, simplified, query. bq = new BooleanQuery(); for (int i = 0; i < clauses.length; i++) { if (clauses[i].getQuery() instanceof BooleanQuery) { BooleanQuery subQuery = (BooleanQuery)clauses[i].getQuery(); BooleanClause[] subClauses = subQuery.getClauses(); for (int j = 0; j < subClauses.length; j++) bq.add(subClauses[j]); } else bq.add(clauses[i]); } // And we're done. return bq; } // simplifyBooleanQuery() /** * Parse an attribute on the main query element (or, for backward * compatability, on its immediate children.) * * If the attribute isn't recognized, an error exception is thrown. */ void parseMainAttrib(EasyNode el, String attrName, String val) { if (attrName.equals("style")) req.displayStyle = onceOnlyPath(req.displayStyle, el, attrName); else if (attrName.equals("startDoc")) { req.startDoc = onceOnlyAttrib(req.startDoc + 1, el, attrName); // Adjust for 1-based start doc. 
req.startDoc = Math.max(0, req.startDoc - 1); } else if (attrName.equals("maxDocs")) req.maxDocs = onceOnlyAttrib(req.maxDocs, el, attrName); else if (attrName.equals("indexPath")) req.indexPath = onceOnlyAttrib(req.indexPath, el, attrName); else if (attrName.equals("termLimit")) req.termLimit = onceOnlyAttrib(req.termLimit, el, attrName); else if (attrName.equals("workLimit")) req.workLimit = onceOnlyAttrib(req.workLimit, el, attrName); else if (attrName.equals("sortDocsBy") || attrName.equals("sortMetaFields")) // old, for compatibility req.sortMetaFields = onceOnlyAttrib(req.sortMetaFields, el, attrName); else if (attrName.equals("returnMetaFields")) req.returnMetaFields = onceOnlyAttrib(req.returnMetaFields, el, attrName); else if (attrName.equals("maxContext") || attrName.equals("contextChars")) req.maxContext = onceOnlyAttrib(req.maxContext, el, attrName); else if (attrName.equals("termMode")) { int oldTermMode = req.termMode; if (val.equalsIgnoreCase("none")) req.termMode = ContextMarker.MARK_NO_TERMS; else if (val.equalsIgnoreCase("hits")) req.termMode = ContextMarker.MARK_SPAN_TERMS; else if (val.equalsIgnoreCase("context")) req.termMode = ContextMarker.MARK_CONTEXT_TERMS; else if (val.equalsIgnoreCase("all")) req.termMode = ContextMarker.MARK_ALL_TERMS; else error( "Unknown value for 'termMode'; expecting " + "'none', 'hits', 'context', or 'all'"); if (specifiedGlobalAttrs.contains(attrName) && req.termMode != oldTermMode) { error("'termMode' attribute should only be specified once."); } specifiedGlobalAttrs.add(attrName); } else if (attrName.equalsIgnoreCase("boostSet")) { if (req.boostSetParams == null) req.boostSetParams = new BoostSetParams(); req.boostSetParams.path = onceOnlyPath(req.boostSetParams.path, el, attrName); } else if (attrName.equalsIgnoreCase("boostSetField")) { if (req.boostSetParams == null) req.boostSetParams = new BoostSetParams(); req.boostSetParams.field = parseStringAttrib(el, attrName); } else if 
(attrName.equalsIgnoreCase("boostSetExponent")) { if (req.boostSetParams == null) req.boostSetParams = new BoostSetParams(); req.boostSetParams.exponent = parseFloatAttrib(el, attrName); } else if (attrName.equalsIgnoreCase("boostSetDefault")) { if (req.boostSetParams == null) req.boostSetParams = new BoostSetParams(); req.boostSetParams.defaultBoost = parseFloatAttrib(el, attrName); } else if (attrName.equalsIgnoreCase("normalizeScores")) req.normalizeScores = parseBooleanAttrib(el, "normalizeScores"); else if (attrName.equalsIgnoreCase("explainScores")) req.explainScores = parseBooleanAttrib(el, "explainScores"); else if (attrName.equals("field") || attrName.equals("metaField")) ; // handled elsewhere else if (attrName.equals("fields") && el.name().matches("^(and|or)$")) ; // handled elsewhere else if ((attrName.equals("inclusive") || attrName.equals("numeric")) && el.name().equals("range")) ; // handled elsewhere else if (attrName.equals("slop") && el.name().matches("^(near|orNear|orderedNear)$")) ; // handled elsewhere else if (attrName.matches("^(slop|boosts)$") && el.name().matches("^(and|or)$") && el.hasAttr("fields")) ; // handled elsewhere else if (attrName.matches("^(maxTextSnippets|maxMetaSnippets)$") && el.name().matches("^(and|or)$") && el.hasAttr("fields")) ; // handled elsewhere else if (attrName.equalsIgnoreCase("useProximity") && el.name().matches("^(and|or)$")) ; // handled elsewhere else if (attrName.matches( "^(fields|boosts|minWordLen|maxWordLen|minDocFreq|maxDocFreq|minTermFreq|termBoost|maxQueryTerms)$") && el.name().equals("moreLike")) ; // handled elsewhere else { error( "Unrecognized attribute \"" + attrName + "\" " + "on <" + el.name() + "> element"); } } // parseMainAttrib() /** * Parse a 'sectionType' query element, if one is present. If not, * simply returns null. 
*/ private SpanQuery parseSectionType(EasyNode parent, String field, int maxSnippets) throws QueryGenException { // Find the sectionType element (if any) EasyNode sectionType = parent.child("sectionType"); if (sectionType == null) return null; // These sectionType queries only belong in the "text" field. if (!"text".equals(field)) error( "'sectionType' element is only appropriate in queries on the 'text' field"); // Make sure it only has one child. if (sectionType.nChildren() != 1) error("'sectionType' element requires exactly " + "one child element"); Query ret = parseQuery(sectionType.child(0), "sectionType", maxSnippets); if (!(ret instanceof SpanQuery)) error("'sectionType' sub-query must use proximity"); return (SpanQuery)ret; } // parseSectionType() /** * Parse a 'subDocument' query element, if one is present. If not, * simply returns null. */ private SpanQuery parseSubDocument(EasyNode parent, String field, int maxSnippets) throws QueryGenException { // Find the subDocument element (if any) EasyNode subDocument = parent.child("subDocument"); if (subDocument == null) return null; // These subDocument queries only belong in the "text" field. if (!"text".equals(field)) error( "'subDocument' element is only appropriate in queries on the 'text' field"); // Make sure it only has one child. if (subDocument.nChildren() != 1) error("'subDocument' element requires exactly " + "one child element"); Query ret = parseQuery(subDocument.child(0), "subDocument", maxSnippets); if (!(ret instanceof SpanQuery)) error("'subDocument' sub-query must use proximity"); return (SpanQuery)ret; } // parseSubDocument() /** * If the given element has a 'field' attribute, return its value; * otherwise return 'parentField'. Also checks that field cannot be * specified if parentField has already been. 
*/ private String parseField(EasyNode el, String parentField) throws QueryGenException { if (!el.hasAttr("metaField") && !el.hasAttr("field")) return parentField; String attVal = el.attrValue("field"); if (attVal == null || attVal.length() == 0) attVal = el.attrValue("metaField"); if (attVal.length() == 0) error("'field' attribute cannot be empty"); if (attVal.equals("sectionType") && (parentField == null || !parentField.equals("sectionType"))) error("'sectionType' is not valid for the 'field' attribute"); if (attVal.equals("subDocument") && (parentField == null || !parentField.equals("subDocument"))) error("'subDocument' is not valid for the 'field' attribute"); if (parentField != null && !parentField.equals(attVal)) error("Cannot override ancestor 'field' attribute"); return attVal; } /** * Joins a number of span queries together using a span query. * * @param name 'and', 'or', 'near', etc. * @param subVec Vector of sub-clauses * @param notVec Vector of not clauses (may be empty) * * @return A new Span query joining the sub-clauses. */ private SpanQuery processSpanJoin(String name, Vector subVec, Vector notVec, int maxSnippets) { // Get a handy array of the queries. SpanQuery[] subQueries = (SpanQuery[])subVec.toArray(new SpanQuery[0]); // If there's only one query (with no nots) then just return it. if (subQueries.length == 1 && notVec.isEmpty()) return subQueries[0]; // Now make the top-level query. SpanQuery q; if (subQueries.length == 1) q = subQueries[0]; else if (name.equals("orNear")) { // We can't know the actual slop until the query is run against // an index (the slop will be equal to max proximity). So set // it to a big value for now, and it will be clamped later // when the query is run. 
// q = new SpanOrNearQuery(subQueries, 999999999, true); } else if (name.equals("orderedNear")) { q = new SpanNearQuery(subQueries, 999999999, true); } else if (!name.equals("or")) { // We can't know the actual slop until the query is run against // an index (the slop will be equal to max proximity). So set // it to a big value for now, and it will be clamped later // when the query is run. // q = new SpanNearQuery(subQueries, 999999999, false); } else q = new SpanOrQuery(subQueries); q.setSpanRecording(maxSnippets); // Finish up by handling any not clauses found. return processSpanNots(q, notVec, maxSnippets); } // processSpanJoin() /** * Ensures that the given query, if it is a span query on the "text" * field, is wrapped by a de-chunking query. */ public static Query deChunk(Query q) { // We only need to de-chunk span queries, not other queries. if (!(q instanceof SpanQuery)) return q; // Furthermore, we only need to de-chunk queries on the "text" // field. // SpanQuery sq = (SpanQuery)q; if (!sq.getField().equals("text")) return q; // If it's already de-chunked, no need to do it again. if (sq instanceof SpanDechunkingQuery) return q; // Okay, wrap it. SpanDechunkingQuery dq = new SpanDechunkingQuery(sq); dq.setSpanRecording(sq.getSpanRecording()); return dq; } // deChunk() /** Determines if the term contains a wildcard character ('*' or '?') */ private boolean isWildcardTerm(Term term) { if (term.text().indexOf('*') >= 0) return true; if (term.text().indexOf('?') >= 0) return true; return false; } // isWildcardTerm() /** * Parse a range query. */ private Query parseRange(EasyNode parent, String field, int maxSnippets) throws QueryGenException { // Inclusive or exclusive? boolean inclusive = parseBooleanAttrib(parent, "inclusive", true); boolean numeric = parseBooleanAttrib(parent, "numeric", false); // Check the children for the lower and upper bounds. 
Term lower = null; Term upper = null; for (int i = 0; i < parent.nChildren(); i++) { EasyNode child = parent.child(i); if (!child.isElement()) continue; String name = child.name(); if (name.equals("lower")) { if (lower != null) error("'lower' only allowed once as child of 'range' element"); if (child.child("term") != null) lower = parseTerm(child.child("term"), field, "term"); else lower = parseTerm(child, field, "lower"); } else if (name.equals("upper")) { if (upper != null) error("'upper' only allowed once as child of 'range' element"); if (child.child("term") != null) upper = parseTerm(child.child("term"), field, "term"); else upper = parseTerm(child, field, "upper"); } else error( "'range' element may only have 'lower' and/or 'upper' " + "as child elements"); } // for iter // Upper, lower, or both must be specified. if (lower == null && upper == null) error("'range' element must have 'lower' and/or 'upper' child element(s)"); // And we're done. if (numeric) { return new NumericRangeQuery(field, (lower == null) ? null : lower.text(), (upper == null) ? null : upper.text(), inclusive, inclusive); } else { // If no upper specified, we're in danger of accidentally matching the // XTF special tokens. So be sure to exclude the whole area that marker // characters are in. // if (upper == null) { char[] tmp = new char[1]; tmp[0] = Constants.MARKER_BASE; upper = new Term(lower.field(), new String(tmp)); } // Now make the query. SpanQuery q = new XtfSpanRangeQuery(lower, upper, inclusive, req.termLimit); q.setSpanRecording(maxSnippets); return q; } } // parseRange() /** * If any 'not' clauses are present, this builds a query that filters them * out of the main query. */ SpanQuery processSpanNots(SpanQuery query, Vector notClauses, int maxSnippets) { // If there aren't any 'not' clauses, we're done. if (notClauses.isEmpty()) return query; // If there's only one, the sub-query is simple. 
SpanQuery subQuery; if (notClauses.size() == 1) subQuery = (SpanQuery)notClauses.get(0); else { // Otherwise, 'or' all the nots together. SpanQuery[] subs = (SpanQuery[])notClauses.toArray(new SpanQuery[0]); subQuery = new SpanOrQuery(subs); subQuery.setSpanRecording(maxSnippets); } // Now make the final 'not' query. If on the text field, // use the special chunk-aware version. // SpanQuery nq; if (query.getField().equals("text")) { // If a not is nested within another not, we need to avoid // double-dechunking. // if (query instanceof SpanDechunkingQuery) query = ((SpanDechunkingQuery)query).getWrapped(); // Note that the actual slop will have to be fixed when the // query is run. // nq = new SpanChunkedNotQuery(query, subQuery, 999999999); } else nq = new SpanNotNearQuery(query, subQuery, 999999999); // Establish the span recording, and we're done. nq.setSpanRecording(maxSnippets); return nq; } // processSpanNots() /** * Generate a proximity query on a field. This uses the de-duplicating span * system. * * @param parent The element containing the field name and terms. */ Query makeProxQuery(EasyNode parent, int slop, String field, int maxSnippets) throws QueryGenException { Vector terms = new Vector(); Vector notVec = new Vector(); for (int i = 0; i < parent.nChildren(); i++) { EasyNode el = parent.child(i); if (!el.isElement()) continue; if (el.name().equals("not")) { if (parent.name().matches("^(phrase|exact)$")) error("'not' clauses aren't supported in phrase/exact queries"); // Make sure to avoid adding the 'not' terms to the term map, // since it would be silly to hilight them. 
// notVec.add(parseQuery2(el, "not", field, maxSnippets)); } else if (el.name().matches("^(sectionType|subDocument)$")) continue; // handled elsewhere else { SpanQuery q; if (slop == 0) { Term t = parseTerm(el, field, "term"); if (isWildcardTerm(t)) q = new XtfSpanWildcardQuery(t, req.termLimit); else q = new SpanTermQuery(t); q.setSpanRecording(maxSnippets); terms.add(q); } else terms.add(parseQuery(el, field, maxSnippets)); } } if (terms.size() == 0) error("'" + parent.name() + "' element requires at " + "least one term"); // Handle 'exact' queries specially. SpanQuery q; SpanQuery[] termQueries = (SpanQuery[])terms.toArray( new SpanQuery[terms.size()]); if (slop < 0) q = new SpanExactQuery(termQueries); // Optimization: treat a single-term 'all' query as just a simple // term query. // else if (terms.size() == 1) q = (SpanQuery)terms.elementAt(0); // Handle orNear queries specially. else if (parent.name().equals("orNear")) q = new SpanOrNearQuery(termQueries, slop, true); // Ordered near - true means in order else if (parent.name().equals("orderedNear")) q = new SpanNearQuery(termQueries, slop, true); // Make a 'near' query out of it. Zero slop implies in-order. else q = new SpanNearQuery(termQueries, slop, slop == 0); // Set up the span recording, and add in any nots present. q.setSpanRecording(maxSnippets); return processSpanNots(q, notVec, maxSnippets); } // makeProxQuery() /** * Parses a "more like this" query. */ private Query parseMoreLike(EasyNode parent, String field, int maxSnippets) { // First, parse the sub-query. 
Query subQuery = null; for (int i = 0; i < parent.nChildren(); i++) { EasyNode el = parent.child(i); if (!el.isElement()) continue; else if (el.name().equalsIgnoreCase("resultData")) continue; // ignore, handled by client's resultFormatter.xsl if (subQuery != null) error("'moreLike' element may not have more than one sub-query"); subQuery = parseQuery(el, field, 0); // no snippets } if (subQuery == null) error("'moreLike' element requires a sub-query"); // Form up the result. MoreLikeThisQuery ret = new MoreLikeThisQuery(subQuery); // Process any optional attributes. for (int i = 0; i < parent.nAttrs(); i++) { String attrName = parent.attrName(i); if (attrName.equalsIgnoreCase("minWordLen")) ret.setMinWordLen(parseIntAttrib(parent, attrName)); else if (attrName.equalsIgnoreCase("maxWordLen")) ret.setMaxWordLen(parseIntAttrib(parent, attrName)); else if (attrName.equalsIgnoreCase("minDocFreq")) ret.setMinDocFreq(parseIntAttrib(parent, attrName)); else if (attrName.equalsIgnoreCase("maxDocFreq")) ret.setMaxDocFreq(parseIntAttrib(parent, attrName)); else if (attrName.equalsIgnoreCase("minTermFreq")) ret.setMinTermFreq(parseIntAttrib(parent, attrName)); else if (attrName.equalsIgnoreCase("termBoost")) ret.setBoost(parseBooleanAttrib(parent, attrName)); else if (attrName.equalsIgnoreCase("maxQueryTerms")) ret.setMaxQueryTerms(parseIntAttrib(parent, attrName)); else if (attrName.equalsIgnoreCase("fields")) ret.setFieldNames(parseFieldNames(parent, attrName)); else if (attrName.equalsIgnoreCase("boosts")) ret.setFieldBoosts(parseFieldBoosts(parent, attrName)); else error("Unrecognized attribute '" + attrName + "' on 'moreLike' element"); } // Make sure at least one field was specified. String[] fields = ret.getFieldNames(); if (fields == null || fields.length == 0) error( "At least one field name must be specified in 'fields' attribute on 'moreLike' query"); // Make sure that, if boosts were specified, there are the same number. 
float[] boosts = ret.getFieldBoosts(); if (boosts != null && boosts.length != fields.length) error( "Must specify same number of boosts as fields in 'boosts' attribute on 'moreLike' query"); // All done. return ret; } // parseMoreLike() /** * Parse a list of field names. They can be separated by spaces, tabs, * commas, semicolons, or pipe symbols. * * @param parent Node to look at * @param attrName Attribute to get the list from * @return Array of field names, or null if none. */ private String[] parseFieldNames(EasyNode parent, String attrName) { String val = parseStringAttrib(parent, attrName); StringTokenizer tok = new StringTokenizer(val, " \t\r\n,;|"); ArrayList list = new ArrayList(); while (tok.hasMoreTokens()) list.add(tok.nextToken()); if (list.size() > 0) return (String[])list.toArray(new String[list.size()]); else return null; } // parseFieldNames() /** * Parse a list of field boosts. They can be separated by spaces, tabs, * commas, semicolons, or pipe symbols. * * @param parent Node to look at * @param attrName Attribute to get the list from * @return Array of field boosts, or null if none. */ private float[] parseFieldBoosts(EasyNode parent, String attrName) { String val = parseStringAttrib(parent, attrName); StringTokenizer tok = new StringTokenizer(val, " \t\r\n,;|"); FloatList list = new FloatList(); while (tok.hasMoreTokens()) { String strVal = tok.nextToken(); try { list.add(Float.parseFloat(strVal)); } catch (NumberFormatException e) { error("Each value for 'boosts' must be a valid floating-point number"); } } if (list.size() > 0) return list.toArray(); else return null; } // parseFieldBoosts() /** * Parses a 'term' element. If not so marked, an exception is thrown. * * @param parent The element to parse */ private Term parseTerm(EasyNode parent, String field, String expectedName) throws QueryGenException { // Get field name if specified. 
field = parseField(parent, field); if (field == null) error( "'term' element requires 'field' attribute on " + "itself or an ancestor"); if (!parent.name().equals(expectedName)) error( "Expected '" + expectedName + "' as child of '" + parent.parent().name() + "' element, but found '" + parent.name() + "'"); String termText = getText(parent); if (termText == null || termText.length() == 0) error("Missing term text in element '" + parent.name() + "'"); // Make a term out of the field and the text. Term term = new Term(field, termText); return term; } // parseTerm() /** * Ensures that the element has only a single child node (ignoring * attributes), and that it's a text node. * * @param el The element to get the text of * @return The string value of the text */ private String getText(EasyNode el) throws QueryGenException { // There should be no element children, only text. int count = 0; String text = null; for (int i = 0; i < el.nChildren(); i++) { EasyNode n = el.child(i); if (!n.isElement() && !n.isText()) { count = -1; break; } if (n.isText()) text = n.toString(); count++; } if (count != 1) error("A single text node is required for the '" + el.name() + "' element"); return text; } // getText() /** * Like parseIntAttrib(), but adds additional processing to ensure that * global parameters are only specified once (or if multiple times, that * the same value is used each time.) 
* * @param oldVal Current value of the global parameter * @param el Element to get the attribute from * @param attribName Name of the attribute * @return New value for the parameter */ private int onceOnlyAttrib(int oldVal, EasyNode el, String attribName) { int newVal = parseIntAttrib(el, attribName); if (specifiedGlobalAttrs.contains(attribName) && newVal != oldVal) { error("'" + attribName + "' attribute should only be specified once."); } specifiedGlobalAttrs.add(attribName); return newVal; } // onceOnlyAttrib() /** * Like parseStringAttrib(), but adds additional processing to ensure that * global parameters are only specified once (or if multiple times, that * the same value is used each time.) * * @param oldVal Current value of the global parameter * @param el Element to get the attribute from * @param attribName Name of the attribute * @return New value for the parameter */ private String onceOnlyAttrib(String oldVal, EasyNode el, String attribName) { String newVal = parseStringAttrib(el, attribName); if (specifiedGlobalAttrs.contains(attribName) && !oldVal.equals(newVal)) { error("'" + attribName + "' attribute should only be specified once."); } specifiedGlobalAttrs.add(attribName); return newVal; } // onceOnlyAttrib() /** * Like onceOnlyAttrib(), but also ensures that the given file can * actually be resolved as a path that can be read. 
* * @param oldVal Current value of the global parameter * @param el Element to get the attribute from * @param attribName Name of the attribute * @return New value for the parameter */ private String onceOnlyPath(String oldVal, EasyNode el, String attribName) { String newVal = parseStringAttrib(el, attribName); String path; if (newVal.startsWith("http:")) path = newVal; else path = Path.resolveRelOrAbs(baseDir, newVal); if (specifiedGlobalAttrs.contains(attribName) && !oldVal.equals(path)) { error("'" + attribName + "' attribute should only be specified once."); } specifiedGlobalAttrs.add(attribName); if (!path.startsWith("http:") && !newVal.equals("NullStyle.xsl") && !(new File(path).canRead())) { error( "File \"" + newVal + "\" specified in '" + el.name() + "' element " + "does not exist"); } return path; } // onceOnlyPath() /** * Locate the named attribute and retrieve its value as an integer. * If not found, an error exception is thrown. * * @param el Element to search * @param attribName Attribute to find */ private int parseIntAttrib(EasyNode el, String attribName) throws QueryGenException { return parseIntAttrib(el, attribName, false, 0); } /** * Locate the named attribute and retrieve its value as an integer. * If not found, return a default value. * * @param el EasyNode to search * @param attribName Attribute to find * @param defaultVal If not found and useDefault is true, return this * value. */ private int parseIntAttrib(EasyNode el, String attribName, int defaultVal) throws QueryGenException { return parseIntAttrib(el, attribName, true, defaultVal); } /** * Locate the named attribute and retrieve its value as an integer. * Handles default processing if requested. * * @param el EasyNode to search * @param attribName Attribute to find * @param useDefault true to supply a default value if none found, * false to throw an exception if not found. * @param defaultVal If not found and useDefault is true, return this * value. 
*/ private int parseIntAttrib(EasyNode el, String attribName, boolean useDefault, int defaultVal) throws QueryGenException { String elName = el.name(); String str = parseStringAttrib(el, attribName, useDefault, null); if (str == null && useDefault) return defaultVal; if (str.equals("all")) return 999999999; try { return Integer.parseInt(str); } catch (Exception e) { error( "'" + attribName + "' attribute of '" + elName + "' element is not a valid integer"); return 0; } } // parseIntAttrib() /** * Locate the named attribute and retrieve its value as a float. * If not found, an error exception is thrown. * * @param el Element to search * @param attribName Attribute to find */ private float parseFloatAttrib(EasyNode el, String attribName) throws QueryGenException { return parseFloatAttrib(el, attribName, false, 0); } /** * Locate the named attribute and retrieve its value as a float. * If not found, return a default value. * * @param el EasyNode to search * @param attribName Attribute to find * @param defaultVal If not found and useDefault is true, return this * value. */ @SuppressWarnings("unused") private float parseFloatAttrib(EasyNode el, String attribName, float defaultVal) throws QueryGenException { return parseFloatAttrib(el, attribName, true, defaultVal); } /** * Locate the named attribute and retrieve its value as a float. Negative * values are not allowed. Handles default processing if requested. * * @param el EasyNode to search * @param attribName Attribute to find * @param useDefault true to supply a default value if none found, * false to throw an exception if not found. * @param defaultVal If not found and useDefault is true, return this * value. 
*/ private float parseFloatAttrib(EasyNode el, String attribName, boolean useDefault, float defaultVal) throws QueryGenException { String elName = el.name(); String str = parseStringAttrib(el, attribName, useDefault, null); if (str == null && useDefault) return defaultVal; try { float ret = Float.parseFloat(str); if (ret < 0) { error( "'" + attribName + "' attribute of '" + elName + "' element is not allowed to be negative"); } return ret; } catch (NumberFormatException e) { error( "'" + attribName + "' attribute of '" + elName + "' element is not a valid floating-point number"); return 0; } } // parseFloatAttrib() /** * Locate the named attribute and retrieve its value as an boolean. * If not found, an error exception is thrown. * * @param el Element to search * @param attribName Attribute to find */ private boolean parseBooleanAttrib(EasyNode el, String attribName) throws QueryGenException { return parseBooleanAttrib(el, attribName, false, false); } /** * Locate the named attribute and retrieve its value as an boolean. * If not found, return a default value. * * @param el EasyNode to search * @param attribName Attribute to find * @param defaultVal If not found and useDefault is true, return this * value. */ private boolean parseBooleanAttrib(EasyNode el, String attribName, boolean defaultVal) throws QueryGenException { return parseBooleanAttrib(el, attribName, true, defaultVal); } /** * Locate the named attribute and retrieve its value as an boolean. * Handles default processing if requested. * * @param el EasyNode to search * @param attribName Attribute to find * @param useDefault true to supply a default value if none found, * false to throw an exception if not found. * @param defaultVal If not found and useDefault is true, return this * value. 
*/ private boolean parseBooleanAttrib(EasyNode el, String attribName, boolean useDefault, boolean defaultVal) throws QueryGenException { String elName = el.name(); String str = parseStringAttrib(el, attribName, useDefault, null); if (str == null && useDefault) return defaultVal; if (str.matches("^(yes|true|1)$")) return true; else if (str.matches("^(no|false|0)$")) return false; error( "'" + attribName + "' attribute of '" + elName + "' element is not a valid boolean (yes/no/true/false/1/0)"); return false; } // parseBooleanAttrib() /** * Locate the named attribute and retrieve its value as a string. If * not found, an error exception is thrown. * * @param el EasyNode to search * @param attribName Attribute to find */ private String parseStringAttrib(EasyNode el, String attribName) throws QueryGenException { return parseStringAttrib(el, attribName, false, null); } /** * Locate the named attribute and retrieve its value as a string. If * not found, return a default value. * * @param el EasyNode to search * @param attribName Attribute to find * @param defaultVal If not found, return this value. */ @SuppressWarnings("unused") private String parseStringAttrib(EasyNode el, String attribName, String defaultVal) throws QueryGenException { return parseStringAttrib(el, attribName, true, defaultVal); } /** * Locate the named attribute and retrieve its value as a string. * Handles default processing if requested. * * @param el EasyNode to search * @param attribName Attribute to find * @param useDefault true to supply a default value if none found, * false to throw an exception if not found. * @param defaultVal If not found and useDefault is true, return this * value. 
*/
  private String parseStringAttrib(EasyNode el, String attribName,
                                   boolean useDefault, String defaultVal)
    throws QueryGenException
  {
    final String owner = el.name();
    final String value = el.attrValue(attribName);

    // Happy path: the attribute exists and has some text in it.
    if (value != null && value.length() != 0) {
      return value;
    }

    // Missing and empty attributes are both treated as "not supplied";
    // they differ only in the wording of the error raised when no default
    // is allowed.
    if (!useDefault) {
      if (value == null) {
        error(
          "'" + owner + "' element must specify '" + attribName +
          "' attribute");
      }
      else {
        error(
          "'" + owner + "' element specified empty '" + attribName +
          "' attribute");
      }
    }
    return defaultVal;
  } // parseStringAttrib()

  /**
   * Exception type for problems found in the query being parsed. It
   * reports itself as non-severe through isSevere().
   */
  public class QueryFormatError extends GeneralException
  {
    /** Creates the error with the given human-readable message. */
    public QueryFormatError(String message) {
      super(message);
    }

    /** Always false for this exception type. */
    public boolean isSevere() {
      return false;
    }
  } // class QueryFormatError

  /**
   * Simple holder grouping the queries and 'not' clauses collected for one
   * field.
   */
  private static class QueryEntry
  {
    /** Queries gathered for the field */
    public Vector queries = new Vector();

    /** 'not' clauses gathered for the field */
    public Vector nots = new Vector();

    /** Name of the field this entry belongs to */
    public String field;

    /** Creates an empty entry for the given field. */
    public QueryEntry(String field) {
      this.field = field;
    }
  } // class QueryEntry
} // class QueryRequestParser