package org.cdlib.xtf.crossQuery; /** * Copyright (c) 2004, Regents of the University of California * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of the University of California nor the names of its * contributors may be used to endorse or promote products derived from this * software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ import java.io.File; import java.io.IOException; import java.io.StringReader; import java.net.SocketException; import java.text.DecimalFormat; import java.util.Properties; import javax.servlet.ServletOutputStream; import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; import javax.xml.transform.Source; import javax.xml.transform.Templates; import javax.xml.transform.Transformer; import javax.xml.transform.stream.StreamSource; import net.sf.saxon.om.NodeInfo; import net.sf.saxon.trace.TraceListener; import net.sf.saxon.tree.TreeBuilder; import org.cdlib.xtf.servletBase.StylesheetCache; import org.cdlib.xtf.servletBase.TextConfig; import org.cdlib.xtf.servletBase.TextServlet; import org.cdlib.xtf.servletBase.RedirectException; import org.cdlib.xtf.textEngine.IndexUtil; import org.cdlib.xtf.textEngine.QueryProcessor; import org.cdlib.xtf.textEngine.QueryRequest; import org.cdlib.xtf.textEngine.QueryRequestParser; import org.cdlib.xtf.textEngine.QueryResult; import org.cdlib.xtf.util.AttribList; import org.cdlib.xtf.util.Trace; import org.cdlib.xtf.util.XMLFormatter; import org.cdlib.xtf.util.XMLWriter; import org.cdlib.xtf.util.XTFSaxonErrorListener; /** * The crossQuery servlet coordinates the process of parsing a URL query, * activating the textEngine to find all occurrences, and finally formatting * the results. */ public class CrossQuery extends TextServlet { /** Holds global servlet configuration info */ protected CrossQueryConfig config; /** Used to format decimal numbers */ protected static DecimalFormat decimalFormat = new DecimalFormat(); /** Used for stylesheet profiling, if enabled */ protected TimeProfilingListener profListener; /** * Called by the superclass to find out the name of our specific config * file. */ public String getConfigName() { return "conf/crossQuery.conf"; } /** * Loads the specific configuration file for crossQuery. */ protected TextConfig readConfig(String configPath) { // Load the configuration file. config = new CrossQueryConfig(this, configPath); // And we're done. return config; } // readConfig() /** * Retrieves the current configuration information (that was read in by * readConfig()). */ public TextConfig getConfig() { return config; } /** * Handles the HTTP 'get' method. Initializes the servlet if nececssary, * then parses the HTTP request and processes it appropriately. * * @param req The HTTP request (in) * @param res The HTTP response (out) * @exception IOException If unable to read an index or data file, or * if unable to write the output stream. */ public void doGet(HttpServletRequest req, HttpServletResponse res) throws IOException { try { // Set the default output content type res.setContentType("text/html"); // If profiling is enabled, we have to notify the stylesheet // cache. // StylesheetCache.TraceListenerFactory tlf = null; if (config.stylesheetProfiling) { tlf = new StylesheetCache.TraceListenerFactory() { public TraceListener createListener() { if (profListener == null) profListener = new TimeProfilingListener(); return profListener; } }; } stylesheetCache.enableProfiling(tlf); // If in step mode, output the frameset and top frame... String stepStr = stepSetup(req, res); if (stepStr != null) { ServletOutputStream out = res.getOutputStream(); out.println(stepStr); out.close(); return; } // Output extended debugging info if requested. Trace.debug("Processing request: " + getRequestURL(req)); // This does the bulk of the work. apply(makeAttribList(req), req, res); } catch (Exception e) { if (!(e instanceof RedirectException) && !(e instanceof SocketException)) { try { genErrorPage(req, res, e); } catch (RedirectException re) { } } return; } } // doGet() /** * Informational method required by Servlet interface. Doesn't seem to * matter what it says. * * @return A string describing this servlet. */ public String getServletInfo() { return "crossQuery search servlet"; } // getServletInfo() /** * Creates the query request, processes it, and formats the results. * * @param attribs Attributes to pass to the stylesheets. * @param req The original HTTP request * @param res Where to send the response * * @exception Exception Passes on various errors that might occur. */ protected void apply(AttribList attribs, HttpServletRequest req, HttpServletResponse res) throws Exception { // Record the start time. long startTime = System.currentTimeMillis(); // If a query router was specified, run it. QueryRoute queryRoute = runQueryRouter(req, res, attribs); if (queryRoute == null) return; // Generate a query request. QueryRequest queryReq = runQueryParser(req, res, queryRoute, attribs); if (queryReq == null) return; // Process it to generate result document hits QueryProcessor proc = createQueryProcessor(); QueryResult queryResult = proc.processRequest(queryReq); // Format the hits for the output document. formatHits("crossQueryResult", req, res, attribs, queryReq, queryResult, startTime); } // apply() /** * Creates a query request using the queryParser stylesheet and the given * attributes. * * @param req The original HTTP request * @param res The HTTP response (used for step mode only) * @param attribs Attributes to pass to the stylesheet. * @return A route to the parser, or null if before that step */ protected QueryRoute runQueryRouter(HttpServletRequest req, HttpServletResponse res, AttribList attribs) throws Exception { String step = req.getParameter("debugStep"); // If no router specified but a parser was, return a default route. // This is for backward compatibility. // if (config.queryRouterSheet == null) { if ("1b".equals(step)) { res.setContentType("text/html"); res.getOutputStream().println( "queryRouter stylesheet not specified;<br/> " + "using default route to: " + config.queryParserSheet); return null; } return QueryRoute.createDefault(config.queryParserSheet); } // Make a <parameters> block, without tokenizing XMLFormatter fmt = new XMLFormatter(); fmt.blankLineAfterTag(false); buildParamBlock(attribs, fmt, null, null); // If in step 1, just output the parameter block. if ("1b".equals(step)) { res.setContentType("text/xml"); res.getOutputStream().println(fmt.toString()); return null; } // Locate the stylesheet and make a tranformer. Templates sheet = stylesheetCache.find(config.queryRouterSheet); Transformer trans = sheet.newTransformer(); // Stuff all the common config properties into the transformer in // case the query router needs access to them. // stuffAttribs(trans, config.attribs); // Also stuff the URL parameters, in case it wants them that way. stuffAttribs(trans, attribs); // Add the special computed attributes. stuffSpecialAttribs(req, trans); if (Trace.getOutputLevel() >= Trace.debug) { String tmp = fmt.toString(); if (tmp.endsWith("\n")) tmp = tmp.substring(0, tmp.length() - 1); Trace.debug("*** queryRouter input ***\n" + tmp); } // Make sure errors get directed to the right place. if (!(trans.getErrorListener() instanceof XTFSaxonErrorListener)) trans.setErrorListener(new XTFSaxonErrorListener()); // Now perform the transformation. TreeBuilder output = new TreeBuilder(); trans.transform(fmt.toSource(), output); // Get the result. Source queryRouteDoc = output.getCurrentRoot(); if (Trace.getOutputLevel() >= Trace.debug) { Trace.debug( "*** queryRouter output ***\n" + XMLWriter.toString(queryRouteDoc, false)); } // Parse it into the final route. QueryRoute route = QueryRoute.parse((NodeInfo)queryRouteDoc); // Translate relative path, if necessary. route.queryParserSheet = getRealPath(route.queryParserSheet); // If the route specified an error generator, override the default. if (route.errorGenSheet != null) setErrorGenSheet(getRealPath(route.errorGenSheet)); // Record extra stuff for debugging/step mode route.routerInput = fmt.toString(); route.routerOutput = XMLWriter.toString(queryRouteDoc, false); // All done. return route; } // runQueryRouter() /** * Creates a query request using the queryParser stylesheet and the given * attributes. * * @param req The original HTTP request * @param res The HTTP response (used for step mode only) * @param route Route to the query parser * @param attribs Attributes to pass to the stylesheet. * @return A parsed query request, or null if before that step */ protected QueryRequest runQueryParser(HttpServletRequest req, HttpServletResponse res, QueryRoute route, AttribList attribs) throws Exception { // Make a <parameters> block. XMLFormatter fmt = new XMLFormatter(); fmt.blankLineAfterTag(false); buildParamBlock(attribs, fmt, route.tokenizerMap, route.routerOutput); // If in step 2, just output the parameter block. String step = req.getParameter("debugStep"); if ("2b".equals(step)) { res.setContentType("text/xml"); res.getOutputStream().println(fmt.toString()); return null; } // Locate the query formatting stylesheet. Templates genSheet = stylesheetCache.find(route.queryParserSheet); // Make a transformer for this specific query. Transformer trans = genSheet.newTransformer(); // Stuff all the common config properties into the transformer in // case the query generator needs access to them. // stuffAttribs(trans, config.attribs); // Also stuff the URL parameters, in case it wants them that way // instead of tokenized. // stuffAttribs(trans, attribs); // Add the special computed attributes. stuffSpecialAttribs(req, trans); if (Trace.getOutputLevel() >= Trace.debug) { String tmp = fmt.toString(); if (tmp.endsWith("\n")) tmp = tmp.substring(0, tmp.length() - 1); Trace.debug("*** queryParser input ***\n" + tmp); } // Make sure errors get directed to the right place. if (!(trans.getErrorListener() instanceof XTFSaxonErrorListener)) trans.setErrorListener(new XTFSaxonErrorListener()); // Now perform the transformation. TreeBuilder output = new TreeBuilder(); trans.transform(fmt.toSource(), output); // Get the result. Source queryReqDoc = output.getCurrentRoot(); // Output useful debug info if (Trace.getOutputLevel() >= Trace.debug) { Trace.debug("*** queryParser output ***\n" + XMLWriter.toString(queryReqDoc, false)); } // Shunt if necessary (for instance, in step mode) if (shuntQueryReq(req, res, queryReqDoc)) return null; // Process it to generate result document hits QueryRequest queryReq = new QueryRequestParser().parseRequest(queryReqDoc, new File( getRealPath(""))); // Fill in the auxiliary info queryReq.parserInput = fmt.toString(); queryReq.parserOutput = XMLWriter.toString(queryReqDoc, false); // All done. return queryReq; } // runQueryParser() /** * Called right after the raw query request has been generated, but * before it is parsed. Gives us a chance to stop processing here in * step mode. */ protected boolean shuntQueryReq(HttpServletRequest req, HttpServletResponse res, Source queryReqDoc) throws IOException { // If we're on step 2b, simply output the query request. String step = req.getParameter("debugStep"); if ("3b".equals(step)) { res.setContentType("text/xml"); res.getOutputStream().println(XMLWriter.toString(queryReqDoc)); return true; } return false; } // shuntQueryReq() /** * Formats a list of hits using the resultFormatter stylesheet. * * @param mainTagName Name of the top-level tag to generate (e.g. * "crossQueryResult", etc.) * @param req The original HTTP request * @param res Where to send the HTML response * @param attribs Parameters to pass to the stylesheet * @param queryRequest Query request that produced the hits * @param queryResult Hits resulting from the query request * @param startTime Time (in milliseconds) request began */ protected void formatHits(String mainTagName, HttpServletRequest req, HttpServletResponse res, AttribList attribs, QueryRequest queryRequest, QueryResult queryResult, long startTime) throws Exception { // If we are in raw mode (or on step 4 in step mode), use a null // transform instead of the stylesheet. // Transformer trans; String raw = req.getParameter("raw"); if (raw == null) raw = (String) req.getAttribute("org.cdlib.xtf.servlet.raw"); String step = req.getParameter("debugStep"); if ("yes".equals(raw) || "true".equals(raw) || "1".equals(raw) || "4b".equals(step)) { res.setContentType("text/xml"); trans = IndexUtil.createTransformer(); Properties props = trans.getOutputProperties(); props.put("indent", "yes"); props.put("method", "xml"); trans.setOutputProperties(props); } else { // Locate the display stylesheet. Templates displaySheet = stylesheetCache.find(queryRequest.displayStyle); // Figure out the output mime type res.setContentType(calcMimeType(displaySheet)); // Make a transformer for this specific query. trans = displaySheet.newTransformer(); } // Stuff all the common config properties into the transformer in // case the query generator needs access to them. // stuffAttribs(trans, config.attribs); // Also stuff the URL parameters (in case stylesheet wants them) stuffAttribs(trans, attribs); // Add the special computed parameters. stuffSpecialAttribs(req, trans); // Make an input document for it based on the document hits. Insert // an attribute documenting how long the query took, including // formatting the hits. // StringBuffer extraStuff = new StringBuffer(); if (queryRequest.parserInput != null) extraStuff.append(queryRequest.parserInput); if (queryRequest.parserOutput != null) extraStuff.append(queryRequest.parserOutput); String hitsString = queryResult.hitsToString(mainTagName, extraStuff.toString()); String prefix = "<" + mainTagName + " "; assert hitsString.startsWith(prefix); long queryTime = System.currentTimeMillis() - startTime; String formattedTime = decimalFormat.format(queryTime / 1000.0); hitsString = prefix + "queryTime=\"" + formattedTime + "\" " + hitsString.substring(prefix.length()); Source sourceDoc = new StreamSource(new StringReader(hitsString)); // Make sure errors get directed to the right place. if (!(trans.getErrorListener() instanceof XTFSaxonErrorListener)) trans.setErrorListener(new XTFSaxonErrorListener()); // Do it! trans.transform(sourceDoc, createFilteredReceiver(trans, req, res)); // Print profile if requested. if (config.stylesheetProfiling) { Trace.info("Profile for request: " + getRequestURL(req)); Trace.tab(); profListener.printProfile(); Trace.untab(); Trace.info("End of profile."); } } // formatHits() /** * Checks if step mode is active and in the setup phase. If so, outputs * the frameset and information frames. * * @param req The HTTP request (in) * @param res The HTTP response (out) * * @return A string to output if in step setup phase, null to do normal * processing */ protected String stepSetup(HttpServletRequest req, HttpServletResponse res) throws IOException { // Start with the basics. By the way, we do *not* want to translate // special characters in the URL, e.g. if a query term contains an // ampersand. // String baseUrl = req.getRequestURL().toString(); // Sometimes we don't get the query parameters, sometimes we do. If // we didn't get them but there are some, add them on. // if (baseUrl.indexOf('?') < 0 && req.getQueryString() != null && req.getQueryString().length() > 0) { baseUrl = baseUrl + "?" + req.getQueryString(); } baseUrl = baseUrl.replaceAll("\"", """); // because we're embedding in HTML String step = req.getParameter("debugStep"); if (step == null || step.length() == 0) return null; // Output the frame set, with two frames: one for info, one for data. if (step.matches("^[0-9]$")) { String urlA = baseUrl.replaceAll("debugStep=" + step, "debugStep=" + step + "a"); String urlB = baseUrl.replaceAll("debugStep=" + step, "debugStep=" + step + "b"); return "<html>\n" + " <head>\n" + " <title>crossQuery Step " + step + "</title>\n" + " </head>\n" + " <frameset rows=\"195,*\" border=\"2\" framespacing=\"2\" " + "frameborder=\"1\">\n" + " <frame title=\"Info\" name=\"info\" src=\"" + urlA + "\">\n" + " <frame title=\"Data\" name=\"data\" src=\"" + urlB + "\">\n" + " </frameset>\n" + "</html>"; } // Output the contents of the info frame if (step.matches("^[0-9]a$")) { int stepNum = Integer.parseInt(step.substring(0, 1)); StringBuffer out = new StringBuffer(); out.append( "<html>\n" + " <body>\n" + " <b><i>crossQuery</b></i> Step " + stepNum + "    "); String prevUrl = (stepNum == 1) ? null : baseUrl.replaceAll("debugStep=" + step, "debugStep=" + (stepNum - 1)); String nextUrl = (stepNum == 5) ? null : baseUrl.replaceAll("debugStep=" + step, "debugStep=" + (stepNum + 1)); if (stepNum > 1) out.append("<a href=\"" + prevUrl + "\" target=\"_top\">[Previous]</a> "); else out.append("<font color=\"#C0C0C0\">[Previous]</font> "); if (stepNum < 5) out.append("<a href=\"" + nextUrl + "\" target=\"_top\">[Next]</a>"); else out.append("<font color=\"#C0C0C0\">[Next]</font>"); out.append( " <table cellspacing=\"5\" cellpadding=\"0\">\n" + " <tr>\n"); for (int i = 1; i <= 5; i++) { if (i == stepNum) out.append("<td bgcolor=\"#E0E0E0\"><b>"); else out.append("<td>"); if (i != stepNum) { String link = baseUrl.replaceAll("debugStep=" + step, "debugStep=" + i); out.append("<a href=\"" + link + "\" target=\"_top\">"); } out.append("Step " + i + "<br>"); if (i != stepNum) out.append("</a>"); switch (i) { case 1: out.append("Raw URL parameters"); break; case 2: out.append("Tokenized URL parameters"); break; case 3: out.append("XML query"); break; case 4: out.append("Raw results"); break; case 5: out.append("Formatted results"); break; } out.append("</td>"); if (i < 5) { out.append("<td>--></td><td>"); switch (i) { case 1: out.append("Query Router"); break; case 2: out.append("Query Parser"); break; case 3: out.append("Text Engine"); break; case 4: out.append("Result Formatter"); break; } out.append("</td><td>--></td>"); } if (i == stepNum) out.append("</b>"); out.append("</td>\n"); } out.append(" </tr>\n" + " </table>\n"); switch (stepNum) { case 1: if (config.queryRouterSheet == null) { out.append( "Step 1 is the raw URL parameters to be fed to the " + "<b>Query Router</b> stylesheet. Since no query router was " + "specified, the request will be routed automatically to " + "<code><b>" + config.queryParserSheet + "</b></code>. Skip to " + "<a href=\"" + nextUrl + "\" target=\"_top\">step 2</a>."); break; } out.append( "In step 1, parameters specified in the URL are " + "translated, without tokenizing, to an XML " + "<code><parameters></code> " + "block (shown below). Next, this will be fed to the " + "<b>Query Router</b> stylesheet, <code><b>" + config.queryRouterSheet + "</b></code>. The result " + "should be the route to a query parser stylesheet in " + "<a href=\"" + nextUrl + "\" target=\"_top\">step 2</a>."); break; case 2: out.append( "In step 2, parameters specified in the URL are " + "tokenized and translated to an XML " + "<code><parameters></code> " + "block (shown below). Next, this will be fed to the " + "<b>Query Parser</b> stylesheet. The result " + "should be an XML query in " + "<a href=\"" + nextUrl + "\" target=\"_top\">step 3</a>."); break; case 3: out.append( "Step 3: The URL parameters from " + "<a href=\"" + prevUrl + "\" target=\"_top\">step 2</a> " + "have now been processed by the <b>Query Parser</b> stylesheet " + "into an XML query, shown below. Next, XTF's <b>Text Engine</b> " + "will execute this query to produce the raw search " + "results in " + "<a href=\"" + nextUrl + "\" target=\"_top\">step 4</a>. " + "Note that the final <b>Result Formatter</b> stylesheet " + "(for step 5) is specified here as well."); break; case 4: out.append( "In step 4, XTF's <b>Text Engine</b> has executed " + "the XML query from " + "<a href=\"" + prevUrl + "\" target=\"_top\">step 3</a> " + "to produce raw search results, shown below. These will be " + "fed in turn to the <b>Result Formatter</b> stylesheet " + "to produce the final HTML page in " + "<a href=\"" + nextUrl + "\" target=\"_top\">step 5</a>."); break; case 5: out.append( "Step 5 shows the final HTML result produced by " + "feeding the raw search results from " + "<a href=\"" + prevUrl + "\" target=\"_top\">step 4</a> " + "into the <b>Result Formatter</b> stylesheet."); } out.append(" </body>\n" + "</html>"); return out.toString(); } return null; } // stepSetup() } // class CrossQuery