package org.cdlib.xtf.dynaXML;
/**
* Copyright (c) 2004, Regents of the University of California
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* - Neither the name of the University of California nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.net.SocketException;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.Properties;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.Source;
import javax.xml.transform.Templates;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.sax.SAXSource;
import net.sf.saxon.Configuration;
import net.sf.saxon.Controller;
import net.sf.saxon.instruct.Executable;
import net.sf.saxon.om.NamePool;
import net.sf.saxon.trace.TraceListener;
import net.sf.saxon.trans.KeyManager;
import net.sf.saxon.tree.TreeBuilder;
import org.cdlib.xtf.servletBase.RedirectException;
import org.cdlib.xtf.servletBase.TextConfig;
import org.cdlib.xtf.servletBase.TextServlet;
import org.cdlib.xtf.servletBase.StylesheetCache;
import org.cdlib.xtf.textEngine.IndexUtil;
import org.cdlib.xtf.textEngine.QueryRequestParser;
import org.cdlib.xtf.util.AttribList;
import org.cdlib.xtf.util.EasyNode;
import org.cdlib.xtf.util.GeneralException;
import org.cdlib.xtf.util.StructuredStore;
import org.cdlib.xtf.util.Trace;
import org.cdlib.xtf.util.XMLFormatter;
import org.cdlib.xtf.util.XMLWriter;
import org.cdlib.xtf.util.XTFSaxonErrorListener;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.cdlib.xtf.lazyTree.LazyDocument;
import org.cdlib.xtf.lazyTree.LazyKeyManager;
import org.cdlib.xtf.lazyTree.LazyTreeBuilder;
import org.cdlib.xtf.lazyTree.PersistentTree;
import org.cdlib.xtf.lazyTree.LazyProfilingListener;
import org.cdlib.xtf.lazyTree.SearchTree;
/**
* Main dynaXML servlet.
*
* Processes a URL requesting a document, using the docReqParser stylesheet
* to parse the request and locate the document. Checks permissions based on
* the book being accessed and the requestor's IP address or other parameters.
* Performs optional text querying and search hit marking, and finally
* transforms the annotated document using a display stylesheet to form the
* final HTML result page.
*/
public class DynaXML extends TextServlet
{
/** Handles authentication */
Authenticator authenticator;
/** Holds global servlet configuration info */
private DynaXMLConfig config;
/** Locator used to find lazy and non-lazy document files */
private DocLocator docLocator = createDocLocator();
/**
* Called by the superclass to find out the name of our specific config
* file.
*/
public String getConfigName() {
return "conf/dynaXML.conf";
}
/**
* Reads in the configuration file and sets up our helpers (caching,
* authentication, etc.)
*/
protected TextConfig readConfig(String configPath)
{
// Load the configuration file.
config = new DynaXMLConfig(this, configPath);
// Create a helper for authentication.
authenticator = new Authenticator(this);
// And we're done.
return config;
} // readConfig()
/**
* Retrieves the current configuration information (that was read in by
* readConfig()).
*/
public TextConfig getConfig() {
return config;
}
/**
* Retrieves the IP address of the client who is requesting a page from
* this servlet. Handles un-reverse-proxying if necessary.
*
* @param req The HTTP request being processed
*
* @return The IP address (e.g. 123.22.182.1), or empty string
* if not able to figure it out.
*/
private String getClientIP(HttpServletRequest req)
{
// Start with the nominal IP address in the HTTP header.
String ip = req.getRemoteAddr();
Trace.debug(
"Checking IP \"" + ip + "\" vs reverse proxy IP \"" +
config.reverseProxyIP + "\"");
// If it matches the configured address of the reverse proxy, we have
// to un-map it.
//
if (ip.equals(config.reverseProxyIP))
{
Trace.debug("...matches reverseProxyIP");
// Normal reverse proxies store the real IP address in a standard
// header. Check that first.
//
String header = req.getHeader(config.reverseProxyDefaultMarker);
if (!isEmpty(header)) {
Trace.debug(
"...using marker " + config.reverseProxyDefaultMarker + " -> " +
header);
ip = header;
}
// However, some proxies have a special header. If it's present,
// override with that.
//
if (!isEmpty(config.reverseProxyMarker))
{
header = req.getHeader(config.reverseProxyMarker);
if (!isEmpty(header)) {
Trace.debug(
"...using marker " + config.reverseProxyMarker + " -> " + header);
ip = header;
}
}
}
// Some broken proxies prepend "unknown" to the real IP address.
// To work around these, skip all characters until we hit a digit.
//
while (ip.length() > 0 && !Character.isDigit(ip.charAt(0)))
ip = ip.substring(1);
// All done!
return ip;
} // getClientIP()
/**
* Handles the HTTP 'get' method. Based on the HTTP request, produces an
* appropriate response.
*
* @param req The HTTP request
* @param res The HTTP response
* @exception IOException If unable to write the output stream.
*/
public void doGet(HttpServletRequest req, HttpServletResponse res)
throws IOException
{
try
{
// Get the parameters out of the request structure.
String source = req.getParameter("source");
// If profiling is enabled, we have to notify the stylesheet
// cache.
//
StylesheetCache.TraceListenerFactory tlf = null;
if (config.stylesheetProfiling) {
tlf = new StylesheetCache.TraceListenerFactory() {
public TraceListener createListener() {
return new LazyProfilingListener();
}
};
}
stylesheetCache.enableProfiling(tlf);
// Set the default output content type
res.setContentType("text/html");
// Output extended debugging info if requested.
Trace.debug("Processing request: " + getRequestURL(req));
// Run the document request parser
DocRequest docReq = runDocReqParser(req, makeAttribList(req));
// If source overridden in the URL, make sure it's really
// external.
//
if (!isEmpty(source) && source.startsWith("http://"))
{
docReq = new DocRequest(docReq);
docReq.source = source;
}
else {
// Check that the document actually exists.
File docFile = new File(docReq.source);
if (!docFile.canRead())
throw new InvalidDocumentException();
}
// Authenticate (if necessary)
if (!authenticate(docReq, req, res))
return;
// This does the bulk of the work.
apply(docReq, req, res);
}
catch (Exception e) {
if (!(e instanceof RedirectException) && !(e instanceof SocketException))
{
try {
genErrorPage(req, res, e);
}
catch (RedirectException re) {
}
}
return;
}
} // doGet()
/**
* Creates a document request by running the docReqParser stylesheet and
* the given attributes.
*
* @param req The original HTTP request
* @param attribs Attributes to pass to the stylesheet.
* @return A parsed document request, or null if before that step
*/
protected DocRequest runDocReqParser(HttpServletRequest req,
AttribList attribs)
throws Exception
{
DocRequest info = new DocRequest();
// Running the stylesheet may produce additional dependencies if the
// sheet reads in extra files dependent on the document ID. To avoid
// having to throw away the stylesheet entry, we record the current
// dependencies so we can restore them later.
//
Iterator di = stylesheetCache.getDependencies(
config.docLookupSheet);
LinkedList oldStylesheetDeps = new LinkedList();
while (di.hasNext())
oldStylesheetDeps.add(di.next());
// Locate the document request parser stylesheet.
Templates sheet = stylesheetCache.find(config.docLookupSheet);
// Get a transformer to handle this stylesheet.
Transformer trans = sheet.newTransformer();
// Stuff all the common config properties into the transformer in
// case the query generator needs access to them.
//
stuffAttribs(trans, config.attribs);
// Also stuff the URL parameters, in case it wants them that way
// instead of tokenized.
//
stuffAttribs(trans, attribs);
// Add the special computed attributes.
stuffSpecialAttribs(req, trans);
// Make sure errors get directed to the right place.
if (!(trans.getErrorListener() instanceof XTFSaxonErrorListener))
trans.setErrorListener(new XTFSaxonErrorListener());
// Make a <parameters> block.
XMLFormatter fmt = new XMLFormatter();
fmt.blankLineAfterTag(false);
buildParamBlock(attribs, fmt, config.tokenizerMap, null);
if (Trace.getOutputLevel() >= Trace.debug) {
String tmp = fmt.toString();
if (tmp.endsWith("\n"))
tmp = tmp.substring(0, tmp.length() - 1);
Trace.debug("*** docReqParser input ***\n" + tmp);
}
// Now request the stylesheet to give us the info for this document.
TreeBuilder result;
result = new TreeBuilder();
trans.transform(fmt.toSource(), result);
if (Trace.getOutputLevel() >= Trace.debug) {
Trace.debug("*** docReqParser output ***");
Trace.tab();
Trace.debug(XMLWriter.toString(result.getCurrentRoot()));
Trace.untab();
}
// Extract the data we need.
EasyNode root = new EasyNode(result.getCurrentRoot());
for (int i = 0; i < root.nChildren(); i++)
{
EasyNode el = root.child(i);
String tagName = el.name();
if (tagName.equals("style"))
info.style = getRealPath(el.attrValue("path"));
else if (tagName.equals("source"))
info.source = getRealPath(el.attrValue("path"));
else if (tagName.equals("index")) {
info.indexConfig = getRealPath(el.attrValue("configPath"));
info.indexName = el.attrValue("name");
}
else if (tagName.equals("brand"))
info.brand = getRealPath(el.attrValue("path"));
else if (tagName.equals("auth"))
info.authSpecs.add(authenticator.processAuthTag(el));
else if (tagName.equals("query")) {
info.query = new QueryRequestParser().parseRequest(el.getWrappedNode(),
new File(getRealPath("")));
}
else if (tagName.equalsIgnoreCase("preFilter"))
info.preFilter = getRealPath(el.attrValue("path"));
else if (tagName.equalsIgnoreCase("removeDoctypeDecl")) {
String val = el.attrValue("flag");
if (val.matches("^yes$|^true$"))
info.removeDoctypeDecl = true;
else if (val.matches("^no$|^false$"))
info.removeDoctypeDecl = false;
else
throw new DynaXMLException(
"Expected 'true', 'false', " +
"'yes', or 'no' for flag attribute of " + tagName +
" tag specified by docReqParser, but found '" + val + "'");
}
else
throw new DynaXMLException(
"Unknown tag '" + tagName + "' specified by docReqParser");
} // for node
// If no source, assume that means an invalid document ID.
if (isEmpty(info.source))
throw new InvalidDocumentException();
// Make sure a stylesheet was specified.
requireOrElse(info.style, "docReqParser didn't specify 'style'");
// Index config and index name must be either both specified or both
// absent.
//
if (isEmpty(info.indexConfig) && !isEmpty(info.indexName))
throw new GeneralException(
"docReqParser specified 'indexName' without 'indexConfig'");
if (!isEmpty(info.indexConfig) && isEmpty(info.indexName))
throw new GeneralException(
"docReqParser specified 'indexConfig' without 'indexName'");
// And we're done.
return info;
} // runDocReqParser()
/**
* Performs user authentication for a request, given the authentication
* info for the document.
*
* @param docReq Info structure containing authentication parameters
* @param req The request being processed
* @param res Where to send results if authentication fails
*
* @return true iff authentication succeeds
*/
protected boolean authenticate(DocRequest docReq,
HttpServletRequest req, HttpServletResponse res)
throws Exception
{
// Determine the real IP address of the client.
String ipAddr = getClientIP(req);
// Check if this client has permission to access the document.
// An exception thrown if not; false returned if a redirect
// to an external page must happen first.
//
if (!authenticator.checkAuth(ipAddr, docReq.authSpecs, req, res)) {
return false;
}
return true;
} // authenticate()
/**
* Informational method required by Servlet interface. Doesn't seem to
* matter what it says.
*
* @return A string describing this servlet.
*/
public String getServletInfo() {
return "dynaXML dynamic publishing servlet";
} // getServletInfo()
/**
* Loads the source document, optionally performs a text search on it, and
* then runs the document formatter to produce the final HTML result page.
*
* @param docReq Document information (stylesheet, source, etc.)
* @param req The original HTTP request
* @param res Where to send the HTML response
*
* @exception TransformerException If there's an error in the stylesheet.
* @exception IOException If stylesheet or source can't be read.
*/
private void apply(DocRequest docReq, HttpServletRequest req,
HttpServletResponse res)
throws Exception
{
boolean dump = false;
// First, load the stylesheet.
Templates pss = stylesheetCache.find(docReq.style);
// Figure out the output mime type
res.setContentType(calcMimeType(pss));
// Make a transformer and stuff it full of parameters. But if it's the
// same stylesheet as we used last time in this thread, we can re-use
// it for speed (keys won't have to be rebuilt.)
//
Transformer transformer = pss.newTransformer();
stuffAttribs(transformer, req);
stuffAttribs(transformer, config.attribs);
// Also read the brand profile. It's just a simple stylesheet and we
// stuff the output tags into parameters. They can be whatever the
// stylesheet writer desires.
//
readBranding(docReq.brand, req, transformer);
// Get the source document.
Source sourceDoc = getSourceDoc(docReq, transformer);
// If we are in raw mode, use a null transform instead of the
// stylesheet.
//
String raw = req.getParameter("raw");
if ("yes".equals(raw) || "true".equals(raw) || "1".equals(raw))
{
res.setContentType("text/xml");
transformer = IndexUtil.createTransformer();
Properties props = transformer.getOutputProperties();
props.put("indent", "yes");
props.put("method", "xml");
transformer.setOutputProperties(props);
}
// Modify as necessary
if (dump && sourceDoc instanceof PersistentTree)
((PersistentTree)sourceDoc).setAllPermanent(true);
// Make sure errors get directed to the right place.
if (!(transformer.getErrorListener() instanceof XTFSaxonErrorListener))
transformer.setErrorListener(new XTFSaxonErrorListener());
// Our tree is pre-stripped, so it would be inefficient to strip it
// again.
//
((Controller)transformer).getExecutable().setStripsWhitespace(false);
// Now do the bulk of the work
try
{
transformer.transform(sourceDoc,
createFilteredReceiver(transformer, req, res));
}
finally
{
// Clean up.
if (config.stylesheetProfiling) {
Trace.info("Profile for request: " + getRequestURL(req));
Trace.tab();
((PersistentTree)sourceDoc).printProfile();
Trace.untab();
Trace.info("End of profile.");
}
// Debugging: dump search tree.
if (dump && sourceDoc instanceof SearchTree) {
((SearchTree)sourceDoc).pruneUnused();
File file = new File("C:\\tmp\\tree.dump");
Trace.info("Dumping " + file.getAbsolutePath());
PrintWriter outWriter = new PrintWriter(
new OutputStreamWriter(new FileOutputStream(file), "UTF-8"));
outWriter.println(XMLWriter.toString(sourceDoc));
outWriter.close();
}
// It's a good idea to close disk-based trees when done using them.
if (sourceDoc instanceof PersistentTree)
((PersistentTree)sourceDoc).close();
}
} // apply()
/**
* Does the work of locating and loading the source document. Handles
* fetching a file from a URL, lazy file, or a plain XML file on disk.
* Also fires up a text query if requested.
*
* @param docReq Tells which document to load, the query to
* apply, tec.
* @param transformer The XSLT transformer that will be used on the
* document.
*
* @return An XML Source object representing the loaded
* document.
*
* @throws IOException If a problem is encountered loading a file or URL
* @throws SAXException If the document cannot be parsed as valid XML
* @throws ParserConfigurationException Miscellaneous configuration
* problems
*/
protected Source getSourceDoc(DocRequest docReq, Transformer transformer)
throws IOException, SAXException, ParserConfigurationException,
InvalidDocumentException
{
// If a pre-filter stylesheet was specified, load it.
Templates preFilter = null;
if (docReq.preFilter != null)
{
try {
preFilter = stylesheetCache.find(docReq.preFilter);
}
catch (IOException e) {
throw e;
}
catch (SAXException e) {
throw e;
}
catch (Exception e) {
throw new RuntimeException(e);
}
}
// See if we can find a lazy version of the document (for speed
// and searching)
//
StructuredStore lazyStore = docLocator.getLazyStore(docReq.indexConfig,
docReq.indexName,
docReq.source,
preFilter,
docReq.removeDoctypeDecl);
// If not found...
if (lazyStore == null)
{
// Can't perform queries without a lazy tree and its corresponding
// index.
//
if (docReq.query != null)
throw new UnsupportedQueryException();
// Can't find a lazy store... does the original source file exist?
if (!docReq.source.startsWith("http://")) {
File srcFile = new File(docReq.source);
if (!srcFile.isFile() || !srcFile.canRead())
throw new InvalidDocumentException();
}
// Okay, read the original source file.
XMLReader xmlReader = IndexUtil.createXMLReader();
InputSource inSrc = docLocator.getInputSource(docReq.source,
docReq.removeDoctypeDecl);
return new SAXSource(xmlReader, inSrc);
}
// Okay, let's use the lazy version of the document...
Source sourceDoc = null;
// Get the Saxon configuration to use
Controller controller = (Controller)transformer;
Configuration config = controller.getConfiguration();
// If a query was specified, make a SearchTree; otherwise, make
// a normal lazy tree.
//
if (docReq.query != null && docReq.query.query != null) {
String docKey = IndexUtil.calcDocKey(new File(getRealPath("")),
new File(docReq.indexConfig),
docReq.indexName,
new File(docReq.source));
SearchTree tree = new SearchTree(config, docKey, lazyStore);
tree.search(createQueryProcessor(), docReq.query);
sourceDoc = tree;
}
else {
LazyTreeBuilder builder = new LazyTreeBuilder(config);
builder.setNamePool(NamePool.getDefaultNamePool());
sourceDoc = builder.load(lazyStore);
}
// We want to print out any indexes being created, because
// they should have all been done by the textIndexer.
//
((LazyDocument)sourceDoc).setDebug(true);
// We need a special key manager on the lazy tree, so that we can
// use lazily stored keys on disk.
//
Executable e = controller.getExecutable();
KeyManager k = e.getKeyManager();
if (!(k instanceof LazyKeyManager))
e.setKeyManager(new LazyKeyManager(controller.getConfiguration(), k));
// All done.
return sourceDoc;
} // getSourceDoc()
/**
* Create a DocLocator. Checks the system property
* "org.cdlib.xtf.DocLocatorClass" to see if there is a user-
* supplied implementation. If not, a {@link DefaultDocLocator} is
* created.
*/
public DocLocator createDocLocator()
{
// Check the system property.
final String propName = "org.cdlib.xtf.DocLocatorClass";
String className = System.getProperty(propName);
Class theClass = DefaultDocLocator.class;
try
{
// Try to create an object of the correct class.
if (className != null)
theClass = Class.forName(className);
DocLocator loc = (DocLocator)theClass.newInstance();
loc.setServlet(this);
return loc;
}
catch (ClassCastException e) {
Trace.error(
"Error: Class '" + className + "' specified by " + "the '" + propName +
"' property does not support the " + DocLocator.class.getName() +
" interface");
throw new RuntimeException(e);
}
catch (Exception e) {
Trace.error(
"Error creating instance of class '" + className +
"' specified by the '" + propName + "' property");
throw new RuntimeException(e);
}
} // createDocLocator()
/**
* Tells the servlet whether to perform stylesheet profiling. The profile
* is (currently) sent to Trace.info().
*
* @param flag If true, subsequent XSLT transformations will be
* profiled.
*/
public void setProfiling(boolean flag) {
config.stylesheetProfiling = flag;
}
} // class TextServlet