/* * HTMLServlet.java * * Version: $Revision: 4430 $ * * Date: $Date: 2009-10-10 17:21:30 +0000 (Sat, 10 Oct 2009) $ * * Copyright (c) 2002-2005, Hewlett-Packard Company and Massachusetts * Institute of Technology. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are * met: * * - Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * - Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * - Neither the name of the Hewlett-Packard Company nor the name of the * Massachusetts Institute of Technology nor the names of their * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH * DAMAGE. */ package org.dspace.app.webui.servlet; import java.io.IOException; import java.io.InputStream; import java.net.URLDecoder; import java.sql.SQLException; import javax.servlet.ServletException; import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; import org.apache.log4j.Logger; import org.dspace.app.webui.util.JSPManager; import org.dspace.authorize.AuthorizeException; import org.dspace.content.Bitstream; import org.dspace.content.Bundle; import org.dspace.content.Item; import org.dspace.core.ConfigurationManager; import org.dspace.core.Constants; import org.dspace.core.Context; import org.dspace.core.LogManager; import org.dspace.core.Utils; import org.dspace.handle.HandleManager; import org.dspace.services.model.Event; import org.dspace.usage.UsageEvent; import org.dspace.utils.DSpace; /** * Servlet for HTML bitstream support. * <P> * If we receive a request like this: * <P> * <code>http://dspace.foo.edu/html/123.456/789/foo/bar/index.html</code> * <P> * we first check for a bitstream with the *exact* filename * <code>foo/bar/index.html</code>. Otherwise, we strip the path information * (up to three levels deep to prevent infinite URL spaces occurring) and see if * we have a bitstream with the filename <code>index.html</code> (with no * path). If this exists, it is served up. This is because if an end user * uploads a composite HTML document with the submit UI, we will not have * accurate path information, and so we assume that if the browser is requesting * foo/bar/index.html but we only have index.html, that this is the desired file * but we lost the path information on upload. * * @author Austin Kim, Robert Tansley * @version $Revision: 4430 $ */ public class HTMLServlet extends DSpaceServlet { /** log4j category */ private static Logger log = Logger.getLogger(HTMLServlet.class); /** * Default maximum number of path elements to strip when testing if a * bitstream called "foo.html" should be served when "xxx/yyy/zzz/foo.html" * is requested. */ private int maxDepthGuess; /** * Create an HTML Servlet */ public HTMLServlet() { super(); if (ConfigurationManager.getProperty("webui.html.max-depth-guess") != null) { maxDepthGuess = ConfigurationManager .getIntProperty("webui.html.max-depth-guess"); } else { maxDepthGuess = 3; } } // Return bitstream whose name matches the target bitstream-name // bsName, or null if there is no match. Match must be exact. // NOTE: This does not detect duplicate bitstream names, just returns first. private static Bitstream getItemBitstreamByName(Item item, String bsName) throws SQLException { Bundle[] bundles = item.getBundles(); for (int i = 0; i < bundles.length; i++) { Bitstream[] bitstreams = bundles[i].getBitstreams(); for (int k = 0; k < bitstreams.length; k++) { if (bsName.equals(bitstreams[k].getName())) return bitstreams[k]; } } return null; } // On the surface it doesn't make much sense for this servlet to // handle POST requests, but in practice some HTML pages which // are actually JSP get called on with a POST, so it's needed. protected void doDSPost(Context context, HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException, SQLException, AuthorizeException { doDSGet(context, request, response); } protected void doDSGet(Context context, HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException, SQLException, AuthorizeException { Item item = null; Bitstream bitstream = null; String idString = request.getPathInfo(); String filenameNoPath = null; String fullpath = null; String handle = null; // Parse URL if (idString != null) { // Remove leading slash if (idString.startsWith("/")) { idString = idString.substring(1); } // Get handle and full file path int slashIndex = idString.indexOf('/'); if (slashIndex != -1) { slashIndex = idString.indexOf('/', slashIndex + 1); if (slashIndex != -1) { handle = idString.substring(0, slashIndex); fullpath = URLDecoder.decode(idString .substring(slashIndex + 1), Constants.DEFAULT_ENCODING); // Get filename with no path slashIndex = fullpath.indexOf('/'); if (slashIndex != -1) { String[] pathComponents = fullpath.split("/"); if (pathComponents.length <= maxDepthGuess + 1) { filenameNoPath = pathComponents[pathComponents.length - 1]; } } } } } if (handle != null && fullpath != null) { // Find the item try { /* * If the original item doesn't have a Handle yet (because it's * in the workflow) what we actually have is a fake Handle in * the form: db-id/1234 where 1234 is the database ID of the * item. */ if (handle.startsWith("db-id")) { String dbIDString = handle .substring(handle.indexOf('/') + 1); int dbID = Integer.parseInt(dbIDString); item = Item.find(context, dbID); } else { item = (Item) HandleManager .resolveToObject(context, handle); } } catch (NumberFormatException nfe) { // Invalid ID - this will be dealt with below } } if (item != null) { // Try to find bitstream with exactly matching name + path bitstream = getItemBitstreamByName(item, fullpath); if (bitstream == null && filenameNoPath != null) { // No match with the full path, but we can try again with // only the filename bitstream = getItemBitstreamByName(item, filenameNoPath); } } // Did we get a bitstream? if (bitstream != null) { log.info(LogManager.getHeader(context, "view_html", "handle=" + handle + ",bitstream_id=" + bitstream.getID())); new DSpace().getEventService().fireEvent( new UsageEvent( UsageEvent.Action.VIEW, request, context, bitstream)); //new UsageEvent().fire(request, context, AbstractUsageEvent.VIEW, // Constants.BITSTREAM, bitstream.getID()); // Set the response MIME type response.setContentType(bitstream.getFormat().getMIMEType()); // Response length response.setHeader("Content-Length", String.valueOf(bitstream .getSize())); // Pipe the bits InputStream is = bitstream.retrieve(); Utils.bufferedCopy(is, response.getOutputStream()); is.close(); response.getOutputStream().flush(); } else { // No bitstream - we got an invalid ID log.info(LogManager.getHeader(context, "view_html", "invalid_bitstream_id=" + idString)); JSPManager.showInvalidIDError(request, response, idString, Constants.BITSTREAM); } } }