/* * This file is part of the Wayback archival access software * (http://archive-access.sourceforge.net/projects/wayback/). * * Licensed to the Internet Archive (IA) by one or more individual * contributors. * * The IA licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.archive.wayback.webapp; import java.io.IOException; import java.util.logging.Logger; import javax.servlet.ServletException; import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; import org.archive.url.UsableURI; import org.archive.url.UsableURIFactory; import org.archive.util.ArchiveUtils; import org.archive.wayback.util.Timestamp; import org.archive.wayback.util.url.UrlOperations; import org.archive.wayback.util.webapp.AbstractRequestHandler; import org.archive.wayback.util.webapp.RequestHandler; /** * {@code ServerRelativeArchivalRedirect} is a {@link RequestHandler} * that redirects <i>leaked</i> server-relative URL back to replay request * URL. * <p>For example, assuming {@code Referer} is * {@code http://web.archive.org/web/20010203040506/http://example.com/index.html}, * it redirects request {@code http://web.archive.org/js/foo.js} * to {@code http://web.archive.org/web/20010203040506/http://example.com/js/foo.js}.</p> * <p>It is typically set up as catch-all {@code RequestHandler}</p> * @author brad */ public class ServerRelativeArchivalRedirect extends AbstractRequestHandler { private static final Logger LOGGER = Logger .getLogger(ServerRelativeArchivalRedirect.class.getName()); boolean useCollection = false; private String matchHost = null; private int matchPort = -1; private String replayPrefix; private String handleRequestWithCollection(HttpServletRequest httpRequest, HttpServletResponse httpResponse) throws ServletException, IOException { // TODO: check if this works with non-empty context path. // I believe requestURI starts with context path. So non-empty // context path will break collection and timestamp extraction // code below. It'd be desirable for this class to get a pattern // of replay URL from somewhere - replayPreifx? // hope that it's a server relative request, with a valid referrer: String referer = httpRequest.getHeader("Referer"); if (referer == null) return null; final UsableURI refuri = UsableURIFactory.getInstance(referer); // Check that the Referer is our current wayback path // before attempting to use referer as base archival url if ((matchHost != null && !matchHost.equals(refuri.getHost())) || (matchPort != -1 && refuri.getPort() != -1 && matchPort != refuri .getPort())) { LOGGER.info("Server-Relative-Redirect: Skipping, Referer " + refuri.getHost() + ":" + refuri.getPort() + " not from matching wayback host:port\t"); return null; } String path = refuri.getPath(); int secondSlash = path.indexOf('/', 1); if (secondSlash == -1) return null; String collection = path.substring(0, secondSlash); collection = modifyCollection(collection); String remainder = path.substring(secondSlash + 1); int thirdSlash = remainder.indexOf('/'); if (thirdSlash == -1) return null; String datespec = remainder.substring(0, thirdSlash); if (!datespec.isEmpty() && !Character.isDigit(datespec.charAt(0))) { datespec = null; } String url = remainder.substring(thirdSlash + 1); url = UrlOperations.fixupScheme(url); url = ArchiveUtils.addImpliedHttpIfNecessary(url); String thisPath = httpRequest.getRequestURI(); String queryString = httpRequest.getQueryString(); if (queryString != null) { thisPath += "?" + queryString; } String resolved = UrlOperations.resolveUrl(url, thisPath); String contextPath = httpRequest.getContextPath(); StringBuilder sb = new StringBuilder(refuri.getScheme()); sb.append("://"); sb.append(refuri.getAuthority()); sb.append(contextPath); sb.append(collection); sb.append("/"); if (datespec != null) { sb.append(datespec); sb.append("/"); } sb.append(resolved); return sb.toString(); } /** * modify collection if necessary. * <p>default implementation simply return {@code collection}.</p> * @param collection (the first path component of Referer URL.) * note value has leading slash, which must be retained. * @return possibly modified collection. */ protected String modifyCollection(String collection) { return collection; } private String handleRequestWithoutCollection( HttpServletRequest httpRequest, HttpServletResponse httpResponse) throws ServletException, IOException { // hope that it's a server relative request, with a valid referrer: String referer = httpRequest.getHeader("Referer"); if (referer == null) return null; LOGGER.fine("referer:" + referer); UsableURI uri = UsableURIFactory.getInstance(referer); String path = uri.getPath(); String remainder = path.substring(1); int thirdSlash = remainder.indexOf('/'); LOGGER.fine("referer:(" + referer + ") remain(" + remainder + ") 3rd(" + thirdSlash + ")"); if (thirdSlash == -1) return null; String datespec = remainder.substring(0, thirdSlash); String url = remainder.substring(thirdSlash + 1); url = UrlOperations.fixupScheme(url); url = ArchiveUtils.addImpliedHttpIfNecessary(url); String thisPath = httpRequest.getRequestURI(); String queryString = httpRequest.getQueryString(); if (queryString != null) { thisPath += "?" + queryString; } String resolved = UrlOperations.resolveUrl(url, thisPath); String contextPath = httpRequest.getContextPath(); String finalUrl = uri.getScheme() + "://" + uri.getAuthority() + contextPath + "/" + datespec + "/" + resolved; // cross your fingers!!! LOGGER.info("Server-Relative-Redirect:\t" + referer + "\t" + thisPath + "\t" + finalUrl); return finalUrl; } @Override public boolean handleRequest(HttpServletRequest httpRequest, HttpServletResponse httpResponse) throws ServletException, IOException { if (matchHost != null) { if (!matchHost.equals(httpRequest.getServerName())) { LOGGER.fine("Wrong host for ServerRelativeRed(" + httpRequest.getServerName() + ")"); return false; } } if (matchPort != -1) { if (matchPort != httpRequest.getLocalPort()) { LOGGER.fine("Wrong port for ServerRealtiveRed(" + httpRequest.getServerName() + ")(" + httpRequest.getLocalPort() + ") :" + httpRequest.getRequestURI()); return false; } } String replayUrl = (useCollection ? handleRequestWithCollection(httpRequest, httpResponse) : handleRequestWithoutCollection(httpRequest, httpResponse)); if (replayUrl == null && replayPrefix != null) { String thisPath = httpRequest.getRequestURI(); String queryString = httpRequest.getQueryString(); if (queryString != null) { thisPath += "?" + queryString; } // TODO: rethink this fallback, for now adding https support as // well if (thisPath.startsWith("/http://") || thisPath.startsWith("/https://")) { // assume a replay request: StringBuilder sb = new StringBuilder(thisPath.length() + replayPrefix.length() + 16); sb.append(replayPrefix); sb.append(Timestamp.currentTimestamp().getDateStr()); sb.append(thisPath); replayUrl = sb.toString(); } } if (replayUrl != null) { // Gotta make sure this is properly cached, or // weird things happen: httpResponse.addHeader("Vary", "Referer"); httpResponse.sendRedirect(replayUrl); return true; } return false; } /** * @return the useCollection */ public boolean isUseCollection() { return useCollection; } /** * whether replay URL has <i>collection</i> part. * <p>set this to {@code true} if replay URL has <i>collection</i> * part, path component between context path and timestamp (although * it's called <i>collection</i> based on the common usage of this part, * there's no need to have particular semantics.)</p> * <p>collection part will be passed to {@link #modifyCollection(String)} * before constructing final replay URL to redirect to.<p> * @param useCollection the useCollection to set */ public void setUseCollection(boolean useCollection) { this.useCollection = useCollection; } /** * @return the matchHost */ public String getMatchHost() { return matchHost; } /** * optional host name {@code Referer} URL should match. * @param matchHost the matchHost to set */ public void setMatchHost(String matchHost) { this.matchHost = matchHost; } /** * @return the matchPort */ public int getMatchPort() { return matchPort; } /** * optional port number {@code Referer} URL should match. * @param matchPort the matchPort to set */ public void setMatchPort(int matchPort) { this.matchPort = matchPort; } /** * @return the replayPrefix */ public String getReplayPrefix() { return replayPrefix; } /** * optional replay URL prefix used by fallback method. * @param replayPrefix the replayPrefix to set */ public void setReplayPrefix(String replayPrefix) { this.replayPrefix = replayPrefix; } }