/*
 *  This file is part of the Wayback archival access software
 *   (http://archive-access.sourceforge.net/projects/wayback/).
 *
 *  Licensed to the Internet Archive (IA) by one or more individual
 *  contributors.
 *
 *  The IA licenses this file to You under the Apache License, Version 2.0
 *  (the "License"); you may not use this file except in compliance with
 *  the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 */
package org.archive.wayback.archivalurl;

import java.io.IOException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import javax.servlet.ServletException;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;

import org.archive.wayback.ResultURIConverter;
import org.archive.wayback.core.CaptureSearchResult;
import org.archive.wayback.core.CaptureSearchResults;
import org.archive.wayback.core.Resource;
import org.archive.wayback.core.WaybackRequest;
import org.archive.wayback.replay.HttpHeaderProcessor;
import org.archive.wayback.replay.TextDocument;
import org.archive.wayback.replay.TextReplayRenderer;
import org.archive.wayback.util.Timestamp;

/**
 * {@link TextReplayRenderer} that rewrites URLs in a JavaScript resource for
 * replay in ArchivalURL mode, and inserts {@code jspInserts} at the top of the
 * document.
 * <p>This class looks up URLs in JavaScript with the single regular expression
 * set through its {@code regex} property, and rewrites them with the
 * {@link ResultURIConverter} passed to the {@code updatePage} method (it does
 * not use the one set on {@link TextDocument}).</p>
 * <p>The regular expression shall match a single URL. Be sure to enclose the
 * URL in a capture group (i.e. {@code (}...{@code )}), or no rewrite will
 * happen. The regular expression can have an optional capture group before the
 * URL capture group, which may be necessary for doing more complicated
 * matching. Text captured in the first group is copied to the output; if that
 * group does not participate in a match, nothing is copied for it.</p>
 * <p>Note that each whole match is replaced by (first group +) rewritten URL,
 * so matched text lying outside the capture groups is not preserved.</p>
 * <p>There's an alternative implementation
 * {@link ArchivalURLJSStringTransformerReplayRenderer}, which supports
 * multiple rewrite patterns through <code>StringTransformer</code>.</p>
 *
 * @see ResultURIConverter
 * @see ArchivalURLJSStringTransformerReplayRenderer
 * @author brad
 */
public class ArchivalUrlJSReplayRenderer extends TextReplayRenderer {

    /**
     * @param httpHeaderProcessor which should process HTTP headers
     */
    public ArchivalUrlJSReplayRenderer(HttpHeaderProcessor httpHeaderProcessor) {
        super(httpHeaderProcessor);
    }

    /** Pattern used until {@link #setRegex(String)} is called. */
    private final static Pattern defaultHttpPattern = Pattern
            .compile("(https?://[A-Za-z0-9:_@.-]+)");

    /** Pattern currently used to locate URLs in the document text. */
    private Pattern pattern = defaultHttpPattern;

    /**
     * Sets the regular expression for matching URLs.
     *
     * @param regex regular expression with the URL in its only capture group,
     *        or in its second capture group when a leading group is used
     * @throws java.util.regex.PatternSyntaxException if {@code regex} does not
     *         compile
     */
    public void setRegex(String regex) {
        pattern = Pattern.compile(regex);
    }

    /**
     * @return the source text of the regular expression currently in use
     */
    public String getRegex() {
        return pattern.pattern();
    }

    /**
     * Rewrites every URL matched by the configured pattern in {@code page}
     * into a replay URI, then inserts the text produced by
     * {@code buildInsertText} at the start of the document.
     *
     * @param page document whose {@code sb} buffer is rewritten in place
     * @param httpRequest passed through to {@code buildInsertText}
     * @param httpResponse passed through to {@code buildInsertText}
     * @param wbRequest passed through to {@code buildInsertText}
     * @param result capture being replayed; its capture timestamp is used for
     *        the rewritten URLs
     * @param resource passed through to {@code buildInsertText}
     * @param uriConverter converter used to build each replay URI
     * @param results passed through to {@code buildInsertText}
     * @throws ServletException declared by the method signature
     * @throws IOException declared by the method signature
     */
    protected void updatePage(TextDocument page,
            HttpServletRequest httpRequest, HttpServletResponse httpResponse,
            WaybackRequest wbRequest, CaptureSearchResult result,
            Resource resource, ResultURIConverter uriConverter,
            CaptureSearchResults results) throws ServletException,
            IOException {

        String resourceTS = result.getCaptureTimestamp();
        String captureTS = Timestamp.parseBefore(resourceTS).getDateStr();

        StringBuffer replaced = rewriteUrls(page.sb, captureTS, uriConverter);

        // Matcher.appendReplacement is used with a StringBuffer while the
        // page holds a StringBuilder, so the result has to be copied back.
        page.sb.setLength(0);
        page.sb.ensureCapacity(replaced.length());
        page.sb.append(replaced);

        // if any JS-specific jsp inserts are configured, run and insert...
        page.insertAtStartOfDocument(buildInsertText(page, httpRequest,
                httpResponse, wbRequest, results, result, resource));
    }

    /**
     * Returns a copy of {@code text} in which each match of the configured
     * pattern is replaced by its optional leading group followed by the
     * replay URI of its URL group.
     */
    private StringBuffer rewriteUrls(CharSequence text, String captureTS,
            ResultURIConverter uriConverter) {
        StringBuffer replaced = new StringBuffer(text.length());
        Matcher m = pattern.matcher(text);
        int groupCount = m.groupCount();

        // Without a capture group there is no URL to extract: leave the text
        // untouched instead of failing on m.group(1).
        if (groupCount > 0) {
            // With at least 2 groups the URL is the 2nd group and the 1st
            // group is copied in front of it, which allows more
            // sophisticated matching. Otherwise the URL is the 1st group.
            boolean hasPrefixGroup = groupCount > 1;
            int urlGroup = hasPrefixGroup ? 2 : 1;

            while (m.find()) {
                String url = m.group(urlGroup);
                String output;
                if (url == null) {
                    // URL group did not participate in this match (e.g. an
                    // alternation): keep the matched text as it was.
                    output = m.group();
                } else {
                    String replayUri = uriConverter.makeReplayURI(captureTS,
                            url);
                    String prefix = hasPrefixGroup ? m.group(1) : null;
                    // An optional leading group that did not match yields
                    // null; do not let it become the literal text "null".
                    output = (prefix == null) ? replayUri
                            : prefix + replayUri;
                }
                // Quote the text so '$' and '\' (both common in JavaScript)
                // are emitted literally rather than being interpreted as
                // group references / escapes by appendReplacement.
                m.appendReplacement(replaced,
                        Matcher.quoteReplacement(output));
            }
        }
        m.appendTail(replaced);
        return replaced;
    }
}