/* * This file is part of the Wayback archival access software * (http://archive-access.sourceforge.net/projects/wayback/). * * Licensed to the Internet Archive (IA) by one or more individual * contributors. * * The IA licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.archive.wayback.replay.html; import java.io.IOException; import java.io.OutputStream; import java.net.URL; import java.util.HashMap; import java.util.Map; import org.archive.wayback.ResultURIConverter; import org.archive.wayback.WaybackConstants; import org.archive.wayback.core.CaptureSearchResult; import org.archive.wayback.replay.JSPExecutor; import org.archive.wayback.util.htmllex.ParseContext; /** * {@code ReplayParseContext} holds context information shared among * replay rewriter components. * <p>2014-05-02 small behavior/interface changes: * <ul> * <li>{@link #setJspExec(JSPExecutor)} no longer copies {@code CaptureSearchResult} * object from its {@code UIResults} object to {@code result} member. Use new constructor * taking {@code CaptureSearchResult} object (recommended), or use * {@link #setCaptureSearchResult(CaptureSearchResult)} method.</li> * <li> * </ul> * TODO: consider replacing {@code CaptureSearchResult} reference with {@code Capture}. */ public class ReplayParseContext extends ParseContext { private static final String MAILTO_PREFIX = "mailto:"; public static final String JAVASCRIPT_PREFIX = "javascript:"; public static final String DATA_PREFIX = "data:"; public static final String ANCHOR_PREFIX = "#"; private ContextResultURIConverterFactory uriConverterFactory = null; private String datespec = null; private JSPExecutor jspExec = null; private OutputStream outputStream = null; private Map<String,ResultURIConverter> converters = null; private String outputCharset; private int phase = -1; private int jsBlockCount = -1; private CaptureSearchResult result; private boolean rewriteHttpsOnly; /** * Constructs {@code ReplayParseContext} for rewriting a resource * represented by {@code result}. * <p>Initializes {@code baseUrl} and {@code datespec} from {@code result}'s * {@code originalUrl} and {@code captureTimestamp}, respectively.</p> * @param uriConverterFactory * @param result * @throws IOException */ public ReplayParseContext(ContextResultURIConverterFactory uriConverterFactory, CaptureSearchResult result) throws IOException { this.uriConverterFactory = uriConverterFactory; this.result = result; setBaseUrl(result.getOriginalUrl()); this.datespec = result.getCaptureTimestamp(); this.converters = new HashMap<String, ResultURIConverter>(); } /** * constructor. {@code CaptureSearchResult} needs to be set via * {@link #setCaptureSearchResult}. * @param uriConverterFactory * @param baseUrl * @param datespec * @deprecated 2014-05-02 use {@link #ReplayParseContext(ContextResultURIConverterFactory, CaptureSearchResult)} */ public ReplayParseContext(ContextResultURIConverterFactory uriConverterFactory, URL baseUrl, String datespec) { this.uriConverterFactory = uriConverterFactory; setBaseUrl(baseUrl.toExternalForm()); this.datespec = datespec; this.converters = new HashMap<String,ResultURIConverter>(); } public void setPhase(int phase) { this.phase = phase; } public int getPhase() { return phase; } public void setRewriteHttpsOnly(boolean rewriteHttpsOnly) { this.rewriteHttpsOnly = rewriteHttpsOnly; } /** * return {@code true} if {@code url} needs rewrite in this * replay. * <p>As {@link #contextualizeUrl(String, String)} runs this test, * there's no real point doing this check outside of ReplayParseContext. * this method may be changed to {@code protected} in the future.</p> * @param url URL to test. it must be free of escaping (i.e. no {@code "https:\/\/"}.) * @return {@code true} if {@code url} needs rewrite. * @see #setRewriteHttpsOnly(boolean) */ public boolean isRewriteSupported(String url) { if (rewriteHttpsOnly) return url.startsWith(WaybackConstants.HTTPS_URL_PREFIX); return true; } /** * @return the converters */ public Map<String, ResultURIConverter> getConverters() { return converters; } /** * return {@code CaptureSearchResult} being rendered. * <p>intended for selecting site-specific rewrite rules.</p> * <p>TODO: what's really needed is its {@code urlKey}. add * a method for it for better encapsulation.</p> * @return {@code CaptureSearchResult} in replay mode, * or {@code null} otherwise. */ public CaptureSearchResult getCaptureSearchResult() { return result; } /** * Set capture being rendered. * @param result * @deprecated 2014-11-05 Pass it to constructor */ public void setCaptureSearchResult(CaptureSearchResult result) { this.result = result; } /** * @param converters the converters to set */ public void setConverters(Map<String, ResultURIConverter> converters) { this.converters = converters; } public void addConverter(String flag, ResultURIConverter converter) { converters.put(flag, converter); } /** * returns {@link ResultURIConverter} for resource context <code>flags</code>. * @param flags resource context indicator such as "{@code cs_}", "{@code im_}". * @return ResultURIConverter for translating URL */ public ResultURIConverter getConverter(String flags) { // TODO: caching should be a responsibility of ContextResultURIConverterFactory. // but it's a API-breaking change as converters is exposed through getter. ResultURIConverter converter = converters.get(flags); if(converter == null) { converter = uriConverterFactory.getContextConverter(flags); converters.put(flags,converter); } return converter; } /** * Rewrite URL {@code url} in accordance with current replay mode, not using * replay context {@code flags}. * @param url URL, candidate for rewrite. may contain escaping. must not be {@code null}. * @return rewrittenURL, or {@code url} if no rewrite is necessary. never {@code null}. */ @Override public String contextualizeUrl(String url) { return contextualizeUrl(url,""); } /** * Rewrite URL {@code url} in accordance with current replay mode, taking * replay context {@code flags} into account. * <p>It is important to return the same String object {@code url} if no rewrite * is necessary, so that caller can short-circuit to avoid expensive String operations.</p> * @param url URL, candidate for rewrite. may contain escaping. must not be {@code null}. * @param flags <em>context</em> designator, such as {@code "cs_"}. can be {@code null}. * @return rewrittenURL, or {@code url} if no rewrite is necessary. never {@code null}. */ public String contextualizeUrl(final String url, String flags) { // if we get an empty string, just return it: if (url.length() == 0) { return url; } if (url.startsWith(JAVASCRIPT_PREFIX) || url.startsWith(MAILTO_PREFIX) || url.startsWith(ANCHOR_PREFIX)) { return url; } // XXX duplicated check for MAILTO_PREFIX?? if (url.startsWith(DATA_PREFIX) || url.startsWith(MAILTO_PREFIX)) { return url; } // don't rewrite path-relative urls. For // https://webarchive.jira.com/browse/ARI-3985 String trimmedUrl = url.trim(); if (!trimmedUrl.startsWith("http://") && !trimmedUrl.startsWith("https://") && !trimmedUrl.startsWith("//") && !trimmedUrl.startsWith("http:\\\\/\\\\/") && !trimmedUrl.startsWith("http\\\\u00253A\\\\u00252F\\\\u00252F") && !trimmedUrl.startsWith("https:\\\\/\\\\/") && !trimmedUrl .startsWith("https\\\\u00253A\\\\u00252F\\\\u00252F") && !trimmedUrl.startsWith("http:\\/\\/") && !trimmedUrl.startsWith("https:\\/\\/") && !trimmedUrl.startsWith("/") && !trimmedUrl.startsWith(".")) { return url; } // first make url into absolute, taking BASE into account. // (this also removes escaping: ex. "https:\/\/" -> "https://") String absurl = super.contextualizeUrl(url); if (!isRewriteSupported(absurl)) { return url; } // XXX do this in getConverter if (flags == null) { flags = ""; } ResultURIConverter converter = getConverter(flags); return converter.makeReplayURI(datespec, absurl); } /** * @return the charset */ public String getOutputCharset() { return outputCharset; } /** * @param outputCharset the outputCharset to set */ public void setOutputCharset(String outputCharset) { this.outputCharset = outputCharset; } /** * @return the outputStream */ public OutputStream getOutputStream() { return outputStream; } /** * @param outputStream the outputStream to set */ public void setOutputStream(OutputStream outputStream) { this.outputStream = outputStream; } /** * @return the jspExec */ public JSPExecutor getJspExec() { return jspExec; } /** * @param jspExec the jspExec to set */ public void setJspExec(JSPExecutor jspExec) { this.jspExec = jspExec; } /** * @return the datespec */ public String getDatespec() { return datespec; } /** * @param datespec the datespec to set */ public void setDatespec(String datespec) { this.datespec = datespec; } public void incJSBlockCount() { jsBlockCount++; } public int getJSBlockCount() { return jsBlockCount; } }