/* * This file is part of the Wayback archival access software * (http://archive-access.sourceforge.net/projects/wayback/). * * Licensed to the Internet Archive (IA) by one or more individual * contributors. * * The IA licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.archive.wayback.webapp; import java.io.File; import java.io.IOException; import java.io.UnsupportedEncodingException; import java.net.URLEncoder; import java.util.HashSet; import java.util.List; import java.util.Locale; import java.util.Properties; import java.util.Set; import java.util.logging.Level; import java.util.logging.Logger; import javax.servlet.ServletException; import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; import org.archive.format.gzip.zipnum.ZipNumBlockLoader; import org.archive.wayback.ExceptionRenderer; import org.archive.wayback.QueryRenderer; import org.archive.wayback.ReplayDispatcher; import org.archive.wayback.ReplayRenderer; import org.archive.wayback.RequestParser; import org.archive.wayback.ResourceStore; import org.archive.wayback.ResultURIConverter; import org.archive.wayback.UrlCanonicalizer; import org.archive.wayback.accesscontrol.ContextExclusionFilterFactory; import org.archive.wayback.accesscontrol.CollectionContext; import org.archive.wayback.accesscontrol.ExclusionFilterFactory; import org.archive.wayback.archivalurl.ArchivalUrl; import org.archive.wayback.core.CaptureSearchResult; import 
org.archive.wayback.core.CaptureSearchResults; import org.archive.wayback.core.Resource; import org.archive.wayback.core.SearchResults; import org.archive.wayback.core.UIResults; import org.archive.wayback.core.UrlSearchResults; import org.archive.wayback.core.WaybackRequest; import org.archive.wayback.exception.AccessControlException; import org.archive.wayback.exception.AdministrativeAccessControlException; import org.archive.wayback.exception.AnchorWindowTooSmallException; import org.archive.wayback.exception.AuthenticationControlException; import org.archive.wayback.exception.BadQueryException; import org.archive.wayback.exception.BaseExceptionRenderer; import org.archive.wayback.exception.BetterReplayRequestException; import org.archive.wayback.exception.BetterRequestException; import org.archive.wayback.exception.ConfigurationException; import org.archive.wayback.exception.ResourceIndexNotAvailableException; import org.archive.wayback.exception.ResourceNotAvailableException; import org.archive.wayback.exception.ResourceNotInArchiveException; import org.archive.wayback.exception.SpecificCaptureReplayException; import org.archive.wayback.exception.WaybackException; import org.archive.wayback.memento.DefaultMementoHandler; import org.archive.wayback.memento.MementoHandler; import org.archive.wayback.memento.MementoUtils; import org.archive.wayback.replay.DefaultReplayCaptureSelector; import org.archive.wayback.replay.ReplayCaptureSelector; import org.archive.wayback.replay.html.RewriteDirector; import org.archive.wayback.resourceindex.cdxserver.EmbeddedCDXServerIndex; import org.archive.wayback.resourceindex.filters.ExclusionFilter; import org.archive.wayback.resourceindex.filters.WARCRevisitAnnotationFilter; import org.archive.wayback.util.operator.BooleanOperator; import org.archive.wayback.util.url.UrlOperations; import org.archive.wayback.util.webapp.AbstractRequestHandler; import org.archive.wayback.util.webapp.ShutdownListener; import 
org.archive.wayback.webapp.LiveWebRedirector.LiveWebState; /** * Retains all information about a particular Wayback configuration * within a ServletContext, including holding references to the * implementation instances of the primary Wayback classes: * * RequestParser * ResourceIndex(via WaybackCollection) * ResourceStore(via WaybackCollection) * QueryRenderer * ReplayDispatcher * ExceptionRenderer * ResultURIConverter * * * @author brad */ public class AccessPoint extends AbstractRequestHandler implements ShutdownListener, CollectionContext { /** webapp relative location of Interstitial.jsp */ public final static String INTERSTITIAL_JSP = "jsp/Interstitial.jsp"; /** argument for Interstitial.jsp target URL */ public final static String INTERSTITIAL_TARGET = "target"; /** argument for Interstitial.jsp seconds to delay */ public final static String INTERSTITIAL_SECONDS = "seconds"; /** argument for Interstitial.jsp msse for replay date */ public final static String INTERSTITIAL_DATE = "date"; /** argument for Interstitial.jsp URL being loaded */ public final static String INTERSTITIAL_URL = "url"; public final static String REVISIT_STR = "warc/revisit"; public final static String EMPTY_VALUE = "-"; public final static String RUNTIME_ERROR_HEADER = "X-Archive-Wayback-Runtime-Error"; private final static int MAX_ERR_HEADER_LEN = 300; //public final static String NOTFOUND_ERROR_HEADER = "X-Archive-Wayback-Not-Found"; private static final Logger LOGGER = Logger.getLogger( AccessPoint.class.getName()); private boolean exactHostMatch = false; private boolean exactSchemeMatch = false; private boolean useAnchorWindow = false; private boolean useServerName = false; private boolean serveStatic = true; private boolean bounceToReplayPrefix = false; private boolean bounceToQueryPrefix = false; private boolean forceCleanQueries = true; private boolean timestampSearch = false; public static enum PerfStat { IndexQueryTotal, WArcResource, Total, } private String errorMsgHeader = 
RUNTIME_ERROR_HEADER; private String perfStatsHeader = "X-Archive-Wayback-Perf"; private String warcFileHeader = "x-archive-src"; private boolean enableErrorMsgHeader = false; private boolean enablePerfStatsHeader = false; private boolean enableWarcFileHeader = false; private boolean enableMemento = true; private PerfStats.OutputFormat perfStatsHeaderFormat = PerfStats.OutputFormat.BRACKET; private LiveWebRedirector liveWebRedirector; private String staticPrefix = null; private String queryPrefix = null; private String replayPrefix = null; private String interstitialJsp = INTERSTITIAL_JSP; private String refererAuth = null; private Locale locale = null; private Properties configs = null; private List<String> filePatterns = null; private List<String> fileIncludePrefixes = null; private List<String> fileExcludePrefixes = null; private WaybackCollection collection = null; private ExceptionRenderer exception = new BaseExceptionRenderer(); private QueryRenderer query = null; private RequestParser parser = null; private ReplayDispatcher replay = null; private ResultURIConverter uriConverter = null; private MementoHandler mementoHandler = new DefaultMementoHandler(); private ExclusionFilterFactory exclusionFactory = null; private RewriteDirector rewriteDirector; private BooleanOperator<WaybackRequest> authentication = null; private boolean requestAuth = true; private long embargoMS = 0; private CustomResultFilterFactory filterFactory = null; private UrlCanonicalizer selfRedirectCanonicalizer = null; private int maxRedirectAttempts = 0; private boolean fixedEmbeds = false; public void init() { checkAccessPointAware(collection,exception,query,parser,replay, uriConverter,exclusionFactory, authentication, filterFactory); } protected boolean dispatchLocal(HttpServletRequest httpRequest, HttpServletResponse httpResponse) throws ServletException, IOException { if (LOGGER.isLoggable(Level.FINE)) { LOGGER.fine("Local dispatch /" + translateRequestPath(httpRequest)); } if 
(!serveStatic) { return false; } // String contextRelativePath = httpRequest.getServletPath(); String translatedNoQuery = "/" + translateRequestPath(httpRequest); // String absPath = getServletContext().getRealPath(contextRelativePath); String absPath = getServletContext().getRealPath(translatedNoQuery); if (this.isEnableMemento()) { MementoUtils.addDoNotNegotiateHeader(httpResponse); } //IK: added null check for absPath, it may be null (ex. on jetty) if (absPath != null) { File test = new File(absPath); if((test != null) && !test.exists()) { return false; } } String translatedQ = "/" + translateRequestPathQuery(httpRequest); WaybackRequest wbRequest = new WaybackRequest(); // wbRequest.setContextPrefix(getUrlRoot()); wbRequest.setAccessPoint(this); wbRequest.extractHttpRequestInfo(httpRequest); UIResults uiResults = new UIResults(wbRequest,uriConverter); try { uiResults.forward(httpRequest, httpResponse, translatedQ); return true; } catch(IOException e) { // TODO: figure out if we got IO because of a missing dispatcher } return false; } /** * @param httpRequest HttpServletRequest which is being handled * @param httpResponse HttpServletResponse which is being handled * @return true if the request was actually handled * @throws ServletException per usual * @throws IOException per usual */ public boolean handleRequest(HttpServletRequest httpRequest, HttpServletResponse httpResponse) throws ServletException, IOException { WaybackRequest wbRequest = null; boolean handled = false; try { PerfStats.clearAll(); if (this.isEnablePerfStatsHeader() && (perfStatsHeader != null)) { PerfStats.timeStart(PerfStat.Total); httpResponse = new PerfWritingHttpServletResponse(httpRequest, httpResponse, PerfStat.Total, perfStatsHeader, perfStatsHeaderFormat); } String inputPath = translateRequestPathQuery(httpRequest); Thread.currentThread().setName("Thread " + Thread.currentThread().getId() + " " + getBeanName() + " handling: " + inputPath); LOGGER.fine("Handling translated: " + 
inputPath); wbRequest = getParser().parse(httpRequest, this); if (wbRequest != null) { handled = true; // TODO: refactor this code into RequestParser implementations wbRequest.setAccessPoint(this); // wbRequest.setContextPrefix(getAbsoluteLocalPrefix(httpRequest)); // wbRequest.setContextPrefix(getUrlRoot()); wbRequest.extractHttpRequestInfo(httpRequest); // end of refactor if (getAuthentication() != null) { if (!getAuthentication().isTrue(wbRequest)) { throw new AuthenticationControlException( "Unauthorized", isRequestAuth()); } } // set exclusionFilter on wbRequest only if not set externally if (wbRequest.getExclusionFilter() == null) { wbRequest.setExclusionFilter(createExclusionFilter()); } // TODO: refactor this into RequestParser implementations, so a // user could alter requests to change the behavior within a // single AccessPoint. For now, this is a simple way to expose // the feature to configuration.g wbRequest.setExactScheme(isExactSchemeMatch()); if (wbRequest.isReplayRequest()) { if (bounceToReplayPrefix) { // we don't accept replay requests on this AccessPoint // bounce the user to the right place: String suffix = translateRequestPathQuery(httpRequest); String replayUrl = replayPrefix + suffix; httpResponse.sendRedirect(replayUrl); return true; } handleReplay(wbRequest, httpRequest, httpResponse); } else { if (bounceToQueryPrefix) { // we don't accept replay requests on this AccessPoint // bounce the user to the right place: String suffix = translateRequestPathQuery(httpRequest); String replayUrl = queryPrefix + suffix; httpResponse.sendRedirect(replayUrl); return true; } wbRequest.setExactHost(isExactHostMatch()); handleQuery(wbRequest, httpRequest, httpResponse); } } else { handled = dispatchLocal(httpRequest, httpResponse); } } catch (BetterRequestException e) { e.generateResponse(httpResponse, wbRequest); httpResponse.getWriter(); // cause perf headers to be committed handled = true; } catch (WaybackException e) { if (httpResponse.isCommitted()) 
{ return true; } if (wbRequest == null) { wbRequest = new WaybackRequest(); wbRequest.setAccessPoint(this); } logError(httpResponse, errorMsgHeader, e, wbRequest); LiveWebState liveWebState = LiveWebState.NOT_FOUND; if ((getLiveWebRedirector() != null) && !wbRequest.hasMementoAcceptDatetime() && !wbRequest.isMementoTimemapRequest()) { liveWebState = getLiveWebRedirector().handleRedirect(e, wbRequest, httpRequest, httpResponse); } // If not liveweb redirected, then render current exception if (liveWebState != LiveWebState.REDIRECTED) { e.setLiveWebAvailable(liveWebState == LiveWebState.FOUND); getException().renderException(httpRequest, httpResponse, wbRequest, e, getUriConverter()); } handled = true; } catch (Exception other) { logError(httpResponse, errorMsgHeader, other, wbRequest); } finally { //Slightly hacky, but ensures that all block loaders are closed ZipNumBlockLoader.closeAllReaders(); } return handled; } /** * Return new instance of {@link ExclusionFilter} instance for this AccessPoint. * @throws AccessControlException If it cannot instantiate ExclusionFilter when * it's supposed to (i.e. configured but failed to complete because of network * error etc.) */ public ExclusionFilter createExclusionFilter() throws AccessControlException { ExclusionFilterFactory factory = getExclusionFactory(); if (factory != null) { ExclusionFilter exclusionFilter = null; if (factory instanceof ContextExclusionFilterFactory) { exclusionFilter = ((ContextExclusionFilterFactory)factory).getExclusionFilter(this); } else { exclusionFilter = factory.get(); } if (exclusionFilter == null) { throw new AdministrativeAccessControlException( "AccessControl list unavailable"); } return exclusionFilter; } return null; } public RewriteDirector getRewriteDirector() { return rewriteDirector; } public void setRewriteDirector(RewriteDirector rewriteDirector) { this.rewriteDirector = rewriteDirector; } /** * Return rewrite directive for {@code capture}. 
* @param capture * @return string representing rewrite rules */ public String getRewriteDirective(CaptureSearchResult capture) { String directive = null; // use getter, as it may be overridden in sub-classes. RewriteDirector rd = getRewriteDirector(); if (rd != null) { directive = rd.getRewriteDirective(this, capture); } return directive; } /** * Default implementation returns {@code null}. */ @Override public String getCollectionContextName() { return null; } public void logError(HttpServletResponse httpResponse, String header, Exception e, WaybackRequest request) { if (e instanceof ResourceNotInArchiveException) { if (LOGGER.isLoggable(Level.INFO)) { this.logNotInArchive((ResourceNotInArchiveException)e, request); } } else if (e instanceof AccessControlException) { // While StaticMapExclusionFilter#isExcluded(String) reports // exclusion at INFO level, RobotExclusionFilter logs exclusion // at FINE level only. I believe here is the better place to log // exclusion. Unfortunately, AccessControlException has no // detailed info (TODO). we don't need a stack trace. if (LOGGER.isLoggable(Level.INFO)) { LOGGER.log(Level.INFO, "Access Blocked:" + request.getRequestUrl() + ": "+ e.getMessage()); } } else { if (LOGGER.isLoggable(Level.WARNING)) { LOGGER.log(Level.WARNING, "Runtime Error", e); } } if (!this.isEnableErrorMsgHeader()) { return; } String message = (e != null ? 
e.toString() : ""); if (message == null) { message = ""; } else { // Get substring from exception name int index = message.indexOf(':'); if (index > 0) { index = message.lastIndexOf('.', index); if (index > 0) { message = message.substring(index + 1); } } if (message.length() > MAX_ERR_HEADER_LEN) { message = message.substring(0, MAX_ERR_HEADER_LEN); } message = message.replace('\n', ' '); } httpResponse.setHeader(header, message); } private void logNotInArchive(ResourceNotInArchiveException e, WaybackRequest r) { // TODO: move this into ResourceNotInArchiveException constructor String url = r.getRequestUrl(); StringBuilder sb = new StringBuilder(100); sb.append("NotInArchive\t"); sb.append(getBeanName()).append("\t"); sb.append(url); LOGGER.info(sb.toString()); } protected void checkAccessPointAware(Object... os) { if (os != null) { for (Object o : os) { if (o instanceof AccessPointAware) { AccessPointAware apa = (AccessPointAware)o; apa.setAccessPoint(this); } } } } private void checkInterstitialRedirect(HttpServletRequest httpRequest, WaybackRequest wbRequest) throws BetterRequestException { if ((refererAuth != null) && (refererAuth.length() > 0) && !wbRequest.hasMementoAcceptDatetime()) { String referer = httpRequest.getHeader("Referer"); if ((referer != null) && (referer.length() > 0) && (!referer.contains(refererAuth))) { StringBuffer sb = httpRequest.getRequestURL(); if (httpRequest.getQueryString() != null) { sb.append("?").append(httpRequest.getQueryString()); } StringBuilder u = new StringBuilder(); u.append(getQueryPrefix()); u.append(interstitialJsp); u.append("?"); u.append(INTERSTITIAL_SECONDS).append("=").append(5); u.append("&"); u.append(INTERSTITIAL_DATE).append("=") .append(wbRequest.getReplayDate().getTime()); u.append("&"); u.append(INTERSTITIAL_URL).append("="); try { u.append(URLEncoder.encode(wbRequest.getRequestUrl(), "UTF-8")); } catch (UnsupportedEncodingException e) { // not gonna happen... 
u.append(wbRequest.getRequestUrl()); } u.append("&"); u.append(INTERSTITIAL_TARGET).append("="); try { u.append(URLEncoder.encode(sb.toString(), "UTF-8")); } catch (UnsupportedEncodingException e) { // not gonna happen... u.append(sb.toString()); } throw new BetterRequestException(u.toString()); } } } protected boolean isSelfRedirect(Resource resource, CaptureSearchResult closest, WaybackRequest wbRequest, String canonRequestURL) { int status = resource.getStatusCode(); // Only applies to redirects if ((status < 300) || (status >= 400)) { return false; } String location = resource.getHeader("Location"); if (location == null) { return false; } // if (!closest.getCaptureTimestamp().equals(wbRequest.getReplayTimestamp())) { // return false; // } String redirScheme = UrlOperations.urlToScheme(location); try { if (redirScheme == null && isExactSchemeMatch()) { location = UrlOperations.resolveUrl(closest.getOriginalUrl(), location); redirScheme = UrlOperations.urlToScheme(location); } else if (location.startsWith("/")) { location = UrlOperations.resolveUrl(closest.getOriginalUrl(), location); } if (getSelfRedirectCanonicalizer() != null) { location = getSelfRedirectCanonicalizer().urlStringToKey(location); } } catch (IOException e) { return false; } if (location.equals(canonRequestURL)) { // if not exact scheme, don't do scheme compare, must be equal if (!isExactSchemeMatch()) { return true; } String origScheme = UrlOperations.urlToScheme(wbRequest .getRequestUrl()); if ((origScheme != null) && (redirScheme != null) && (origScheme.compareTo(redirScheme) == 0)) { return true; } } return false; } public SearchResults queryIndex(WaybackRequest wbRequest) throws ResourceIndexNotAvailableException, ResourceNotInArchiveException, BadQueryException, AccessControlException, ConfigurationException { try { PerfStats.timeStart(PerfStat.IndexQueryTotal); return getCollection().getResourceIndex().query(wbRequest); } finally { PerfStats.timeEnd(PerfStat.IndexQueryTotal); } } /** * 
Per-request, decorating ResourceStore that throws * {@link ResourceNotAvailableException} if retrieval of the * archive has failed previously within the session. * It helps AccessPoint quickly scan through {@code CaptureSearchResult}s * pointing the same archive as their revisit original. * It also collects performance stats. */ protected static class SingleLoadResourceStore implements ResourceStore { private Set<String> skipFiles; private ResourceStore resourceStore; public SingleLoadResourceStore(ResourceStore realResourceStore) { this.resourceStore = realResourceStore; } protected void addSkip(String filename) { if (filename == null) return; if (skipFiles == null) skipFiles = new HashSet<String>(); skipFiles.add(filename); } protected boolean isSkipped(String filename) { return filename != null && skipFiles != null && skipFiles.contains(filename); } @Override public Resource retrieveResource(CaptureSearchResult result) throws ResourceNotAvailableException { try { PerfStats.timeStart(PerfStat.WArcResource); if (isSkipped(result.getFile())) { throw new ResourceNotAvailableException( "Revisit: Skipping already failed " + result.getFile()); } try { return resourceStore.retrieveResource(result); } catch (ResourceNotAvailableException ex) { // Old code obtained archive filename via getDtails() method of // exception object, in the code handling SepcificCaptureReplayException. // Of two subclasses of SpecificCaptureReplayException, BadContentException // (only thrown from HttpHeaderOperation.copyHTTPMessageHeader()) never had // non-null details. So, this covers all cases, and more robust. 
addSkip(result.getFile()); throw ex; } } finally { PerfStats.timeEnd(PerfStat.WArcResource); } } @Override public void shutdown() throws IOException { } } public boolean isWaybackReferer(WaybackRequest wbRequest, String path) { return isWaybackReferer(wbRequest.getRefererUrl(), path); } public boolean isWaybackReferer(String referer, String path) { if (referer == null) { return false; } Object value = this.getConfigs().get("fullPathPrefix"); String fullPathPrefix = (value != null ? value.toString() : null); if (fullPathPrefix != null && !fullPathPrefix.isEmpty()) { return referer.contains(fullPathPrefix + path); } else { return referer.contains(path); } } /** * if capture {@code closest} is of timestamp different from the one requested, * redirect to exact Archival-URL for {@code closest}. * Memento Timegate request is always redirected regardless of timestamp. * Needs better method name. * @param wbRequest * @param httpResponse * @param captureResults * @param closest * @throws BetterRequestException */ protected void handleReplayRedirect(WaybackRequest wbRequest, HttpServletResponse httpResponse, CaptureSearchResults captureResults, CaptureSearchResult closest) throws BetterRequestException { if (wbRequest.getReplayTimestamp().startsWith(closest.getCaptureTimestamp()) && !wbRequest.isMementoTimegate()) { // Matching return; } captureResults.setClosest(closest); //TODO: better detection of non-redirect proxy mode? // For now, checking if the betterURI does not contain the timestamp, then we're not doing a redirect String datespec = ArchivalUrl.getDateSpec(wbRequest, closest.getCaptureTimestamp()); String betterURI = getUriConverter().makeReplayURI(datespec, closest.getOriginalUrl()); // if spare-redirect-for-embeds is on, render embedded resource in-place with Content-Location header pointing // exact replay URL (it is disabled for timegate requests) // XXX set Content-Location header somewhere else. 
if (fixedEmbeds && !wbRequest.isMementoTimegate() && isWaybackReferer(wbRequest, this.getReplayPrefix())) { httpResponse.setHeader("Content-Location", betterURI); return; } boolean isNonRedirectProxy = !betterURI.contains(closest.getCaptureTimestamp()); if (!isNonRedirectProxy) { throw new BetterReplayRequestException(closest, captureResults); } } /** * return {@code true} if capture's timestamp matches exactly what's requested. * If requested timestamp is less specific (i.e. less digits) than capture's * timestamp, it is considered non-matching. On the other hand, capture's * timestamp being prefix of requested timestamp is considered a match (this is * to support captures with timestamp shorter than 14-digits. this may change). * @param closest capture to check * @param wbRequest request object * @return {@code true} if match */ private static boolean timestampMatch(CaptureSearchResult closest, WaybackRequest wbRequest) { String replayTimestamp = wbRequest.getReplayTimestamp(); String captureTimestamp = closest.getCaptureTimestamp(); if (replayTimestamp.length() < captureTimestamp.length()) return false; if (replayTimestamp.startsWith(captureTimestamp)) return true; // if looking for latest date, consider it a tentative match, until // checking if it's replay-able. 
if (wbRequest.isBestLatestReplayRequest()) return true; return false; } protected void handleReplay(WaybackRequest wbRequest, HttpServletRequest httpRequest, HttpServletResponse httpResponse) throws IOException, ServletException, WaybackException { checkInterstitialRedirect(httpRequest,wbRequest); String requestURL = wbRequest.getRequestUrl(); if (getSelfRedirectCanonicalizer() != null) { try { requestURL = getSelfRedirectCanonicalizer().urlStringToKey(requestURL); } catch (IOException io) { } } PerformanceLogger p = new PerformanceLogger("replay"); // If optimized url+timestamp search is supported, mark the request if (this.isTimestampSearch()) { if (wbRequest.isAnyEmbeddedContext() || wbRequest.isIdentityContext()) { wbRequest.setTimestampSearchKey(true); } } CaptureSearchResults captureResults; try { captureResults = searchCaptures(wbRequest); } finally { p.queried(); } ReplayCaptureSelector captureSelector = new DefaultReplayCaptureSelector(getReplay()); captureSelector.setRequest(wbRequest); captureSelector.setCaptures(captureResults); CaptureSearchResult closest = captureSelector.next(); int counter = 0; //TODO: parameterize //int maxTimeouts = 2; //int maxMissingRevisits = 2; SingleLoadResourceStore resourceStore = new SingleLoadResourceStore(getCollection().getResourceStore()); //Set<String> skipFiles = null; while (true) { // Support for redirect from the CDX redirectUrl field // This was the intended use of the redirect field, but has not actually be tested // To enable this functionality, uncomment the lines below // This is an optimization that allows for redirects to be handled without loading the original content // //String redir = closest.getRedirectUrl(); //if ((redir != null) && !redir.equals("-")) { // String fullRedirect = getUriConverter().makeReplayURI(closest.getCaptureTimestamp(), redir); // throw new BetterRequestException(fullRedirect, Integer.valueOf(closest.getHttpCode())); //} Resource httpHeadersResource = null; Resource 
payloadResource = null; boolean isRevisit = false; try { counter++; if (closest == null) { throw new ResourceNotAvailableException("Self-Redirect: No Closest Match Found", 404); } closest.setClosest(true); checkAnchorWindow(wbRequest, closest); // Redirect to url for the actual closest capture, if not a retry if (counter == 1) { handleReplayRedirect(wbRequest, httpResponse, captureResults, closest); } // If revisit, may load two resources separately if (closest.isRevisitDigest()) { isRevisit = true; // If the payload record is known and it failed before with this payload, don't try // loading the header resource even.. outcome will likely be same if (resourceStore.isSkipped(closest.getDuplicatePayloadFile())) { // (XXX cannot simply call SessionResourceStore#retrieveResource() because of this // counter thing - is there a better way?) counter--; //don't really count this as we're not even checking the file anymore throw new ResourceNotAvailableException( "Revisit: Skipping already failed " + closest.getDuplicatePayloadFile()); } if ((closest.getDuplicatePayloadFile() == null) && wbRequest.isTimestampSearchKey()) { // If a missing revisit and loaded optimized, try loading the entire timeline again wbRequest.setTimestampSearchKey(false); captureResults = searchCaptures(wbRequest); closest = captureSelector.next(); //originalClosest = closest; //maxTimeouts *= 2; //maxMissingRevisits *= 2; continue; } // If old-style arc revisit (no mimetype, filename is '-'), then don't load // headersResource = payloadResource if (EMPTY_VALUE.equals(closest.getFile())) { closest.setFile(closest.getDuplicatePayloadFile()); closest.setOffset(closest.getDuplicatePayloadOffset()); // See that this is successful httpHeadersResource = resourceStore.retrieveResource(closest); // Hmm, since this is a revisit it should not redirect -- was: if both headers and payload are from a different timestamp, redirect to that timestamp // if 
(!closest.getCaptureTimestamp().equals(closest.getDuplicateDigestStoredTimestamp())) { // throwRedirect(wbRequest, httpResponse, captureResults, closest.getDuplicateDigestStoredTimestamp(), closest.getOriginalUrl(), closest.getHttpCode()); // } payloadResource = httpHeadersResource; } else { httpHeadersResource = resourceStore.retrieveResource(closest); CaptureSearchResult payloadLocation = retrievePayloadForIdenticalContentRevisit(wbRequest, httpHeadersResource, closest); if (payloadLocation == null) { throw new ResourceNotAvailableException("Revisit: Missing original for revisit record " + closest.toString(), 404); } payloadResource = resourceStore.retrieveResource(payloadLocation); // If zero length old-style revisit with no headers, then must use payloadResource as headersResource if (httpHeadersResource.getRecordLength() <= 0) { httpHeadersResource.close(); httpHeadersResource = payloadResource; } } } else { httpHeadersResource = resourceStore.retrieveResource(closest); payloadResource = httpHeadersResource; } // Ensure that we are not self-redirecting! // If the status is a redirect, check that the location or url date's are different from the current request // Otherwise, replay the previous matched capture. 
// This chain is unlikely to go past one previous capture, but is possible if (isSelfRedirect(httpHeadersResource, closest, wbRequest, requestURL)) { LOGGER.info("Self-Redirect: Skipping " + closest.getCaptureTimestamp() + "/" + closest.getOriginalUrl()); //closest = findNextClosest(closest, captureResults, requestMS); closest = captureSelector.next(); continue; } if (counter > 1) { handleReplayRedirect(wbRequest, httpResponse, captureResults, closest); } p.retrieved(); ReplayRenderer renderer = getReplay().getRenderer(wbRequest, closest, httpHeadersResource, payloadResource); if (this.isEnableWarcFileHeader() && (warcFileHeader != null)) { if (isRevisit && (closest.getDuplicatePayloadFile() != null)) { httpResponse.addHeader(warcFileHeader, closest.getDuplicatePayloadFile()); } else { httpResponse.addHeader(warcFileHeader, closest.getFile()); } } if (this.isEnableMemento()) { MementoUtils.addMementoDatetimeHeader(httpResponse, closest); if (wbRequest.isMementoTimegate()) { // URL-G in non-redirect proxy mode (archival-url URL-G // always redirects in handleReplayRedirect()). 
if (getMementoHandler() != null) { getMementoHandler().addTimegateHeaders( httpResponse, captureResults, wbRequest, true); } else { // bare minimum required for URL-G response [sic] // XXX this lacks Vary: accept-datetime header required for URL-G MementoUtils.addOrigHeader(httpResponse, closest.getOriginalUrl()); // Probably this is better - same as DefaultMementoHandler //MementoUtils.addTimegateHeaders(httpResponse, captureResults, wbRequest, true); } } else { // Memento URL-M response (can't be an intermediate resource) MementoUtils.addLinkHeader(httpResponse, captureResults, wbRequest, true, true); } } renderer.renderResource(httpRequest, httpResponse, wbRequest, closest, httpHeadersResource, payloadResource, getUriConverter(), captureResults); p.rendered(); p.write(wbRequest.getReplayTimestamp() + " " + wbRequest.getRequestUrl()); break; } catch (SpecificCaptureReplayException scre) { // Primarily ResourceNotAvailableException from ResourceStore, // but renderer.renderResource(...) above can throw // BadContentException (very rare). //final String SOCKET_TIMEOUT_MSG = "java.net.SocketTimeoutException: Read timed out"; CaptureSearchResult nextClosest = null; // if exceed maxRedirectAttempts, stop if ((counter > maxRedirectAttempts) && ((this.getLiveWebPrefix() == null) || !isWaybackReferer(wbRequest, this.getLiveWebPrefix()))) { LOGGER.info("LOADFAIL: Timeout: Too many retries, limited to " + maxRedirectAttempts); } else if ((closest != null) && !wbRequest.isIdentityContext()) { //nextClosest = findNextClosest(closest, captureResults, requestMS); nextClosest = captureSelector.next(); } // Skip any nextClosest that has the same exact filename? // Removing in case skip something that works.. 
// while ((nextClosest != null) && closest.getFile().equals(nextClosest.getFile())) { // nextClosest = findNextClosest(nextClosest, captureResults, requestMS); //} String msg = null; if (closest != null) { msg = scre.getMessage() + " /" + closest.getCaptureTimestamp() + "/" + closest.getOriginalUrl(); } else { msg = scre.getMessage() + " /" + wbRequest.getReplayTimestamp() + "/" + wbRequest.getRequestUrl(); } if (nextClosest != null) { if (msg.startsWith("Self-Redirect")) { LOGGER.info("(" + counter + ")LOADFAIL-> " + msg + " -> " + nextClosest.getCaptureTimestamp()); } else { LOGGER.warning("(" + counter + ")LOADFAIL-> " + msg + " -> " + nextClosest.getCaptureTimestamp()); } closest = nextClosest; } else if (wbRequest.isTimestampSearchKey()) { wbRequest.setTimestampSearchKey(false); captureResults = searchCaptures(wbRequest); captureSelector.setCaptures(captureResults); closest = captureSelector.next(); //originalClosest = closest; //maxTimeouts *= 2; //maxMissingRevisits *= 2; continue; } else { LOGGER.warning("(" + counter + ")LOADFAIL: " + msg); scre.setCaptureContext(captureResults, closest); throw scre; } } finally { closeResources(payloadResource, httpHeadersResource); } } } protected CaptureSearchResults searchCaptures(WaybackRequest wbr) throws ResourceIndexNotAvailableException, ResourceNotInArchiveException, BadQueryException, AccessControlException, ConfigurationException, ResourceNotAvailableException { SearchResults results = queryIndex(wbr); if (!(results instanceof CaptureSearchResults)) { throw new ResourceNotAvailableException( "Bad results looking up " + wbr.getReplayTimestamp() + " " + wbr.getRequestUrl()); } return (CaptureSearchResults)results; } // method isWarcRevisitNotModified(Resource) method has been moved to // WarcResource#isRevisitNotModified(). 
/** * If closest * @param currRequest * @param revisitRecord * @param closest * @return the payload resource * @throws ResourceNotAvailableException * @throws ConfigurationException * @throws AccessControlException * @throws BadQueryException * @throws ResourceNotInArchiveException * @throws ResourceIndexNotAvailableException * @see WARCRevisitAnnotationFilter */ protected CaptureSearchResult retrievePayloadForIdenticalContentRevisit( WaybackRequest currRequest, Resource revisitRecord, CaptureSearchResult closest) throws ResourceIndexNotAvailableException, ResourceNotInArchiveException, BadQueryException, AccessControlException, ConfigurationException, ResourceNotAvailableException { if (!closest.isRevisitDigest()) { LOGGER.warning("Revisit: record is not a revisit by identical content digest " + closest.getCaptureTimestamp() + " " + closest.getOriginalUrl()); return null; } CaptureSearchResult payloadLocation = null; // Revisit from same url -- should have been found by the loader if (closest.getDuplicatePayloadFile() != null && closest.getDuplicatePayloadOffset() != null) { payloadLocation = new CaptureSearchResult(); payloadLocation.setFile(closest.getDuplicatePayloadFile()); payloadLocation.setOffset(closest.getDuplicatePayloadOffset()); payloadLocation.setCompressedLength(closest.getDuplicatePayloadCompressedLength()); return payloadLocation; } // Url Agnostic Revisit with target-uri and refers-to-date String payloadUri = revisitRecord.getRefersToTargetURI(); String payloadTimestamp = revisitRecord.getRefersToDate(); if (payloadUri != null && payloadTimestamp != null) { WaybackRequest wbr = currRequest.clone(); wbr.setReplayTimestamp(payloadTimestamp); wbr.setAnchorTimestamp(payloadTimestamp); wbr.setTimestampSearchKey(true); wbr.setRequestUrl(payloadUri); // experimental parameter to tell EmbeddedCDXServerIndex // that it's looking up the payload of URL-agnostic revisit. // EmbeddedCDXServerIndex will include soft-blocked captures // in the result. 
wbr.put(EmbeddedCDXServerIndex.REQUEST_REVISIT_LOOKUP, "true"); CaptureSearchResults payloadCaptureResults = searchCaptures(wbr); // closest may not be the one pointed by payloadTimestamp ReplayCaptureSelector captureSelector = new DefaultReplayCaptureSelector(getReplay()); captureSelector.setRequest(wbr); captureSelector.setCaptures(payloadCaptureResults); payloadLocation = captureSelector.next(); // closest will not be the one pointed by payloadTimestamp if revisited // capture is missing (can happen for many reasons; not indexed yet, archive // has gone missing, for example). // TODO: this is pretty inefficient. should have a method for searching // just one capture at specific timestamp. Perhaps timestampSearchKey // is meant for this purpose, but it's not working as expected, apparently. if (payloadLocation != null) { String captureTimestamp = payloadLocation.getCaptureTimestamp(); // not supporting captureTimestamp less than 14 digits. if (!captureTimestamp.equals(payloadTimestamp)) payloadLocation = null; } } // if (payloadLocation != null) { // return payloadLocation; // } // // Less common less recommended revisit with specific warc/filename // WarcResource wr = (WarcResource) revisitRecord; // warcHeaders = wr.getWarcHeaders().getHeaderFields(); // String payloadWarcFile = (String) warcHeaders.get("WARC-Refers-To-Filename"); // String offsetStr = (String) warcHeaders.get("WARC-Refers-To-File-Offset"); // if (payloadWarcFile != null && offsetStr != null) { // payloadLocation = new CaptureSearchResult(); // payloadLocation.setFile(payloadWarcFile); // payloadLocation.setOffset(Long.parseLong(offsetStr)); // } return payloadLocation; } private void checkAnchorWindow(WaybackRequest wbRequest, CaptureSearchResult result) throws AnchorWindowTooSmallException { if (isUseAnchorWindow()) { String anchorDate = wbRequest.getAnchorTimestamp(); if (anchorDate != null) { long wantTime = wbRequest.getReplayDate().getTime(); long maxWindow = wbRequest.getAnchorWindow() * 
1000; if (maxWindow > 0) { long closestDistance = Math.abs(wantTime - result.getCaptureDate().getTime()); if (closestDistance > maxWindow) { throw new AnchorWindowTooSmallException("Closest is " + closestDistance + " seconds away, Window is " + maxWindow); } } } } } private int queryCollapseTime = -1; /** * CDXServer {@code collapseTime} parameter for capture query. * @param queryCollapseTime integer, negative value instructs * CDXServer to use the default value. */ public void setQueryCollapseTime(int queryCollapseTime) { this.queryCollapseTime = queryCollapseTime; } public int getQueryCollapseTime() { return queryCollapseTime; } protected void handleQuery(WaybackRequest wbRequest, HttpServletRequest httpRequest, HttpServletResponse httpResponse) throws ServletException, IOException, WaybackException { PerformanceLogger p = new PerformanceLogger("query"); // TODO: move this Memento code out of this method. // Memento: render timemap if ((this.getMementoHandler() != null) && (wbRequest.isMementoTimemapRequest())) { if (this.getMementoHandler().renderMementoTimemap(wbRequest, httpRequest, httpResponse)) { return; } } // TODO: should this be applied to Memento Timemap as well? // Must call getQueryCollapseTime() because AccessPointAdapter // needs to read parent's value, not the unused field of its // own. 
wbRequest.setCollapseTime(getQueryCollapseTime()); SearchResults results = queryIndex(wbRequest); p.queried(); if (results instanceof CaptureSearchResults) { CaptureSearchResults cResults = (CaptureSearchResults)results; // The Firefox proxy plugin maks an XML request to populate the // list of available captures, and needs the closest result to // the one being replayed to be flagged as such: CaptureSearchResult closest = cResults.getClosest(); if (closest != null) { closest.setClosest(true); } getQuery().renderCaptureResults(httpRequest, httpResponse, wbRequest, cResults, getUriConverter()); } else if (results instanceof UrlSearchResults) { UrlSearchResults uResults = (UrlSearchResults)results; getQuery().renderUrlResults(httpRequest, httpResponse, wbRequest, uResults, getUriConverter()); } else { throw new WaybackException("Unknown index format"); } p.rendered(); p.write(wbRequest.getRequestUrl()); } /** * Release any resources associated with this AccessPoint, including * stopping any background processing threads */ @Override public void shutdown() { if (collection != null) { try { collection.shutdown(); } catch (IOException e) { LOGGER.severe("FAILED collection shutdown" + e.getMessage()); } } if (exclusionFactory != null) { exclusionFactory.shutdown(); } } protected void closeResources(Resource payloadResource, Resource httpHeadersResource) { if ((payloadResource != null) && (payloadResource != httpHeadersResource)) { try { payloadResource.close(); } catch (IOException e) { LOGGER.warning(e.toString()); } } if (httpHeadersResource != null) { try { httpHeadersResource.close(); } catch (IOException e) { LOGGER.warning(e.toString()); } } } private String getBestPrefix(String best, String next, String last) { if (best != null) { return best; } if (next != null) { return next; } return last; } /* * ******************************************************************* * ******************************************************************* * * ALL GETTER/SETTER BELOW 
HERE * * ******************************************************************* * ******************************************************************* */ /** * @return the exactHostMatch */ public boolean isExactHostMatch() { return exactHostMatch; } /** * @param exactHostMatch if true, then only SearchResults exactly matching * the requested hostname will be returned from this AccessPoint. If * false, then hosts which canonicalize to the same host as requested * hostname will be returned (www.) */ public void setExactHostMatch(boolean exactHostMatch) { this.exactHostMatch = exactHostMatch; } /** * @return the exactSchemeMatch */ public boolean isExactSchemeMatch() { return exactSchemeMatch; } /** * @param exactSchemeMatch the exactSchemeMatch to set */ public void setExactSchemeMatch(boolean exactSchemeMatch) { this.exactSchemeMatch = exactSchemeMatch; } /** * @return true if this AccessPoint is configured to useAnchorWindow, that * is, to replay documents only if they are within a certain proximity to * the users requested AnchorDate */ public boolean isUseAnchorWindow() { return useAnchorWindow; } /** * @param useAnchorWindow , when set to true, causes this AccessPoint to * only replay documents if they are within a certain proximity to * the users requested AnchorDate */ public void setUseAnchorWindow(boolean useAnchorWindow) { this.useAnchorWindow = useAnchorWindow; } /** * @return the useServerName * @deprecated no longer used, use {replay,query,static}Prefix */ public boolean isUseServerName() { return useServerName; } /** * @param useServerName the useServerName to set * @deprecated no longer used, use {replay,query,static}Prefix */ public void setUseServerName(boolean useServerName) { this.useServerName = useServerName; } /** * @return true if this AccessPoint serves static content */ public boolean isServeStatic() { return serveStatic; } /** * @param serveStatic if set to true, this AccessPoint will serve static * content, and .jsp files */ public void 
setServeStatic(boolean serveStatic) { this.serveStatic = serveStatic; } /** * @return the livewebRedirector which determines if custom loading or handling * is done for resources that are not successfully loaded, */ public LiveWebRedirector getLiveWebRedirector() { return liveWebRedirector; } /** * @param liveWebRedirector Set the {@link LiveWebRedirector} to use to try and retrieve documents * from the live web on demand when missing from the collection. */ public void setLiveWebRedirector(LiveWebRedirector liveWebRedirector) { this.liveWebRedirector = liveWebRedirector; } // Set standard liveweb redirector public void setLiveWebPrefix(String liveWebPrefix) { if (liveWebPrefix == null || liveWebPrefix.isEmpty()) { this.liveWebRedirector = null; } this.liveWebRedirector = new DefaultLiveWebRedirector(liveWebPrefix); } public String getLiveWebPrefix() { if (this.liveWebRedirector == null) { return null; } return this.liveWebRedirector.getLiveWebPrefix(); } /** * @return the String url prefix to use when generating self referencing * static URLs */ public String getStaticPrefix() { return getBestPrefix(staticPrefix,queryPrefix,replayPrefix); } /** * @param staticPrefix explicit URL prefix to use when creating self referencing * static URLs */ public void setStaticPrefix(String staticPrefix) { this.staticPrefix = staticPrefix; } /** * @return the String url prefix to use when generating self referencing * replay URLs */ public String getReplayPrefix() { return getBestPrefix(replayPrefix, queryPrefix, staticPrefix); } /** * @param replayPrefix explicit URL prefix to use when creating self referencing * replay URLs */ public void setReplayPrefix(String replayPrefix) { this.replayPrefix = replayPrefix; } /** * @param queryPrefix explicit URL prefix to use when creating self referencing * query URLs */ public void setQueryPrefix(String queryPrefix) { this.queryPrefix = queryPrefix; } /** * @return the String url prefix to use when generating self referencing * replay URLs 
*/ public String getQueryPrefix() { return getBestPrefix(queryPrefix, staticPrefix, replayPrefix); } /** * Build a self-referencing URL that will perform a query for all copies * of URL {@code url}. * @param url URL to search for copies of * @param startdate start of date range in DT14 format (may be {@code null} * for no date range. * @param enddate end of date range in DT14 format (may be {@code null}, ignored * if {@code startdate} is {@code null}) * @return String URL that will make a query for all captures of {@code url}. */ public String makeCaptureQueryUrl(String url, String startdate, String enddate) { // XXX assumes particular style of query URL, which may not be compatible // with RequestParsers in use. TODO: refactor. if (startdate != null) { if (enddate != null) { return getQueryPrefix() + startdate + "-" + enddate + "*/" + url; } else { return getQueryPrefix() + startdate + "*/" + url; } } else { return getQueryPrefix() + "*/" + url; } } /** * @param interstitialJsp the interstitialJsp to set */ public void setInterstitialJsp(String interstitialJsp) { this.interstitialJsp = interstitialJsp; } /** * @return the interstitialJsp */ public String getInterstitialJsp() { return interstitialJsp; } /** * @param urlRoot explicit URL prefix to use when creating ANY self * referencing URLs * @deprecated use setQueryPrefix, setReplayPrefix, setStaticPrefix */ public void setUrlRoot(String urlRoot) { this.queryPrefix = urlRoot; this.replayPrefix = urlRoot; this.staticPrefix = urlRoot; } /** * @return the String url prefix used when generating self referencing * URLs * @deprecated use getQueryPrefix, getReplayPrefix, getStaticPrefix */ public String getUrlRoot() { return getBestPrefix(queryPrefix,staticPrefix,replayPrefix); } /** * @return explicit Locale to use within this AccessPoint. */ public Locale getLocale() { return locale; } /** * @param locale explicit Locale to use for requests within this * AccessPoint. 
If not set, will attempt to use the one specified by
 * each requests User Agent via HTTP headers
 */
public void setLocale(Locale locale) {
    this.locale = locale;
}

/**
 * @return the generic customization Properties attached to this
 *         AccessPoint, generally used to tune the UI
 */
public Properties getConfigs() {
    return this.configs;
}

/**
 * @param configs generic customization Properties for this AccessPoint,
 *        generally used to tune the UI
 */
public void setConfigs(Properties configs) {
    this.configs = configs;
}

/**
 * @return List of file patterns that will be matched when querying the
 *         ResourceIndex
 */
public List<String> getFilePatterns() {
    return this.filePatterns;
}

/**
 * @param filePatterns List of file patterns (regular expressions) matched
 *        when querying the ResourceIndex - only SearchResults matching one
 *        of these patterns will be returned.
 */
public void setFilePatterns(List<String> filePatterns) {
    this.filePatterns = filePatterns;
}

/**
 * @return List of file name prefixes used as the include filter when
 *         querying the ResourceIndex
 */
public List<String> getFileIncludePrefixes() {
    return this.fileIncludePrefixes;
}

/**
 * @param fileIncludePrefixes List of file name prefixes matched when
 *        querying the ResourceIndex - only SearchResults from files with
 *        a prefix matching one of those in this List will be returned.
 */
public void setFileIncludePrefixes(List<String> fileIncludePrefixes) {
    this.fileIncludePrefixes = fileIncludePrefixes;
}

/**
 * @return List of file name prefixes used as the exclude filter when
 *         querying the ResourceIndex
 */
public List<String> getFileExcludePrefixes() {
    return this.fileExcludePrefixes;
}

/**
 * @param fileExcludePrefixes List of String file prefixes that will be matched
 * when querying the ResourceIndex - only SearchResults from files
 * with a prefix matching one of those in this List will be returned.
 * NOTE(review): this wording is identical to the include-prefix setter;
 * presumably matching files are excluded rather than returned - confirm
 * against the ResourceIndex implementation.
*/ public void setFileExcludePrefixes(List<String> fileExcludePrefixes) { this.fileExcludePrefixes = fileExcludePrefixes; } /** * @return the WaybackCollection used by this AccessPoint */ public WaybackCollection getCollection() { return collection; } /** * @param collection the WaybackCollection to use with this AccessPoint */ public void setCollection(WaybackCollection collection) { this.collection = collection; } /** * @return the ExceptionRenderer in use with this AccessPoint */ public ExceptionRenderer getException() { return exception; } /** * @param exception the ExceptionRender to use with this AccessPoint */ public void setException(ExceptionRenderer exception) { this.exception = exception; } /** * @return the QueryRenderer to use with this AccessPoint */ public QueryRenderer getQuery() { return query; } /** * @param query the QueryRenderer responsible for returning query data to * clients. */ public void setQuery(QueryRenderer query) { this.query = query; } /** * @return the RequestParser used by this AccessPoint to attempt to * translate incoming HttpServletRequest objects into WaybackRequest * objects */ public RequestParser getParser() { return parser; } /** * @param parser the RequestParser to use with this AccessPoint */ public void setParser(RequestParser parser) { this.parser = parser; } /** * @return the ReplayDispatcher to use with this AccessPoint, responsible * for returning an appropriate ReplayRenderer given the user request and * the returned document type. */ public ReplayDispatcher getReplay() { return replay; } /** * @param replay the ReplayDispatcher to use with this AccessPoint. */ public void setReplay(ReplayDispatcher replay) { this.replay = replay; } /** * @return the ResultURIConverter used to construct Replay URLs within this * AccessPoint */ public ResultURIConverter getUriConverter() { return uriConverter; } /** * <p>Refactoring: remove this method. 
let {@link #getUriConverter()} create * ResultURIConverter with factory (like AccessPointAdapter does).</p> * @param uriConverter the ResultURIConverter to use with this AccessPoint * to construct Replay URLs */ public void setUriConverter(ResultURIConverter uriConverter) { this.uriConverter = uriConverter; } /** * @return the ExclusionFilterFactory in use with this AccessPoint */ public ExclusionFilterFactory getExclusionFactory() { return exclusionFactory; } /** * @param exclusionFactory all requests to this AccessPoint will create an * exclusionFilter from this factory when handling requests */ public void setExclusionFactory(ExclusionFilterFactory exclusionFactory) { this.exclusionFactory = exclusionFactory; } /** * @return the configured AuthenticationControl BooleanOperator in use with * this AccessPoint. */ public BooleanOperator<WaybackRequest> getAuthentication() { return authentication; } /** * @param auth the BooleanOperator which determines if incoming * requests are allowed to connect to this AccessPoint. 
*/
public void setAuthentication(BooleanOperator<WaybackRequest> auth) {
    this.authentication = auth;
}

/**
 * @return the requestAuth flag
 */
public boolean isRequestAuth() {
    return this.requestAuth;
}

/**
 * @param requestAuth the requestAuth flag to set
 */
public void setRequestAuth(boolean requestAuth) {
    this.requestAuth = requestAuth;
}

/**
 * @return the configured refererAuth value
 */
public String getRefererAuth() {
    return this.refererAuth;
}

/**
 * @param refererAuth the refererAuth value to set
 */
public void setRefererAuth(String refererAuth) {
    this.refererAuth = refererAuth;
}

/**
 * @return the bounceToReplayPrefix flag
 */
public boolean isBounceToReplayPrefix() {
    return this.bounceToReplayPrefix;
}

/**
 * @param bounceToReplayPrefix the bounceToReplayPrefix flag to set
 */
public void setBounceToReplayPrefix(boolean bounceToReplayPrefix) {
    this.bounceToReplayPrefix = bounceToReplayPrefix;
}

/**
 * @return the bounceToQueryPrefix flag
 */
public boolean isBounceToQueryPrefix() {
    return this.bounceToQueryPrefix;
}

/**
 * @param bounceToQueryPrefix the bounceToQueryPrefix flag to set
 */
public void setBounceToQueryPrefix(boolean bounceToQueryPrefix) {
    this.bounceToQueryPrefix = bounceToQueryPrefix;
}

/**
 * @return the configured number of MS for min age to return from the index
 */
public long getEmbargoMS() {
    return this.embargoMS;
}

/**
 * @param ms minimum number of MS age for content to be served from the index
 */
public void setEmbargoMS(long ms) {
    this.embargoMS = ms;
}

/**
 * @return the forceCleanQueries flag
 */
public boolean isForceCleanQueries() {
    return this.forceCleanQueries;
}

/**
 * @param forceCleanQueries the forceCleanQueries flag to set
 */
public void setForceCleanQueries(boolean forceCleanQueries) {
    this.forceCleanQueries = forceCleanQueries;
}

/**
 * {@link CustomResultFilterFactory} to be applied on CDX query result.
 * <p>AccessPoint itself does not use this object.
 * {@link org.archive.wayback.ResourceIndex} implementation needs to implement filtering
 * using this property.
{@code ClusterResourceIndex} is the only
 * implementation known at this moment.</p>
 * <p>Note: this property will likely be removed in the future.</p>
 * @param filterFactory the filterFactory to set
 */
public void setFilterFactory(CustomResultFilterFactory filterFactory) {
    this.filterFactory = filterFactory;
}

/**
 * @return the configured filterFactory
 */
public CustomResultFilterFactory getFilterFactory() {
    return this.filterFactory;
}

/**
 * Optional URL canonicalizer for testing self-redirect.
 * @param selfRedirectCanonicalizer the {@link UrlCanonicalizer} to set
 */
public void setSelfRedirectCanonicalizer(
        UrlCanonicalizer selfRedirectCanonicalizer) {
    this.selfRedirectCanonicalizer = selfRedirectCanonicalizer;
}

/**
 * URL canonicalizer for testing self-redirect.
 * @return The {@link UrlCanonicalizer}
 */
public UrlCanonicalizer getSelfRedirectCanonicalizer() {
    return selfRedirectCanonicalizer;
}

/**
 * @return the maxRedirectAttempts setting
 */
public int getMaxRedirectAttempts() {
    return this.maxRedirectAttempts;
}

/**
 * @param maxRedirectAttempts the maxRedirectAttempts setting to store
 */
public void setMaxRedirectAttempts(int maxRedirectAttempts) {
    this.maxRedirectAttempts = maxRedirectAttempts;
}

/**
 * @return the fixedEmbeds flag
 */
public boolean isFixedEmbeds() {
    return this.fixedEmbeds;
}

/**
 * @param fixedEmbeds the fixedEmbeds flag to set
 */
public void setFixedEmbeds(boolean fixedEmbeds) {
    this.fixedEmbeds = fixedEmbeds;
}

/**
 * @return the timestampSearch flag
 */
public boolean isTimestampSearch() {
    return this.timestampSearch;
}

/**
 * @param timestampSearch the timestampSearch flag to set
 */
public void setTimestampSearch(boolean timestampSearch) {
    this.timestampSearch = timestampSearch;
}

/**
 * @return the configured perfStatsHeader value
 */
public String getPerfStatsHeader() {
    return this.perfStatsHeader;
}

/**
 * @param perfStatsHeader the perfStatsHeader value to set
 */
public void setPerfStatsHeader(String perfStatsHeader) {
    this.perfStatsHeader = perfStatsHeader;
}

/**
 * @return the configured warcFileHeader value
 */
public String getWarcFileHeader() {
    return this.warcFileHeader;
}

/**
 * @param warcFileHeader the warcFileHeader value to set
 */
public void setWarcFileHeader(String warcFileHeader) {
    this.warcFileHeader = warcFileHeader;
}

/**
 * @return the configured errorMsgHeader value
 */
public String getErrorMsgHeader() {
    return this.errorMsgHeader;
}

/**
 * @param errorMsgHeader the errorMsgHeader value to set
 */
public void setErrorMsgHeader(String errorMsgHeader) {
    this.errorMsgHeader = errorMsgHeader;
}

/**
 * @return the enableErrorMsgHeader flag
 */
public boolean isEnableErrorMsgHeader() {
    return this.enableErrorMsgHeader;
}

/**
 * @param enableErrorMsgHeader the enableErrorMsgHeader flag to set
 */
public void setEnableErrorMsgHeader(boolean enableErrorMsgHeader) {
    this.enableErrorMsgHeader = enableErrorMsgHeader;
}

/**
 * @return the enablePerfStatsHeader flag
 */
public boolean isEnablePerfStatsHeader() {
    return this.enablePerfStatsHeader;
}

/**
 * @param enablePerfStatsHeader the enablePerfStatsHeader flag to set
 */
public void setEnablePerfStatsHeader(boolean enablePerfStatsHeader) {
    this.enablePerfStatsHeader = enablePerfStatsHeader;
}

/**
 * @return the enableWarcFileHeader flag
 */
public boolean isEnableWarcFileHeader() {
    return this.enableWarcFileHeader;
}

/**
 * @param enableWarcFileHeader the enableWarcFileHeader flag to set
 */
public void setEnableWarcFileHeader(boolean enableWarcFileHeader) {
    this.enableWarcFileHeader = enableWarcFileHeader;
}

/**
 * @return the enableMemento flag
 */
public boolean isEnableMemento() {
    return this.enableMemento;
}

/**
 * @param enableMemento the enableMemento flag to set
 */
public void setEnableMemento(boolean enableMemento) {
    this.enableMemento = enableMemento;
}

/**
 * @return the configured {@link MementoHandler}, possibly {@code null}
 */
public MementoHandler getMementoHandler() {
    return this.mementoHandler;
}

/**
 * @param mementoHandler the {@link MementoHandler} to set
 */
public void setMementoHandler(MementoHandler mementoHandler) {
    this.mementoHandler = mementoHandler;
}

/**
 * Format of profiling header field.
 * @param perfStatsHeaderFormat the output format to set
 * @see PerfStats.OutputFormat
 */
public void setPerfStatsHeaderFormat(
        PerfStats.OutputFormat perfStatsHeaderFormat) {
    this.perfStatsHeaderFormat = perfStatsHeaderFormat;
}
}