/** * */ package org.archive.wayback.webapp; import java.io.IOException; import java.util.List; import java.util.Locale; import java.util.Map; import java.util.Properties; import java.util.logging.Level; import java.util.logging.Logger; import java.util.regex.Pattern; import javax.servlet.RequestDispatcher; import javax.servlet.ServletContext; import javax.servlet.ServletException; import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; import junit.framework.TestCase; import org.archive.format.ArchiveFileConstants; import org.archive.io.ArchiveRecordHeader; import org.archive.io.warc.TestWARCReader; import org.archive.io.warc.TestWARCRecordInfo; import org.archive.io.warc.WARCRecord; import org.archive.wayback.QueryRenderer; import org.archive.wayback.ReplayDispatcher; import org.archive.wayback.ReplayRenderer; import org.archive.wayback.RequestParser; import org.archive.wayback.ResourceIndex; import org.archive.wayback.ResourceStore; import org.archive.wayback.accesscontrol.ExclusionFilterFactory; import org.archive.wayback.archivalurl.ArchivalUrlResultURIConverter; import org.archive.wayback.authenticationcontrol.AccessControlSettingOperation; import org.archive.wayback.core.CaptureSearchResult; import org.archive.wayback.core.CaptureSearchResults; import org.archive.wayback.core.FastCaptureSearchResult; import org.archive.wayback.core.Resource; import org.archive.wayback.core.UrlSearchResults; import org.archive.wayback.core.WaybackRequest; import org.archive.wayback.exception.ResourceNotAvailableException; import org.archive.wayback.exception.WaybackException; import org.archive.wayback.memento.MementoHandler; import org.archive.wayback.memento.MementoUtils; import org.archive.wayback.resourceindex.filters.ExclusionFilter; import org.archive.wayback.resourcestore.resourcefile.ArcResource; import org.archive.wayback.resourcestore.resourcefile.WarcResource; import org.archive.wayback.util.operator.TrueBooleanOperator; import 
org.archive.wayback.util.url.KeyMakerUrlCanonicalizer; import org.archive.wayback.util.webapp.RequestMapper; import org.easymock.EasyMock; import org.easymock.IAnswer; import org.easymock.IArgumentMatcher; /** * unit test for {@link AccessPoint}. * * TODO: this unit test is too complex. it is because AccessPoint class has too * much responsibility and many execution paths. some good refactoring of * AccessPoint class would help. * * @author Kenji Nagahashi * */ public class AccessPointTest extends TestCase { AccessPoint cut; // AccessPoint public interface // init() // handleRequest() // queryIndex(WaybackRequest) - actually it's only used internally. it's // public just because // LiveWebAccessPoint is reusing it. LiveWebAccessPoint also calls // getReplay() for rendering // resource in accordance with rendering mode configured. // shutdown() // dependencies // selfRedirectCanonicalizer: UrlCanonicalizer // filterFactory: CustomResultFilterFactory // exclusionFactory: ExclusionFilterFactory // uriConverter: ResultURIConverter // replay: ReplayDispatcher // parser: RequestParser // query: QueryRenderer // exception: ExceptionRenderer // collection: WaybackCollection // - resourceStore: ResourceStore // - resourceIndex: ResourceIndex // configs: Properties // liveWebRedirector: LiveWebRedirector WaybackCollection collection; ResourceStore resourceStore; ResourceIndex resourceIndex; HttpServletRequest httpRequest; HttpServletResponse httpResponse; RequestDispatcher requestDispatcher; RequestParser parser; // ResultURIConverter uriConverter; QueryRenderer query; ReplayDispatcher replay; WaybackRequest wbRequest; ReplayRenderer replayRenderer; /** * setup HttpServletRequest stubs * @param contextPath servlet context path. typically {@code "/"} * @param uri servlet URI. 
typically {@code "/"} * @param contextPathPrefix */ protected void setupRequestStub(String contextPath, String uri, String contextPathPrefix) { EasyMock.expect(httpRequest.getRequestURI()).andStubReturn(uri); EasyMock.expect(httpRequest.getRequestURL()).andStubReturn( new StringBuffer(uri)); // EasyMock.expect(httpRequest.getQueryString()).andReturn(null); // remote address test // EasyMock.expect(httpRequest.getHeader("X-Forwarded-For")).andReturn(null); // Ajax mode test // EasyMock.expect(httpRequest.getHeader("X-Requested-With")).andReturn("XMLHttpRequest"); // used by RequestMapper#getRequestPathPrefix(HttpServletRequest) // typical value found in // ia-wayback-projects/projects/global-wayback/configs/local/wayback.properties // TODO: RequestMapper#getRequestContextPath(HttpServletRequest) assumes // value of this // attribute ends with "/". RequestMapper has constant declaration for // "webapp-request-context-path-prefix", but it's private. EasyMock.expect( httpRequest.getAttribute("webapp-request-context-path-prefix")) .andStubReturn(contextPathPrefix); EasyMock.expect(httpRequest.getLocalName()).andStubReturn("localhost"); // commented out because these are default behavior for stub ("Nice") // mock. // EasyMock.expect(httpRequest.getAuthType()).andReturn(null).anyTimes(); // EasyMock.expect(httpRequest.getRemoteUser()).andReturn(null).anyTimes(); // EasyMock.expect(httpRequest.getHeader(WaybackRequest.REQUEST_AUTHORIZATION)).andReturn(null); EasyMock.expect(httpRequest.getLocalPort()).andStubReturn(8080); EasyMock.expect(httpRequest.getContextPath()).andStubReturn("/static"); EasyMock.expect(httpRequest.getLocale()).andStubReturn( Locale.CANADA_FRENCH); EasyMock.expect( httpRequest.getRequestDispatcher(EasyMock.<String>notNull())) .andStubReturn(requestDispatcher); // EasyMock.expect(httpRequest.getCookies()).andReturn(null).anyTimes(); } // values used in global wayback configuration. 
public static final String WEB_PREFIX = "/web/";
public static final String STATIC_PREFIX = "/static/";

/** Matches a DT14 timestamp: exactly 14 decimal digits. Compiled once. */
private static final Pattern DT14_PATTERN = Pattern.compile("\\d{14}");

/** Length of the {@code yyyy-MM-ddTHH:mm:ss} part of an ISOZ date. */
private static final int ISOZ_DATETIME_LENGTH = 19;

/**
 * Creates the {@link AccessPoint} under test and wires it with mocks.
 * <p>
 * {@code parser} is the only mock switched to replay mode here; it answers
 * a single {@code parse(httpRequest, cut)} call with whatever
 * {@link #wbRequest} holds at that time. All other mocks are left in record
 * mode so that each test can add its own expectations.
 * </p>
 *
 * @see junit.framework.TestCase#setUp()
 */
@Override
protected void setUp() throws Exception {
    super.setUp();
    cut = new AccessPoint();
    cut.setEnablePerfStatsHeader(false);
    cut.setEnableMemento(false);
    cut.setExclusionFactory(null);
    cut.setExactSchemeMatch(false); // default
    cut.setExactHostMatch(false); // default
    cut.setEnableWarcFileHeader(false);
    cut.setReplayPrefix(WEB_PREFIX);
    cut.setQueryPrefix(WEB_PREFIX);
    cut.setStaticPrefix(STATIC_PREFIX);

    KeyMakerUrlCanonicalizer canonicalizer = new KeyMakerUrlCanonicalizer();
    cut.setSelfRedirectCanonicalizer(canonicalizer);

    resourceStore = EasyMock.createMock(ResourceStore.class);
    resourceIndex = EasyMock.createMock(ResourceIndex.class);
    collection = new WaybackCollection();
    collection.setResourceIndex(resourceIndex);
    collection.setResourceStore(resourceStore);
    cut.setCollection(collection);

    // behavior returning null is not stubbed because EasyMock "nice" mock
    // provides it by default.
    httpRequest = EasyMock.createNiceMock(HttpServletRequest.class);
    httpResponse = EasyMock.createMock(HttpServletResponse.class);
    // AccessPoint calls getWriter() just for committing response headers.
    // Return value does not matter.
    EasyMock.expect(httpResponse.getWriter()).andStubReturn(null);

    // RequestDispatcher - setup expectations, call replay() and verify() if
    // method calls are expected.
    requestDispatcher = EasyMock.createMock(RequestDispatcher.class);
    // Memento mode: getHeader(MementoUtils.ACCEPT_DATETIME) is only called
    // when enableMemento==true, so it is not stubbed.

    setupRequestStub("/", "/", null);

    // as we mock-ify RequestParser, WaybackRequest can be independent of
    // httpRequest. it suggests HttpServletRequest method calls in
    // setupRequestStub are better be made through RequestParser (TODO)
    parser = EasyMock.createMock(RequestParser.class);
    cut.setParser(parser);
    // answer lazily so that tests may assign wbRequest after setUp().
    EasyMock.expect(parser.parse(httpRequest, cut)).andAnswer(
        new IAnswer<WaybackRequest>() {
            @Override
            public WaybackRequest answer() throws Throwable {
                return wbRequest;
            }
        });
    EasyMock.replay(parser);

    query = EasyMock.createMock(QueryRenderer.class);
    cut.setQuery(query);

    replay = EasyMock.createMock(ReplayDispatcher.class);
    cut.setReplay(replay);

    replayRenderer = EasyMock.createMock(ReplayRenderer.class);

    {
        ArchivalUrlResultURIConverter uc = new ArchivalUrlResultURIConverter();
        // same prefix as the one given to setReplayPrefix() above.
        uc.setReplayURIPrefix(WEB_PREFIX);
        cut.setUriConverter(uc);
    }
    // disable logging
    Logger.getLogger(ArchivalUrlResultURIConverter.class.getName())
        .setLevel(Level.WARNING);
    Logger.getLogger(PerfStats.class.getName()).setLevel(Level.WARNING);
}

/**
 * Reads the first record described by {@code recinfo} and wraps it in a
 * {@link WarcResource} whose headers are already parsed.
 * @param recinfo description of the test WARC record
 * @return new WarcResource
 * @throws IOException for unexpected I/O failure while building the record
 */
private static WarcResource buildWarcResource(TestWARCRecordInfo recinfo)
        throws IOException {
    TestWARCReader ar = new TestWARCReader(recinfo);
    WARCRecord rec = ar.get(0);
    WarcResource resource = new WarcResource(rec, ar);
    resource.parseHeaders();
    return resource;
}

/**
 * Create a compressed {@code text/html} response resource.
 * @param uri capture URL, or {@code null} to leave the URL of
 *        {@link TestWARCRecordInfo} untouched
 * @param timestamp 14-digit timestamp of capture
 * @param payloadBytes response body
 * @return new Resource object
 * @throws IOException for unexpected I/O failure while building payload
 */
public static Resource createTestHtmlResource(String uri, String timestamp,
        byte[] payloadBytes) throws IOException {
    // default compressed=true - it often reveals bugs.
    return createTestHtmlResource(uri, timestamp, payloadBytes, true);
}

/**
 * Create a {@code text/html} response resource.
 * @param uri capture URL, or {@code null} to leave the URL of
 *        {@link TestWARCRecordInfo} untouched
 * @param timestamp 14-digit timestamp of capture
 * @param payloadBytes response body
 * @param compressed {@code true} to build the record with
 *        {@code createCompressedHttpResponse}, {@code false} to build it
 *        with {@code createHttpResponse}
 * @return new Resource object
 * @throws IOException for unexpected I/O failure while building payload
 */
public static Resource createTestHtmlResource(String uri, String timestamp,
        byte[] payloadBytes, boolean compressed) throws IOException {
    TestWARCRecordInfo recinfo = compressed
        ? TestWARCRecordInfo.createCompressedHttpResponse("text/html", payloadBytes)
        : TestWARCRecordInfo.createHttpResponse("text/html", payloadBytes);
    recinfo.setCreate14DigitDateFromDT14(timestamp);
    if (uri != null) {
        recinfo.setUrl(uri);
    }
    return buildWarcResource(recinfo);
}

/**
 * Create a compressed {@code text/html} response resource with default URL.
 * @param timestamp 14-digit timestamp of capture
 * @param payloadBytes response body
 * @return new Resource object
 * @throws IOException for unexpected I/O failure while building payload
 */
public static Resource createTestHtmlResource(String timestamp,
        byte[] payloadBytes) throws IOException {
    // by passing null to uri, default "http://test.example.com/" will be
    // used.
    return createTestHtmlResource(null, timestamp, payloadBytes);
}

/**
 * Create a test revisit record referring unknown capture of content-length
 * {@code len}. This is meant for pathological case. Use
 * {@link #createTestRevisitResource(String, Resource, boolean)} for regular
 * case.
 * @param timestamp 14-digit timestamp of capture
 * @param len original content-length
 * @param withHeader {@code false} for omitting HTTP header (simulates old
 *        revisit record).
 * @return new Resource object.
 * @throws IOException for unexpected I/O failure while building payload
 */
public static Resource createTestRevisitResource(String timestamp, int len,
        boolean withHeader) throws IOException {
    TestWARCRecordInfo recinfo = TestWARCRecordInfo
        .createRevisitHttpResponse("text/html", len, withHeader);
    recinfo.setCreate14DigitDateFromDT14(timestamp);
    return buildWarcResource(recinfo);
}

/**
 * Create a test revisit record referring Resource {@code revisited}.
 * <p>
 * The new record copies the URL of {@code revisited} and carries
 * {@code WARC-Refers-To-Target-URI} and {@code WARC-Refers-To-Date} headers
 * taken from its WARC headers. Content length is read from the
 * {@code Content-Length} HTTP header of {@code revisited}; {@code -1} is
 * used when that header is absent.
 * </p>
 * @param timestamp CDX-style 14digit timestamp
 * @param revisited Capture being revisited (must be a {@link WarcResource}
 *        or {@code ClassCastException} will be the result)
 * @param withHeader {@code true} unless you want to emulate old implementation
 *        where revisit record had no HTTP headers.
 * @return new Resource object
 * @throws IOException for unexpected I/O error building payload
 */
public static Resource createTestRevisitResource(String timestamp,
        Resource revisited, boolean withHeader) throws IOException {
    String clen = revisited.getHttpHeaders().get("Content-Length");
    int len = clen != null ? Integer.parseInt(clen) : -1;
    TestWARCRecordInfo recinfo = TestWARCRecordInfo
        .createRevisitHttpResponse("text/html", len, withHeader);
    recinfo.setCreate14DigitDateFromDT14(timestamp);
    ArchiveRecordHeader warcHeader = ((WarcResource)revisited).getWarcHeaders();
    recinfo.addExtraHeader("WARC-Refers-To-Target-URI", warcHeader.getUrl());
    recinfo.addExtraHeader("WARC-Refers-To-Date", warcHeader.getDate());
    recinfo.setUrl(warcHeader.getUrl());
    return buildWarcResource(recinfo);
}

/**
 * checks if {@code ts} has expected format (YYYYmmddHHMMSS), that is,
 * exactly 14 decimal digits. Field ranges are not checked.
 * @param ts timestamp string to check
 * @return true if ok, false otherwise (including {@code null})
 */
protected static boolean validTimestamp(String ts) {
    return ts != null && DT14_PATTERN.matcher(ts).matches();
}

/**
 * Transform input date to 14-digit timestamp: 2007-08-29T18:00:26Z =>
 * 20070829180026 (stolen from WARCRecordToSearchResultAdapter - move that
 * method to ArchiveUtils!)
 * <p>
 * Digits are picked by position; separator characters are not inspected.
 * </p>
 * @param input date text in ISOZ format.
 * @return date text in DT14 format.
 * @throws IllegalArgumentException if {@code input} is {@code null} or too
 *         short to hold {@code yyyy-MM-ddTHH:mm:ss}
 */
private static String transformWARCDate(final String input) {
    if (input == null || input.length() < ISOZ_DATETIME_LENGTH) {
        // fail with the offending value rather than a bare
        // NullPointerException / StringIndexOutOfBoundsException.
        throw new IllegalArgumentException(
            "not an ISOZ date (yyyy-MM-ddTHH:mm:ssZ): " + input);
    }
    StringBuilder output = new StringBuilder(14);
    output.append(input.substring(0, 4));   // yyyy
    output.append(input.substring(5, 7));   // MM
    output.append(input.substring(8, 10));  // dd
    output.append(input.substring(11, 13)); // HH
    output.append(input.substring(14, 16)); // mm
    output.append(input.substring(17, 19)); // ss
    return output.toString();
}

/**
 * setup mocks with {@code resources}.
 * <ul>
 * <li>Call {@link #setupCaptures(ResourceIndex, ResourceStore, int, Resource...)}</li>
 * <li>Set up {@code replay} so as to return closest from {@code getClosest}.</li>
 * <li>Set up {@code resourceIndex} so as to return results from {@code query(wbRequest)}</li>
 * </ul>
 * <p>
 * Note: {@link #wbRequest} must be set up before calling this method, or
 * {@code ResourceIndex} will not return expected search result set.
* </p> * @param closestIndex zero-based index into {@code resources} * @param resources resources * @return CaptureSearchResults populated with CaptureSearchResult objects. * @throws Exception */ protected CaptureSearchResults setupCaptures(int closestIndex, Resource... resources) throws Exception { CaptureSearchResults results = setupCaptures(resourceIndex, resourceStore, closestIndex, resources); CaptureSearchResult closest = results.getClosest(); if (closest != null) { EasyMock.expect(replay.getClosest(wbRequest, results)).andReturn(closest); } EasyMock.expect(resourceIndex.query(wbRequest)).andReturn(results); return results; } /** * given a sequence of {@link WarcResource}s, * <ul> * <li>build CaptureSearchResults, filled with CaptureSearchResult * instances, which have auto-generated unique filename and offset of 0. * this is necessary for equality (see {@link CaptureSearchResultMatcher}).</li> * <li>setup ResourceStore mock to return Resource for each * CaptureSearchResult. * <li> * <li>if {@code closestIndex} {@code >= 0}, set corresponding * CaptureSearchResult's closest flag.</li> * </ul> * <p> * It's left as caller's responsibility to setup {@link ResourceIndex#query(WaybackRequest)} mock * to return {@code CaptureSearchResults} returned by this method. * </p> * @param resourceIndex ResourceIndex mock * @param resourceStore ResourceStore mock, can be {@code null} * @param closestIndex 0-based index of resource to be marked as * <i>closest</i> * @param resources sequence of WarcResources * @return CaptureSearchResults populated with CaptureSearchResult objects. * @throws Exception */ public static CaptureSearchResults setupCaptures( ResourceIndex resourceIndex, ResourceStore resourceStore, int closestIndex, Resource... 
resources) throws Exception { CaptureSearchResults results = new CaptureSearchResults(); CaptureSearchResult prev = null; for (Resource res : resources) { CaptureSearchResult result = new FastCaptureSearchResult(); if (prev != null) { prev.setNextResult(result); result.setPrevResult(prev); } // TODO: Resource should have methods for accessing URI and date if (res instanceof WarcResource) { // TODO: want to use WARCRecordToSearchResultAdapter? // WarcResource // has no method to retrieve underlining WARCRecord. ArchiveRecordHeader h = ((WarcResource)res).getWarcHeaders(); String originalUrl = h.getUrl(); String ts = (String)h.getHeaderValue("WARC-Date"); // WARC-Date is in ISOZ format. ts = transformWARCDate(ts); result.setOriginalUrl(originalUrl); result.setCaptureTimestamp(ts); result.setOffset(0); // this is (W)ARC file name in real practice. here we use // DT14 timestamp as pseudo filename (.warc.gz suffix is not // essential). result.setFile(ts + ".warc.gz"); if (res.getRefersToDate() != null) { // getRefersToDate() is supposed to return "yyyyMMddHHmmss" String refTimestamp = res.getRefersToDate(); for (CaptureSearchResult r : results.getResults()) { if (r.getCaptureTimestamp().equals(refTimestamp)) { result.flagDuplicateDigest(r); refTimestamp = null; break; } } if (refTimestamp != null) { // no original capture found - just flag it result.flagDuplicateDigest(); } } } else if (res instanceof ArcResource) { // TODO: should use ARCRecordToSearchResultAdapter? ArcResource // has getArcRecord() methods whose result may be cast to ARCRecord. // NB: ArcResource#getARCMetadata() creates a new Map object. 
Map<String, String> meta = ((ArcResource)res).getARCMetadata(); String originalUrl = meta .get(ArchiveFileConstants.URL_FIELD_KEY); String ts = meta.get(ArchiveFileConstants.DATE_FIELD_KEY); result.setOriginalUrl(originalUrl); result.setCaptureTimestamp(ts); } else { throw new AssertionError("unexpected Resource type: " + res.getClass()); } result.setHttpCode(Integer.toString(res.getStatusCode())); // CaptureSearchResultMatcher fails without this, but actual value // does not matter. so set it to 0. result.setOffset(0); assertTrue("invalid timestamp " + result.getCaptureTimestamp(), validTimestamp(result.getCaptureTimestamp())); if (closestIndex == 0) { result.setClosest(true); results.setClosest(result); } if (resourceStore != null) { // Note AccessPoint passes a copy of CaptureSearchResult in some // case (ex. Replay_Revisit() test). // so we need to use custom argument matcher. EasyMock .expect( resourceStore .retrieveResource(eqCaptureSearchResult(result))) .andReturn(res).anyTimes(); } results.addSearchResult(result); --closestIndex; } return results; } // REFACTORING THOUGHTS: WaybackRequest.setReplayRequest() could take // requestUrl and replayTimestamp it is semantically more clear. /** * create new WaybackRequest set up as replay request for {@code requestUrl} * at {@code replayTimestamp}. created object is set to #wbRequest. * @param wbRequest * @param requestUrl * @param replayTimestamp */ public void setReplayRequest(String requestUrl, String replayTimestamp) { wbRequest = new WaybackRequest(); wbRequest.setReplayRequest(); wbRequest.setRequestUrl(requestUrl); wbRequest.setReplayTimestamp(replayTimestamp); } /** * Setup expectation that {@code capture} is rendered. 
* <p> * @param capture CaptureSearchResult to be rendered * @param headersResource Resource from which HTTP headers are read * (revisit) * @param payloadResource Resource from which HTTP payload is read * (revisited) * @param results capture search results from which {@code capture} is * picked * @throws WaybackException * @throws IOException * @throws ServletException */ protected void expectRendering(CaptureSearchResult capture, Resource headersResource, Resource payloadResource, CaptureSearchResults results) throws ServletException, IOException, WaybackException { EasyMock.expect( replay.getRenderer(wbRequest, capture, headersResource, payloadResource)).andReturn(replayRenderer); replayRenderer.renderResource(httpRequest, httpResponse, wbRequest, capture, headersResource, cut.getUriConverter(), results); } /** * Setup expectation that {@code handleReplay} redirects to * {@code expectedRedirectURI}. * @param expectedRedirectURI */ protected void expectRedirect(String expectedRedirectURI) { httpResponse.setHeader("Location", expectedRedirectURI); httpResponse.setStatus(302); } /** * setup expected call to {@link ResourceIndex#query(WaybackRequest)}, * returning empty {@link UrlSearchResults}. * <p> * This is sufficient for most cases, as AccessPoint is not concerned with * UrlSearchResult, but simply passes it to query renderer. * </p> * <p> * Note: set up {@link #wbRequest} before calling this method. * </p> * @throws Exception declared, but will never be thrown */ protected void expectUrlIndexQuery() throws Exception { UrlSearchResults results = new UrlSearchResults(); EasyMock.expect(resourceIndex.query(wbRequest)).andReturn(results); query.renderUrlResults(httpRequest, httpResponse, wbRequest, results, cut.getUriConverter()); } /** * test basic behavior 1. 
* <ul> * <li>no authorization</li> * <li>no exclusion factory</li> * </ul> * and, * <ul> * <li> * <li>there's no capture on date matching the request.</li> * <li>closest capture is not a revisit</li> * </li> this shall result in redirect (302) response to the URL with * closest capture date in date component. * * alternative path: {@link #testBounceToReplayPrefix()} * @throws Exception */ public void testHandleRequest_Replay_1() throws Exception { // make sure wbRequesat.requestUrl, replayTimestamp are set up. setReplayRequest("http://www.example.com/", "20100601123456"); // TODO: originalUrl can be different from wbRequst.requestUrl, and it will // be reflected to redirect URL (worth testing). // CaptureSearchResults results = createCaptureSearchResults( // "20100601000000", "http://www.example.com/", "200"); // CaptureSearchResult closest = results.getClosest(); // TODO: this can be different from wbRequst.requestUrl, and it will be // reflected to redirect URL. // closest.setOriginalUrl("http://www.example.com/"); // closest.setHttpCode("200"); // closest.captureTimestamp != wbRequest.replayTimestamp // closest.setCaptureTimestamp("20100601000000"); // Resource below has originalUrl="http://test.example.com/", which is // different from // wbRequest.requestUrl above. originalUrl shall be reflected to // resultant redirect URL. 
@SuppressWarnings("unused") CaptureSearchResults results = setupCaptures( 0, createTestHtmlResource("20100601000000", "hogheogehoge\n".getBytes("UTF-8"))); // handleRequest() // calls handleReplay() // - calls checkInterstitialRedirect() // - calls selfRedirectCanonicalizer.urlStringToKey(requestURL) if // non-null // - calls queryIndex(), which calls // collection.resourceIndex.query(wbRequest) // which in turn returns results above (setup in setupCaptures(...)) // - calls replay.getClosest() // - calls checkAnchorWindow() // - calls getResource(closest, skipFiles), which // - first checks if closest is in skipFiles (and throws // ResourceNotAvailableException if it is), // - then calls collection.resourceStore.retrieveResource(closest), // which returns Resource above. // when closest's timestamp is different from replay requests's // timestamp, it redirects to closest's timestamp. expectRedirect("/web/20100601000000/http://test.example.com/"); EasyMock.replay(httpRequest, httpResponse, resourceIndex, resourceStore, replay); cut.init(); boolean r = cut.handleRequest(httpRequest, httpResponse); EasyMock.verify(resourceIndex, resourceStore, replay); assertTrue("handleRequest return value", r); } /** * basic replay test part 2. there's a capture whose capture date matches * the request. * @throws Exception */ public void testHandleRequest_Replay_2() throws Exception { // make sure wbRequesat.requestUrl, replayTimestamp are set up. setReplayRequest("http://test.example.com/", "20100601000000"); // there's capture with timestamp exactly requested for. 
Resource payloadResource = createTestHtmlResource("20100601000000", "hogheogehoge\n".getBytes("UTF-8")); CaptureSearchResults results = setupCaptures(0, payloadResource); CaptureSearchResult closest = results.getClosest(); expectRendering(closest, payloadResource, payloadResource, results); EasyMock.replay(httpRequest, httpResponse, resourceIndex, resourceStore, replay); cut.init(); boolean r = cut.handleRequest(httpRequest, httpResponse); EasyMock.verify(resourceIndex, resourceStore, replay); assertTrue("handleRequest return value", r); } /** * test CaptureSearchResult equality by file and offset. */ public static class CaptureSearchResultMatcher implements IArgumentMatcher { private CaptureSearchResult expected; public CaptureSearchResultMatcher(CaptureSearchResult expected) { this.expected = expected; } @Override public boolean matches(Object actual) { // CaptureSearchResult is compared by file name and offset. this is // how AccessPoint#retrievePayloadForIdenticalContentRevisit(...) // retrieves previous capture. // TODO: this could be defined as CaptureSearchResult#equals(Object). if (!(actual instanceof CaptureSearchResult)) return false; String file = ((CaptureSearchResult)actual).getFile(); long offset = ((CaptureSearchResult)actual).getOffset(); if (expected.getOffset() != offset) return false; return file == null ? 
expected.getFile() == null : file .equals(expected.getFile()); } @Override public void appendTo(StringBuffer buffer) { buffer.append("eqCaptureSearchResult("); buffer.append(expected.getFile()); buffer.append(","); buffer.append(expected.getOffset()); buffer.append(")"); } } public static CaptureSearchResult eqCaptureSearchResult( CaptureSearchResult expected) { EasyMock.reportMatcher(new CaptureSearchResultMatcher(expected)); return null; } public static class CaptureSearchMatcher implements IArgumentMatcher { private String url; private String replayTimestamp; public CaptureSearchMatcher(String url, String replayTimestamp) { this.url = url; this.replayTimestamp = replayTimestamp; } @Override public boolean matches(Object actual) { if (!(actual instanceof WaybackRequest)) return false; WaybackRequest wbRequest = (WaybackRequest)actual; String replayTimestamp = wbRequest.getReplayTimestamp(); String url = wbRequest.getRequestUrl(); if (url == null || replayTimestamp == null) return false; if (this.url == null || this.replayTimestamp == null) return false; // Only exact match is supported. i.e. http://example.com/ and // http://example.com are different even though they typically // get canonicalized into the same string. // Also not checking if wbRequest is in fact a capture search // request. return this.url.equals(url) && this.replayTimestamp.equals(replayTimestamp); } @Override public void appendTo(StringBuffer buffer) { buffer.append("eqCaptureSearchRequest("); buffer.append(url).append(",").append(replayTimestamp); buffer.append(")"); } } public static WaybackRequest eqCaptureSearchRequest(String url, String replayTimestamp) { EasyMock.reportMatcher(new CaptureSearchMatcher(url, replayTimestamp)); return null; } /** * test of revisit. closest capture is a revisit. 
* @throws Exception */ public void testHandleRequest_Replay_Revisit() throws Exception { setReplayRequest("http://www.example.com/", "20100601000000"); // closest SearchResult has isDuplicateDigest() == true. byte[] payload = "hogehogehogehoge\n".getBytes("UTF-8"); Resource payloadResource = createTestHtmlResource("20100501000001", payload); Resource headerResource = createTestRevisitResource("20100601000000", payloadResource, true); CaptureSearchResults results = setupCaptures(1, payloadResource, headerResource); CaptureSearchResult previous = results.getResults().get(0); CaptureSearchResult closest = results.getClosest(); assertTrue(closest.isDuplicateDigest()); assertTrue(closest.getDuplicatePayloadFile() != null); assertTrue(closest.getDuplicatePayloadOffset() != null); expectRendering(closest, headerResource, payloadResource, results); EasyMock.replay(httpRequest, httpResponse, resourceIndex, resourceStore, replay); cut.init(); boolean r = cut.handleRequest(httpRequest, httpResponse); EasyMock.verify(resourceIndex, resourceStore, replay); assertTrue("handleRequest return value", r); // TODO: failure case: closest.duplicatePayloadFile != null -> // ResourceNotAvailableException // TODO: failure case: self-redirecting -> calls finxNextClosest() and // fails if there's no more closest. // wbRequest.timestampSearchKey == true -> calls queryIndex() once // again. } /** * Test of internal behavior. If loading recording from an archive failed, * AccessPoint shall not attempt to load the same archive again within the * request for performance reasons. 
* @throws Exception */ public void testHandleReplay_noMultipleErrors() throws Exception { setReplayRequest("http://www.example.com/", "20100601000000"); byte[] payload = "payload".getBytes("UTF-8"); Resource resource0 = createTestHtmlResource("20100428000000", payload); Resource resource1 = createTestHtmlResource("20100501000000", payload); Resource revisit1 = createTestRevisitResource("20100515000000", resource1, true); Resource revisit2 = createTestRevisitResource("20100601000000", resource1, true); CaptureSearchResults results = setupCaptures(3, resource0, resource1, revisit1, revisit2); List<CaptureSearchResult> captures = results.getResults(); // replace ResourceStore mock with a strict one that throws exception // for 20100501000000 capture. collection.setResourceStore(resourceStore = EasyMock .createMock(ResourceStore.class)); CaptureSearchResult capture1 = captures.get(1); // details == filename is a requirement of old code. ResourceNotAvailableException rnae = new ResourceNotAvailableException( "mocked load failure", capture1.getFile()); // point is, retrieveResource() shall not be called while checking // captures 20100515 and 20100501 EasyMock .expect( resourceStore.retrieveResource(eqCaptureSearchResult(capture1))) .andThrow(rnae).once(); EasyMock.expect( resourceStore.retrieveResource(eqCaptureSearchResult(captures .get(0)))).andReturn(resource0); // whether retrieveResource() is called for these revisit captures are // non-essential (they will be, // but it may change) captures.get(2).flagDuplicateDigest(capture1); EasyMock.expect( resourceStore.retrieveResource(eqCaptureSearchResult(captures .get(2)))).andStubReturn(revisit1); captures.get(3).flagDuplicateDigest(capture1); EasyMock.expect( resourceStore.retrieveResource(eqCaptureSearchResult(captures .get(3)))).andStubReturn(revisit2); final String expectedRedirectURI = "/web/20100428000000/http://test.example.com/"; expectRedirect(expectedRedirectURI); EasyMock.replay(httpRequest, httpResponse, 
resourceIndex, resourceStore, replay); // default 0 makes AccessPoint give up on the first // ResourceNotAvailableException on // 20100501000000 capture. here we want it to try 20100428000000 and // succeed. cut.setMaxRedirectAttempts(10); cut.init(); boolean r = cut.handleRequest(httpRequest, httpResponse); EasyMock.verify(resourceIndex, resourceStore, replay); assertTrue("handleRequest return value", r); } public static Resource createSelfRedirectResource(String url, String timestamp) throws IOException { assert !url.endsWith("/"); // typical redirect: http://example.com to http://example.com/ String location = url + "/"; TestWARCRecordInfo recinfo = new TestWARCRecordInfo( TestWARCRecordInfo.buildHttpRedirectResponseBlock( "301 Moved Permanently", location)); recinfo.setUrl(url); recinfo.setCreate14DigitDateFromDT14(timestamp); TestWARCReader ar = new TestWARCReader(recinfo); WARCRecord rec = ar.get(0); WarcResource resource = new WarcResource(rec, ar); resource.parseHeaders(); return resource; } /** * {@code handleReplay()} is supposed to throw * {@code ResourceNotAvailableException} when it cannot find a replay-able * capture for a request. This is a test for one of such "capture not found" * case: revisited record cannot found in the capture search results, and * only other non-revisit resource available is a self-redirect. This is * rather a corner case. handlReplay() shall throw * {@code ResourceNotAvailableException}. 
* @throws Exception
*/
public void testHandleRequest_MissingRevisitPayload() throws Exception {
    final String url = "http://example.com";
    final String revisitedTimestamp = "20140619015411";

    setReplayRequest(url, "20140619004054");

    // resource revisited, but missing in capture search result
    Resource revisited = createSelfRedirectResource(url, revisitedTimestamp);

    CaptureSearchResults captures = setupCaptures(0,
            createSelfRedirectResource(url, "20140619004054"),
            createTestRevisitResource("20140619016511", revisited, true));
    // second capture is a revisit, but original is not found.
    captures.getResults().get(1).flagDuplicateDigest();

    // expectation:
    // 1. first capture is skipped because it is self-redirect. selects the
    //    second.
    // 2. second capture is a revisit, calls resourceIndex.query() for the
    //    revisited, but original capture is not found in the result.
    // 3. ResourceNotAvailableException is thrown
    // 4. exception captured, skip to the next capture and finds none.
    // 5. ResourceNotAvailableException is thrown out of handleReplay
    // 6. ExceptionRenderer.renderException is called (in handleQuery)

    // XXX setting these up manually feels very fragile - perhaps we need a
    // test ResourceIndex + ReplayDispatcher.
    final CaptureSearchResult firstCapture = captures.getResults().get(0);
    EasyMock.expect(
            resourceIndex.query(eqCaptureSearchRequest(url, revisitedTimestamp)))
            .andReturn(captures);
    EasyMock.expect(
            replay.getClosest(
                    eqCaptureSearchRequest(url, revisitedTimestamp),
                    EasyMock.same(captures)))
            .andReturn(firstCapture);

    // for this test, it is easier to test handleReplay, not handleQuery
    EasyMock.replay(httpRequest, httpResponse, resourceIndex, resourceStore,
            replay);

    cut.init();
    try {
        cut.handleReplay(wbRequest, httpRequest, httpResponse);
        fail("handleReplay did not throw ResourceNotAvailableException");
    } catch (ResourceNotAvailableException expected) {
        // expected.
    }

    EasyMock.verify(resourceIndex, resourceStore, replay);
}

/**
 * old-style WARC revisit (no HTTP status line and header, Content-Length in
 * WARC header is zero). it shall replay HTTP status line, headers and
 * content from previous matching capture.
 * <p>Placeholder: no assertions are implemented yet.</p>
 * @throws Exception
 */
public void testHandleRequest_Replay_OldWARCRevisit() throws Exception {
    // TODO - it'd be better to define an interface in Resource class so
    // that AccessPoint needs not to have separate execution path for this.
    // that's easier to test.
}

/**
 * old-style ARC revisit (no mimetype, filename is '-')
 * <p>Placeholder: no assertions are implemented yet.</p>
 * @throws Exception
 */
public void testHandleRequest_Replay_OldARCRevisit() throws Exception {
    // ditto - see TODO comment in testHandleRequest_Replay_OldWARCRevisit().
}

/**
 * Builds a WARC-backed test {@link Resource} carrying a
 * "502 Bad Gateway" {@code text/plain} response whose body is
 * {@code "failed\n"}. Headers of the returned resource are already parsed.
 *
 * @return resource wrapping the first record of the generated WARC
 * @throws IOException propagated from WARC record construction / parsing
 */
public static final Resource createTest502Resource() throws IOException {
    final byte[] body = "failed\n".getBytes("UTF-8");
    final byte[] responseBlock = TestWARCRecordInfo.buildHttpResponseBlock(
            "502 Bad Gateway", "text/plain", body);

    final TestWARCRecordInfo recordInfo = new TestWARCRecordInfo(responseBlock);
    final TestWARCReader reader = new TestWARCReader(recordInfo);
    final WARCRecord warcRecord = reader.get(0);

    final WarcResource badGateway = new WarcResource(warcRecord, reader);
    badGateway.parseHeaders();
    return badGateway;
}

/**
 * if closest is not HTTP-success AND replaying embedded context (CSS,
 * JavaScript, images, etc.), use next closest with successful response, or
 * for lower priority, a redirect, instead. unless such capture is of the
 * same timestamp as the replay request, redirect to the capture found.
 * @throws Exception
 */
public void testHandleRequest_Replay_Embedded() throws Exception {
    final String cssUrl = "http://test.example.com/style.css";

    // request timestamp is different from 'previous' below. it makes
    // handleRequest return redirect. in this case, Resource for 'previous'
    // will not be retrieved.
    setReplayRequest(cssUrl, "20100601000000");

    // to have isAnyEmbeddedContext() return true - any of cSSContext,
    // iMGContext, jSContext, frameWrapperContext, iFrameWrapperContext,
    // objectEmbedContext has the same effect.
    wbRequest.setCSSContext(true);
    assertTrue(wbRequest.isAnyEmbeddedContext());

    CaptureSearchResults captures = setupCaptures(
            1,
            createTestHtmlResource(cssUrl, "20100501000000",
                    "hogheogehoge\n".getBytes("UTF-8")),
            createTest502Resource());
    // precondition: the closest capture is the HTTP error one.
    assertTrue(captures.getClosest().isHttpError());

    // or wbRequest.setBestLatestReplayRequest();

    expectRedirect("/web/20100501000000cs_/http://test.example.com/style.css");
    // TODO: extraHeaders expectations?

    EasyMock.replay(httpRequest, httpResponse, resourceIndex, resourceStore,
            query, replay);

    cut.init();
    boolean handled = cut.handleRequest(httpRequest, httpResponse);

    // handleReplay throws BetterRequestException and handled inside
    // handleRequest(). exception will not be thrown out of handleRequest().
    EasyMock.verify(httpResponse, resourceIndex, resourceStore, query, replay);

    assertTrue("handleRequest return value", handled);
}

// REFACTORING THOUGHTS: WaybackRequest.setUrlCaptureQueryRequest() could
// take requestUrl and replayTimestamp.
/**
 * create new WaybackRequest set up as capture query request for URL
 * {@code requestUrl}, at time {@code replayTimestamp}. created object is
 * assigned to {@code wbRequest}.
 * @param requestUrl URL to query captures for
 * @param replayTimestamp timestamp set as the request's replay timestamp
 */
public final void setCaptureQueryRequest(String requestUrl,
        String replayTimestamp) {
    final WaybackRequest captureQuery = new WaybackRequest();
    captureQuery.setCaptureQueryRequest();
    captureQuery.setRequestUrl(requestUrl);
    captureQuery.setReplayTimestamp(replayTimestamp);
    wbRequest = captureQuery;
}

// REFACTORING THOUGHTS: query rendering could be done in the same mechanism
// as replay rendering.
// there's no particular reason CaptureSearchResults rendering and // UrlSearchResults rendering must be implemented in the same class. // they share nothing. // ReplayRenderer and QueryRenderer may be unified by passing UIResults // instead of (CaptureSearchResult, Resource, CaptureSearchResults) for // ReplayRenderer, and (CaptureSearchResults / UrlSearchResults) for // QueryRenderer. // this way, query.Renderer could be replaced by generic "variant dispatcher" // class that dispatches rendering to different JSPs depending on the type of // output (HTML or XML). public void testHandleRequest_CaptureSearchResults() throws Exception { setCaptureQueryRequest("http://www.example.com/", "20100601123456"); // handleRequest() // redirect to queryPrefix + translateRequestPathQuery(httpRequest) // if bounceToQueryPrefix is true (not tested here) // copies exactHostMatch to wbRequest.exactHost (TODO: should be done by // parser?) // calls handleQuery() // - calls queryIndex(), which calls collection.resourceIndex.query(), // which returns CaptureSearchResults // (unexpected object from queryIndex() results in // WaybackException("Unknown index format"). // this is considered to be a programming/configuration error. not // tested.) CaptureSearchResults results = new CaptureSearchResults(); CaptureSearchResult result = new CaptureSearchResult(); results.setClosest(result); EasyMock.expect(resourceIndex.query(wbRequest)).andReturn(results); // - calls MementoUtils.printTimemapResponse(results, wbRequest, // httpResponse) instead // if wbRequst.isMementoTimemapRequest() (N/A here) (TODO: can we move // this to QueryRenderer implementation?) // - calls query.renderCaptureResults(...) 
query.renderCaptureResults(httpRequest, httpResponse, wbRequest, results, cut.getUriConverter()); EasyMock.replay(httpRequest, httpResponse, resourceIndex, query); cut.init(); boolean r = cut.handleRequest(httpRequest, httpResponse); EasyMock.verify(query); // result shall have closest flag set (FIrefox proxy plugin expects // this) assertTrue("closest flag", result.isClosest()); assertTrue("handleRequest return value", r); } // REFACTORING THOUGHTS: WaybackRequet.setUrlQueryRequest() could take // requestUrl and replayTimestamp. /** * create new WaybackRequest set up as URL query request for URL * {@code requestUrl} at time {@code replayTimestamp}. created object is set * to {@link #wbRequest}. * @param requestUrl * @param replayTimestamp */ public void setUrlQueryRequest(String requestUrl, String replayTimestamp) { wbRequest = new WaybackRequest(); wbRequest.setUrlQueryRequest(); wbRequest.setRequestUrl(requestUrl); wbRequest.setReplayTimestamp(replayTimestamp); } public void testHandleRequest_UrlSearchResults() throws Exception { setUrlQueryRequest("http://www.example.com/", "20100601123456"); // AccessPoint is not concerned of the details of UrlSearchResults. it // just forwards the request to QueryRenderer. so we leave it uninitialized // here. UrlSearchResults results = new UrlSearchResults(); EasyMock.expect(resourceIndex.query(wbRequest)).andReturn(results); // EXPECTATION: AccessPoint.handleQuery() calls // query.renderUrlResults(). query.renderUrlResults(httpRequest, httpResponse, wbRequest, results, cut.getUriConverter()); EasyMock.replay(httpRequest, httpResponse, query, resourceIndex); cut.init(); boolean r = cut.handleRequest(httpRequest, httpResponse); EasyMock.verify(query, resourceIndex); assertTrue("handleRequest return value", r); } // tests for collapseTime parameter. for query requests (both capture and // URL), // AccessPoint passes its collapseTime parameter to ResourceIndex#query via // WaybackRequest.collapseTime. 
for replay request, it doesn't. public void testHandleRequest_queryCollapseTimeUnspecified() throws Exception { cut.setQueryCollapseTime(-1); setUrlQueryRequest("http://www.example.com/", "20100601123456"); expectUrlIndexQuery(); EasyMock.replay(httpRequest, httpResponse, query, resourceIndex); cut.init(); boolean handled = cut.handleRequest(httpRequest, httpResponse); EasyMock.verify(query, resourceIndex); assertTrue("handleRequest return value", handled); assertEquals(-1, wbRequest.getCollapseTime()); } public void testHandleRequest_queryCollapseTimeSpecified() throws Exception { cut.setQueryCollapseTime(10); setUrlQueryRequest("http://www.example.com/", "20100601123456"); expectUrlIndexQuery(); EasyMock.replay(httpRequest, httpResponse, query, resourceIndex); cut.init(); boolean handled = cut.handleRequest(httpRequest, httpResponse); EasyMock.verify(query, resourceIndex); assertTrue("handleRequest return value", handled); assertEquals(10, wbRequest.getCollapseTime()); } public void testHandlerRequest_queryCollapseTimeForReplayQuery() throws Exception { cut.setQueryCollapseTime(10); // query parameters and CaptureSeachResults details are irrelevant to // this test. setReplayRequest("http://test.example.com/", "20100601123456"); setupCaptures( 0, createTestHtmlResource("20100601000000", "hogheogehoge\n".getBytes("UTF-8"))); expectRedirect("/web/20100601000000/http://test.example.com/"); EasyMock.replay(httpRequest, httpResponse, query, resourceIndex, resourceStore, replay); cut.init(); boolean handled = cut.handleRequest(httpRequest, httpResponse); EasyMock.verify(query, resourceIndex); assertTrue("handleRequest return value", handled); assertEquals(-1, wbRequest.getCollapseTime()); } /** * if bounceToReplayPrefix is true, replay request is redirected to other * access point. 
* @throws Exception */ public void testBounceToReplayPrefix() throws Exception { final String URL = "http://www.example.com/"; final String TIMESTAMP = "20100601123456"; setReplayRequest(URL, TIMESTAMP); EasyMock.reset(httpRequest); EasyMock.expect(httpRequest.getRequestURI()).andStubReturn( "/" + TIMESTAMP + "/" + URL); EasyMock.expect(httpRequest.getLocalName()).andStubReturn("localhost"); EasyMock.expect(httpRequest.getLocalPort()).andStubReturn(8080); EasyMock.expect(httpRequest.getContextPath()).andStubReturn("/"); EasyMock.expect(httpRequest.getLocale()).andStubReturn( Locale.CANADA_FRENCH); final String replayPrefix = "http://test.archive.org/"; cut.setBounceToReplayPrefix(true); cut.setReplayPrefix(replayPrefix); final String suffix = "/" + TIMESTAMP + "/" + URL; httpResponse.sendRedirect(replayPrefix + suffix); EasyMock.replay(httpRequest, httpResponse); cut.handleRequest(httpRequest, httpResponse); } // TODO: the way AccessPoint is reused for rendering static resource looks // inefficient. // bounceToReplayPrefix and bounceToQueryPrefix are always configured in // pair, and they are set to true only for static resource AccessPoint. /** * static AccessPoint - configured with * <ul> * <li>accessPointPath=<code>${wayback.staticPrefix}</code></li> * <li>serveStatic=true</li> * <li>bounceToReplayPrefix=true</li> * <li>bounceToQueryPrefix=true</li> * </ul> * when {@link RequestParser#parse(HttpServletRequest, AccessPoint)} returns * null, request is forwarded to dispatchLocal() for rendering static * resources. 
* @throws Exception */ public void testDispatchLocal() throws Exception { // first reset the mock for overriding getAttribute(), getRequestURI(), // and getRequestURL() EasyMock.reset(httpRequest); EasyMock.expect(httpRequest.getLocalName()).andStubReturn("localhost"); EasyMock.expect(httpRequest.getLocalPort()).andStubReturn(8080); EasyMock.expect(httpRequest.getContextPath()).andStubReturn("/static"); EasyMock.expect(httpRequest.getLocale()).andStubReturn( Locale.CANADA_FRENCH); EasyMock.expect( httpRequest.getRequestDispatcher(EasyMock.<String>notNull())) .andStubReturn(requestDispatcher); // used by RequestMapper#getRequestPathPrefix(HttpServletRequest) // typical value found in // ia-wayback-projects/projects/global-wayback/configs/local/wayback.properties // TODO: RequestMapper#getRequestContextPath(HttpServletRequest) assumes // value of this // attribute ends with "/". RequestMapper has constant declaration for // "webapp-request-context-path-prefix", but it's private. EasyMock.expect( httpRequest.getAttribute("webapp-request-context-path-prefix")) .andStubReturn("/static/"); // override getRequestURI() behavior EasyMock.expect(httpRequest.getRequestURI()).andStubReturn( "/static/aaa.css"); EasyMock.expect(httpRequest.getRequestURL()).andStubReturn( new StringBuffer("/static/aaa.css")); // reconfigure RequestParser to return null, which signifies that // there's no dynamic handler and the request shall be mapped to local // static resource. (AccessPoint#dispatchLocal(HttpServletRequest)) EasyMock.reset(parser); EasyMock.expect(parser.parse(httpRequest, cut)).andReturn(null); // AccessPoint#dispatchLocal() checks existence of the file if // ServletContext#getRealPath() // returns non-null value for translated request path. have it skip the // test by returning null. otherwise dispatchLocal() will fail. 
ServletContext servletContext = EasyMock .createMock(ServletContext.class); EasyMock.expect(servletContext.getRealPath(EasyMock.<String>notNull())) .andStubReturn(null); cut.setServletContext(servletContext); // Expectation: AccessPoint#dispatchLocal() eventually calls // RequestDispatcher#forward(...) requestDispatcher.forward(httpRequest, httpResponse); EasyMock.replay(httpRequest, parser, servletContext, requestDispatcher); assertEquals("aaa.css", RequestMapper.getRequestContextPath(httpRequest)); // AccessPoint#dispatchLocal() returns immediately if serveStatis is // false. cut.setServeStatic(true); cut.init(); boolean r = cut.handleRequest(httpRequest, httpResponse); EasyMock.verify(parser, requestDispatcher); assertTrue("handleRequest return value", r); } // *** Memento Tests *** // REFACTORING THOUGHTS: Memento annotations (adding response headers) could // be implemented as decorator of replay renderer. because of this possibility, // I separated out tests for memento headers here. /** * test of Memento-furnished response to URL-M (Memento). Memento * Specification states that URL-M response * <ul> * <li>MUST NOT have "Vary: accept-datetime"</li> * <li>MUST have "Memento-Datetime"</li> * <li>MUST have "Link" header with at least a URI-R as "original" relation. * </li> * </ul> * @throws Exception */ public void testMemento_replay_exactCapture() throws Exception { final String AGGREGATION_PREFIX = "http://web.archive.org"; cut.setEnableMemento(true); cut.setConfigs(new Properties()); cut.getConfigs().setProperty(MementoUtils.AGGREGATION_PREFIX_CONFIG, AGGREGATION_PREFIX); // make sure wbRequesat.requestUrl, replayTimestamp are set up. 
setReplayRequest("http://www.example.com/", "20100601000000"); assertFalse(wbRequest.isMementoTimegate()); Resource payloadResource = createTestHtmlResource("20100601000000", "hogehogehogehoge\n".getBytes("UTF-8")); CaptureSearchResults results = setupCaptures(0, payloadResource); CaptureSearchResult closest = results.getClosest(); // when closest's timestamp == request's timestamp, // it gets ReplayRenderer with replay.getRenderer(wbRequest, closest, // httpHeaderResource, payloadResource), // and calls renderResource() on it. expectRendering(closest, payloadResource, payloadResource, results); // key expectations of this test // called through MementoUtils.addMementoHeaders(...) // NO Vary: accept-datetime header. final String expectedMementoDateTime = "Tue, 01 Jun 2010 00:00:00 GMT"; httpResponse.setHeader(MementoUtils.MEMENTO_DATETIME, expectedMementoDateTime); // Wayback include timemap, timegate, first and last memento links in // addition to mandatory "original" link. // TODO: actually it is acceptable to have various rels in different // order. It'd take custom argument matcher. final String expectedMementoLink = String .format( "<%1$s>; rel=\"original\", " + "<%2$s%3$stimemap/link/%1$s>; rel=\"timemap\"; type=\"application/link-format\", " + "<%2$s%3$s%1$s>; rel=\"timegate\", " + "<%2$s%3$s%4$s/%1$s>; rel=\"first last memento\"; datetime=\"%5$s\"", "http://www.example.com/", AGGREGATION_PREFIX, WEB_PREFIX, "20100601000000", expectedMementoDateTime); httpResponse.setHeader(MementoUtils.LINK, expectedMementoLink); EasyMock.replay(httpRequest, httpResponse, resourceIndex, resourceStore, replay); cut.init(); boolean r = cut.handleRequest(httpRequest, httpResponse); EasyMock.verify(resourceIndex, resourceStore, replay); assertTrue("handleRequest return value", r); } /** * Test of Memento-furnished response to replay request for non-archived * timestamp. This is not strictly a URL-M (as far as I understand). 
Wayback * returns <i>intermediate resource</i> to URI-M. Memento Specification * states the response * <ul> * <li>MUST NOT have "Vary: accept-datetime" header (this is the key * difference from redirect from URI-G; see below)</li> * <li>MUST NOT have "Memento-Datetime" header</li> * <li>MUST have "Link" header, which MUST have at least "original" relation * link. "timegate", "timemap" and "memento" relation type links MAY be * provided.</li> * </ul> * @throws Exception */ public void testMemento_replay_nearbyCapture() throws Exception { cut.setEnableMemento(true); // make sure wbRequesat.requestUrl, replayTimestamp are set up. // As this is a URI-M, not URI-G, mementoTimegate flag must be false. setReplayRequest("http://test.example.com/", "20100601123456"); assertFalse(wbRequest.isMementoTimegate()); Resource payloadResource = createTestHtmlResource("20100601000000", "hogehogehogehoge\n".getBytes("UTF-8")); @SuppressWarnings("unused") CaptureSearchResults results = setupCaptures(0, payloadResource); // handleRequest() // calls handleReplay() // - calls checkInterstitialRedirect() // - calls selfRedirectCanonicalizer.urlStringToKey(requestURL) if // non-null // - calls queryIndex(), which calls collection.resourceIndex.query(wbRequest) // redirects to URL for closest capture. expectRedirect("/web/20100601000000/http://test.example.com/"); // also has a Link header with just "original" relation. httpResponse .setHeader("Link", String.format("<%s>; rel=\"original\"", "http://test.example.com/")); EasyMock.replay(httpRequest, httpResponse, resourceIndex, resourceStore, replay); cut.init(); boolean r = cut.handleRequest(httpRequest, httpResponse); EasyMock.verify(httpResponse, resourceIndex, resourceStore, replay); assertTrue("handleRequest return value", r); } /** * Test of Memento-furnished response to Timegate (URI-G). 
Memento * Specification states the response * <ul> * <li>MUST have "Vary: accept-datetime" header</li> * <li>MUST NOT have "Memento-Datetime" header</li> * <li>MUST have "Link" header, which MUST have at least "original" relation * link. "timegate", "timemap" and "memento" relation type links MAY be * provided.</li> * </ul> * @throws Exception */ public void testMementoTimegate() throws Exception { final String AGGREGATION_PREFIX = "http://web.archive.org"; cut.setEnableMemento(true); cut.setConfigs(new Properties()); cut.getConfigs().setProperty(MementoUtils.AGGREGATION_PREFIX_CONFIG, AGGREGATION_PREFIX); // Wayback Timegate is mapped to date-less replay URL (/web/<URI-R>) // with Accept-Datetime header, but it is irrelevant here (it's a // RequestParser matter.) What's relevant here is that WaybackRequest // is a replay request with mementoTimegate property set to true. setReplayRequest("http://test.example.com/", "20100601123456"); wbRequest.setMementoTimegate(); Resource payloadResource = createTestHtmlResource("20100601000000", "hogehogehogehoge\n".getBytes("UTF-8")); @SuppressWarnings("unused") CaptureSearchResults results = setupCaptures(0, payloadResource); final String expectedMementoDateTime = "Tue, 01 Jun 2010 00:00:00 GMT"; // redirects to URL for closest capture. expectRedirect("/web/20100601000000/http://test.example.com/"); // MUST have "Vary: accept-datetime" header httpResponse.setHeader("Vary", "accept-datetime"); // also has Link header with mandatory "original" link, optional // "timemap", and "first/last memento" (combined here) links. It // can also include "prev/next memento" links, which is not // applicable here. 
final String expectedMementoLink = String .format( "<%1$s>; rel=\"original\", " + "<%2$s%3$stimemap/link/%1$s>; rel=\"timemap\"; type=\"application/link-format\", " + "<%2$s%3$s%4$s/%1$s>; rel=\"first last memento\"; datetime=\"%5$s\"", "http://test.example.com/", AGGREGATION_PREFIX, WEB_PREFIX, "20100601000000", expectedMementoDateTime); httpResponse.setHeader(MementoUtils.LINK, expectedMementoLink); EasyMock.replay(httpRequest, httpResponse, resourceIndex, resourceStore, replay); cut.init(); boolean r = cut.handleRequest(httpRequest, httpResponse); EasyMock.verify(httpResponse, resourceIndex, resourceStore, replay); assertTrue("handleRequest return value", r); } /** * test of Memento Timemap request. * <p> * Actual rendering is done by a separate bean implementing * {@link MementoHandler}. So what we test here is that AccessPoint is * correctly routing Timemap request to MementoHandler. */ public void testMementoTimemap() throws Exception { cut.setEnableMemento(true); setCaptureQueryRequest("http://www.example.com/", "20100601000000"); // this make WaybackRequest.isMementoTimemapRequest() return true, // which shall direct AccessPoint to call MementHandler.renderMementoTimemap(). wbRequest.setMementoTimemapFormat("link"); MementoHandler mementoHandler = EasyMock .createMock(MementoHandler.class); EasyMock.expect( mementoHandler.renderMementoTimemap(wbRequest, httpRequest, httpResponse)).andReturn(true); cut.setMementoHandler(mementoHandler); EasyMock.replay(httpRequest, httpResponse, mementoHandler); cut.init(); boolean r = cut.handleRequest(httpRequest, httpResponse); EasyMock.verify(mementoHandler); assertTrue("handleRequest return value", r); } /** * test for local static resource when enableMemento=true. expectations: * <ul> * <li>do-not-negotiate Link header is set. 
* </ul> * </ul> * @throws Exception */ public void testMemento_dispatchLocal() throws Exception { cut.setEnableMemento(true); cut.setServeStatic(true); // first reset the mock for overriding getAttribute(), getRequestURI(), // and getRequestURL() EasyMock.reset(httpRequest); setupRequestStub("/static", "/static/aaa.css", "/static/"); EasyMock.expect( httpRequest.getAttribute("webapp-request-context-path-prefix")) .andStubReturn("/static/"); // reconfigure RequestParser to return null, which signifies that // there's no dynamic handler and the request shall be mapped to local // static resource. (AccessPoint#dispatchLocal(HttpServletRequest)) EasyMock.reset(parser); EasyMock.expect(parser.parse(httpRequest, cut)).andReturn(null); // AccessPoint#dispatchLocal() checks existence of the file if // ServletContext#getRealPath() returns non-null value for translated // request path. have it skip the test by returning // null. otherwise dispatchLocal() will fail. ServletContext servletContext = EasyMock .createMock(ServletContext.class); EasyMock.expect(servletContext.getRealPath(EasyMock.<String>notNull())) .andStubReturn(null); cut.setServletContext(servletContext); // Expectation: AccessPoint#dispatchLocal() eventually calls // RequestDispatcher#forward(...) requestDispatcher.forward(httpRequest, httpResponse); // key expectation in this test httpResponse.setHeader(MementoUtils.LINK, "<http://mementoweb.org/terms/donotnegotiate>; rel=\"type\""); EasyMock.replay(httpRequest, parser, servletContext, requestDispatcher); cut.init(); boolean r = cut.handleRequest(httpRequest, httpResponse); EasyMock.verify(parser, requestDispatcher); assertTrue("handleRequest return value", r); } /** * {@code AccessPoint.authentication} is typically configured with {@link AccessControlSettingOperation}, * which conditionally installs exclusion filter by calling {@code WaybackRequest.exclusionFilter} directly. * {@link AccessPoint#createExclusionFilter(WaybackRequest)} shall not overwrite it. 
*
* <p>Issue: <a href="https://github.com/iipc/openwayback/issues/259">#259</a></p>
*
* @throws Exception
*/
public void testExclusionThroughAuthentication() throws Exception {
    ExclusionFilterFactory filterFactory = EasyMock.createMock(ExclusionFilterFactory.class);
    // filter installed through the authentication operation; the test checks
    // this very instance is still on the request after handleRequest().
    ExclusionFilter accessControlFilter = new ExclusionFilter() {
        @Override
        public int filterObject(CaptureSearchResult capture) {
            return FILTER_INCLUDE;
        }
    };
    EasyMock.expect(filterFactory.get()).andReturn(accessControlFilter).once();

    AccessControlSettingOperation authentication = new AccessControlSettingOperation();
    authentication.setFactory(filterFactory);
    authentication.setOperator(new TrueBooleanOperator<WaybackRequest>());
    cut.setAuthentication(authentication);

    // it is not essential to set up a URL search query; it just requires
    // the least setup for this test.
    setUrlQueryRequest("http://example.com/", "20100601123456");
    expectUrlIndexQuery();

    EasyMock.replay(filterFactory, httpRequest, httpResponse, resourceIndex, query);

    // return value is irrelevant to this test.
    cut.handleRequest(httpRequest, httpResponse);

    // identity (not equality) check: the filter must be the same instance.
    ExclusionFilter installed = wbRequest.getExclusionFilter();
    if (installed != accessControlFilter) {
        fail("Expected " + accessControlFilter + ", but got " + installed);
    }
}

// TODO: tests of live-web redirector
}