/** * */ package org.archive.wayback.resourceindex.cdxserver; import java.io.IOException; import java.io.PrintWriter; import java.io.StringWriter; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.List; import java.util.logging.Level; import java.util.logging.Logger; import javax.servlet.http.Cookie; import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; import junit.framework.TestCase; import org.archive.cdxserver.CDXQuery; import org.archive.cdxserver.CDXServer; import org.archive.cdxserver.auth.AuthToken; import org.archive.cdxserver.auth.PrivTokenAuthChecker; import org.archive.cdxserver.writer.CDXWriter; import org.archive.cdxserver.writer.HttpCDXWriter; import org.archive.format.cdx.CDXFieldConstants; import org.archive.format.cdx.CDXLine; import org.archive.format.cdx.FieldSplitFormat; import org.archive.format.gzip.zipnum.ZipNumCluster; import org.archive.format.gzip.zipnum.ZipNumParams; import org.archive.util.iterator.CloseableIterator; import org.archive.wayback.accesscontrol.robotstxt.redis.RedisRobotExclusionFilterFactory; import org.archive.wayback.core.CaptureSearchResult; import org.archive.wayback.core.CaptureSearchResults; import org.archive.wayback.core.SearchResults; import org.archive.wayback.core.WaybackRequest; import org.archive.wayback.exception.ResourceNotInArchiveException; import org.archive.wayback.exception.RobotAccessControlException; import org.archive.wayback.resourceindex.filters.ExclusionFilter; import org.archive.wayback.util.ObjectFilter; import org.archive.wayback.util.WrappedCloseableIterator; import org.archive.wayback.util.url.KeyMakerUrlCanonicalizer; import org.archive.wayback.webapp.PerfStats; import org.easymock.EasyMock; import org.easymock.IAnswer; /** * Test {@link EmbeddedCDXServerIndex}. * @author Kenji Nagahashi * */ public class EmbeddedCDXServerIndexTest extends TestCase { /** * fixture CDXServer (unnecessary if CDServer was an interface). * <p> * Note: {@code testHandleRequest} and {@code testRenderMementoTimemap} uses * {@link CDXServer#getCdx(HttpServletRequest, HttpServletResponse, CDXQuery)}, * which eventually calls {@link #getCdx(CDXQuery, AuthToken, CDXWriter)} here. * </p> */ public static class TestCDXServer extends CDXServer { public List<Object[]> capturedArgs = new ArrayList<Object[]>(); public CDXLine[] cdxLines; @Override public void getCdx(CDXQuery query, AuthToken authToken, CDXWriter responseWriter) throws IOException { capturedArgs.add(new Object[] { query, authToken, responseWriter }); responseWriter.begin(); for (CDXLine cdxLine : cdxLines) { responseWriter.writeLine(cdxLine); } responseWriter.end(); } public void clearCapturedArgs() { capturedArgs.clear(); } } EmbeddedCDXServerIndex cut; TestCDXServer testCDXServer; /* (non-Javadoc) * @see junit.framework.TestCase#setUp() */ protected void setUp() throws Exception { cut = new EmbeddedCDXServerIndex(); cut.setCanonicalizer(new KeyMakerUrlCanonicalizer()); cut.setCdxServer(testCDXServer = new TestCDXServer()); Logger.getLogger(PerfStats.class.getName()).setLevel(Level.WARNING); } /** * Set CDX lines TestCDXServer stub returns. * Lines are parsed with {@link CDXFieldConstants#CDX_ALL_NAMES}. * Note {@link EmbeddedCDXServerIndex#query} will throw {@link ResourceNotInArchiveException} * if {@code lines} is empty. * @param lines text CDX lines */ protected void setCdxLines(String... lines) { // urlkey, timestamp, original, mimetype, statuscode, digest, redirect, // robotflags, length, offset, filename. final FieldSplitFormat fmt = CDXFieldConstants.CDX_ALL_NAMES; testCDXServer.cdxLines = new CDXLine[lines.length]; int i = 0; for (String line : lines) { testCDXServer.cdxLines[i++] = new CDXLine(line, fmt); } } // === sample cdx lines === final String CDXLINE1 = "com,example)/ 20101124000000 http://example.com/ text/html 200" + " ABCDEFGHIJKLMNOPQRSTUVWXYZ012345 - - 2000 0 /a/a.warc.gz"; // for testing ignore-robots final String CDXLINE2 = "com,norobots)/ 20101124000000 http://example.com/ text/html 200" + " ABCDEFGHIJKLMNOPQRSTUVWXYZ012345 - - 2000 0 /a/a.warc.gz"; /** * capture search. basic options. * @throws Exception */ public void testQuery() throws Exception { WaybackRequest wbr = new WaybackRequest(); wbr.setRequestUrl("http://example.com/"); wbr.setCaptureQueryRequest(); // urlkey, timestamp, original, mimetype, statuscode, digest, redirect, robotflags, // length, offset, filename. FieldSplitFormat fmt = CDXFieldConstants.CDX_ALL_NAMES; testCDXServer.cdxLines = new CDXLine[] { new CDXLine(CDXLINE1, fmt) }; SearchResults sr = cut.query(wbr); assertEquals(1, sr.getReturnedCount()); assertEquals(1, testCDXServer.capturedArgs.size()); Object[] args = testCDXServer.capturedArgs.get(0); CDXQuery query = (CDXQuery)args[0]; String[] filter = query.getFilter(); assertEquals(1, filter.length); assertEquals("!statuscode:(500|502|504)", filter[0]); AuthToken authToken = (AuthToken)args[1]; assertFalse(authToken.isIgnoreRobots()); } /** * {@link EmbeddedCDXServerIndex} resolves revisits for replay requests. * (This is actually a test of {@link CDXToCaptureSearchResultsWriter}.) * @throws Exception */ public void testRevisitResolution() throws Exception { WaybackRequest wbr = WaybackRequest.createReplayRequest( "http://example.com/", "20101125000000", null, null); setCdxLines( "com,example)/ 20101124000000 http://example.com/ text/html 200" + " XXXX - - 2000 0 /a/a.warc.gz", "com,example)/ 20101125000000 http://example.com/ warc/revisit 200" + " XXXX - - 2000 0 /a/b.warc.gz", "com,example)/ 20101126000000 http://example.com/ text/html 200" + " XXXX - - 2000 0 /a/c.warc.gz" ); SearchResults sr = cut.query(wbr); assertEquals(3, sr.getReturnedCount()); CaptureSearchResults results = (CaptureSearchResults)sr; List<CaptureSearchResult> list = results.getResults(); CaptureSearchResult capture2 = list.get(1); assertEquals("20101125000000", capture2.getCaptureTimestamp()); assertEquals("20101124000000", capture2.getDuplicateDigestStoredTimestamp()); assertEquals("/a/a.warc.gz", capture2.getDuplicatePayloadFile()); assertEquals(0, (long)capture2.getDuplicatePayloadOffset()); assertEquals(2000, capture2.getDuplicatePayloadCompressedLength()); assertSame(list.get(0), capture2.getDuplicatePayload()); } /** * {@link CDXToCaptureSearchResultsWriter} resolves revisits for replay requests * (reverse order input mode) (Test of {@link CDXToCaptureSearchResultsWriter}.) * <p>Since there's no way to put {@code CDXToCaptureSearchResultsWriter}'s in reverse * mode, this test calls {@code CDXToCaptureSearchResultWriter} directly.</p> * <p>In other words, its reverse mode is never used in practice.</p> * @throws Exception */ public void testRevisitResolutionReverse() throws Exception { WaybackRequest wbr = WaybackRequest.createReplayRequest( "http://example.com/", "20101125000000", null, null); final String[] CDXLINES = { "com,example)/ 20101124000000 http://example.com/ text/html 200" + " XXXX - - 2000 0 /a/a.warc.gz", "com,example)/ 20101125000000 http://example.com/ warc/revisit 200" + " XXXX - - 2000 0 /a/b.warc.gz", "com,example)/ 20101126000000 http://example.com/ text/html 200" + " XXXX - - 2000 0 /a/c.warc.gz" }; CDXQuery query = new CDXQuery(wbr.getRequestUrl()); query.setSort(CDXQuery.SortType.reverse); assertTrue(query.isReverse()); CDXToCaptureSearchResultsWriter cdxw = new CDXToCaptureSearchResultsWriter(query, true, false, null); final FieldSplitFormat fmt = CDXFieldConstants.CDX_ALL_NAMES; cdxw.begin(); // feed in reverse order for (int i = CDXLINES.length; i > 0; i--) { CDXLine line = new CDXLine(CDXLINES[i - 1], fmt); cdxw.trackLine(line); cdxw.writeLine(line); } cdxw.end(); CaptureSearchResults results = cdxw.getSearchResults(); assertEquals(3, results.getReturnedCount()); List<CaptureSearchResult> list = results.getResults(); CaptureSearchResult capture1 = list.get(0); // CDXToCaptureSearchResultWriter returns CaptureSearchResult's in chronological // order (oldest to newer), even when query.isReverse() == true. assertEquals("20101124000000", capture1.getCaptureTimestamp()); CaptureSearchResult capture2 = list.get(1); assertEquals("20101125000000", capture2.getCaptureTimestamp()); assertEquals("20101124000000", capture2.getDuplicateDigestStoredTimestamp()); assertEquals("/a/a.warc.gz", capture2.getDuplicatePayloadFile()); assertEquals(0, (long)capture2.getDuplicatePayloadOffset()); assertEquals(2000, capture2.getDuplicatePayloadCompressedLength()); assertSame(capture1, capture2.getDuplicatePayload()); } /** * Test of soft-block feature (regular replay). * capture with "X" in {@code robotflags} field does not make its way * into {@code CaptureSearchResults}, but still available as payload * capture for revisits. * @throws Exception */ public void testSoftBlock() throws Exception { WaybackRequest wbr = WaybackRequest.createReplayRequest( "http://example.com/", "20101125000000", null, null); setCdxLines( "com,example)/ 20101124000000 http://example.com/ text/html 200" + " XXXX - X 2000 0 /a/a.warc.gz", "com,example)/ 20101125000000 http://example.com/ warc/revisit 200" + " XXXX - - 2000 0 /a/b.warc.gz", "com,example)/ 20101126000000 http://example.com/ text/html 200" + " XXXX - - 2000 0 /a/c.warc.gz" ); CaptureSearchResults results = (CaptureSearchResults)cut.query(wbr); assertEquals(2, results.getReturnedCount()); // first line is excluded List<CaptureSearchResult> list = results.getResults(); assertEquals(2, list.size()); CaptureSearchResult capture1 = list.get(0); assertEquals("20101125000000", capture1.getCaptureTimestamp()); CaptureSearchResult capture2 = list.get(1); assertEquals("20101126000000", capture2.getCaptureTimestamp()); // but revisit is resolved to the first line. assertEquals("20101124000000", capture1.getDuplicateDigestStoredTimestamp()); assertEquals("/a/a.warc.gz", capture1.getDuplicatePayloadFile()); assertEquals(0, (long)capture1.getDuplicatePayloadOffset()); assertEquals(2000, capture1.getDuplicatePayloadCompressedLength()); // payload capture is available via duplicatePayload CaptureSearchResult captureX = capture1.getDuplicatePayload(); assertNotNull(captureX); assertEquals("20101124000000", captureX.getCaptureTimestamp()); // test if capture1 pretends to be an ordinary capture. // we want to hide the fact that it's content is coming from // blocked capture (this is actually a test of CaptereSearchResult.) assertFalse(capture1.isDuplicateDigest()); } /** * Supplementary test for soft-block feature. * Modification to {@code robotflags} made by {@code exclusionFilter} must be * properly recognized. As baseline {@code EmbeddedCDXServerIndex} does not have * setting up {@code exclusionFilter}, this test deals with * {@link CDXToCaptureSearchResultsWriter} directly. * @throws Exception */ public void testSoftBlock_fieldModificationRecognized() throws Exception { WaybackRequest wbr = WaybackRequest.createReplayRequest( "http://example.com/", "20101125000000", null, null); final String[] CDXLINES = { // note this line has no "X" in robotflags field (compare with test above) "com,example)/ 20101124000000 http://example.com/ text/html 200" + " XXXX - - 2000 0 /a/a.warc.gz", "com,example)/ 20101125000000 http://example.com/ warc/revisit 200" + " XXXX - - 2000 0 /a/b.warc.gz", "com,example)/ 20101126000000 http://example.com/ text/html 200" + " XXXX - - 2000 0 /a/c.warc.gz" }; CDXQuery query = new CDXQuery(wbr.getRequestUrl()); ExclusionFilter exclusionFilter = new ExclusionFilter() { @Override public int filterObject(CaptureSearchResult o) { if (o.getCaptureTimestamp().startsWith("20101124")) { o.setRobotFlag(CaptureSearchResult.CAPTURE_ROBOT_BLOCKED); } return FILTER_INCLUDE; } }; CDXToCaptureSearchResultsWriter cdxw = new CDXToCaptureSearchResultsWriter(query, true, false, null); cdxw.setExclusionFilter(exclusionFilter); final FieldSplitFormat fmt = CDXFieldConstants.CDX_ALL_NAMES; cdxw.begin(); for (String l : CDXLINES) { CDXLine line = new CDXLine(l, fmt); cdxw.trackLine(line); cdxw.writeLine(line); } cdxw.end(); CaptureSearchResults results = cdxw.getSearchResults(); // first capture will be removed from the result. assertEquals(2, results.getReturnedCount()); List<CaptureSearchResult> list = results.getResults(); CaptureSearchResult capture1 = list.get(0); assertEquals("20101125000000", capture1.getCaptureTimestamp()); CaptureSearchResult captureX = capture1.getDuplicatePayload(); assertNotNull(captureX); assertEquals("20101124000000", captureX.getCaptureTimestamp()); // modification to robotflags field made by ExclusionFilter must be reflected // in capture1. assertEquals("X", captureX.getRobotFlags()); } /** * Test of soft-block feature (URL-agnostic revisit payload lookup). * In revisit payload lookup mode, capture with "X" is returned. * @throws Exception */ public void testSoftBlock_revisitPayloadLookup() throws Exception { WaybackRequest wbr = WaybackRequest.createReplayRequest( "http://example.com/", "20101124000000", null, null); wbr.put(EmbeddedCDXServerIndex.REQUEST_REVISIT_LOOKUP, "true"); setCdxLines( "com,example)/ 20101124000000 http://example.com/ text/html 200" + " XXXX - X 2000 0 /a/a.warc.gz", "com,example)/ 20101125000000 http://example.com/ warc/revisit 200" + " XXXX - - 2000 0 /a/b.warc.gz", "com,example)/ 20101126000000 http://example.com/ text/html 200" + " XXXX - - 2000 0 /a/c.warc.gz" ); CaptureSearchResults results = (CaptureSearchResults)cut.query(wbr); CaptureSearchResult capture1 = results.getResults().get(0); assertEquals("20101124000000", capture1.getCaptureTimestamp()); assertSame(capture1, results.getClosest()); } /** * quick test of {@link EmbeddedCDXServerIndex#buildStatusFilter(String)} */ public void testBuildStatusFilter() { final String[][] CASES = new String[][] { { "!500", "!statuscode:500" }, { "! 400|500|502 ", "!statuscode:400|500|502" }, { "[23]..", "statuscode:[23].." }, { "! ", "" }, { "", "" }, { null, "" } }; for (String[] c : CASES) { assertEquals(c[1], EmbeddedCDXServerIndex.buildStatusFilter(c[0])); } } /** * test of {@link EmbeddedCDXServerIndex#setBaseStatusRegexp(String)} * @throws Exception */ public void testQueryWithCustomStatusFilter() throws Exception { WaybackRequest wbr = new WaybackRequest(); wbr.setRequestUrl("http://example.com/"); wbr.setCaptureQueryRequest(); // urlkey, timestamp, original, mimetype, statuscode, digest, redirect, robotflags, // length, offset, filename. setCdxLines(CDXLINE1); cut.setBaseStatusRegexp(""); { @SuppressWarnings("unused") SearchResults sr = cut.query(wbr); assertEquals(1, testCDXServer.capturedArgs.size()); Object[] args = testCDXServer.capturedArgs.get(0); CDXQuery query = (CDXQuery)args[0]; String[] filter = query.getFilter(); assertNull("there should be no filter", filter); } testCDXServer.clearCapturedArgs(); cut.setBaseStatusRegexp("!500"); { @SuppressWarnings("unused") SearchResults sr = cut.query(wbr); assertEquals(1, testCDXServer.capturedArgs.size()); Object[] args = testCDXServer.capturedArgs.get(0); CDXQuery query = (CDXQuery)args[0]; String[] filter = query.getFilter(); assertEquals(1, filter.length); assertEquals("!statuscode:500", filter[0]); } } /** * for those SURT prefixes in {@code ignoreRobotsPaths}, * {@link AuthToken#isIgnoreRobots()} flag is set. * @throws Exception */ public void testIgnoreRobotPaths() throws Exception { cut.setIgnoreRobotPaths(Arrays.asList(new String[]{ "com,norobots" })); WaybackRequest wbr = new WaybackRequest(); wbr.setRequestUrl("http://norobots.com/"); wbr.setCaptureQueryRequest(); // urlkey, timestamp, original, mimetype, statuscode, digest, redirect, robotflags, // length, offset, filename. setCdxLines(CDXLINE2); @SuppressWarnings("unused") SearchResults sr = cut.query(wbr); assertEquals(1, testCDXServer.capturedArgs.size()); Object[] args = testCDXServer.capturedArgs.get(0); //CDXQuery query = (CDXQuery)args[0]; AuthToken authToken = (AuthToken)args[1]; assertTrue(authToken.isIgnoreRobots()); } /** * test of timestamp-collapsing. * <p>Actual processing happens in {@link CDXServer}. {@link EmbeddedCDXServerIndex} * simply passes {@link WaybackRequest#getCollapseTime()} to {@link CDXQuery#setCollapse(String[])}. * if {@code collapseTime} is unspecified in {@code WaybackRequest} (-1), default value * {@code timestampDedupLength} will be used. * @throws Exception */ public void testCollapseTime() throws Exception { WaybackRequest wbr = WaybackRequest.createCaptureQueryRequet( "http://example.com/", null, null, null); setCdxLines(CDXLINE1); { cut.setTimestampDedupLength(10); @SuppressWarnings("unused") SearchResults sr = cut.query(wbr); Object[] args = testCDXServer.capturedArgs.get(0); assertEquals(10, ((CDXQuery)args[0]).getCollapseTime()); } testCDXServer.clearCapturedArgs(); { wbr.setCollapseTime(8); @SuppressWarnings("unused") SearchResults sr = cut.query(wbr); Object[] args = testCDXServer.capturedArgs.get(0); assertEquals(8, ((CDXQuery)args[0]).getCollapseTime()); } } /** * {@link EmbeddedCDXServerIndex#handleRequest(HttpServletRequest, HttpServletResponse)} is * a entry point for CDXServer API. It should return all accessible cdx lines, without applying * any additional filters not requested by API user. * @throws Exception */ public void testHandleRequest() throws Exception { HttpServletRequest request = EasyMock.createNiceMock(HttpServletRequest.class); EasyMock.expect(request.getParameter("url")).andStubReturn("http://example.com/"); HttpServletResponse response = EasyMock.createNiceMock(HttpServletResponse.class); StringWriter sw = new StringWriter(); EasyMock.expect(response.getWriter()).andReturn(new PrintWriter(sw)); FieldSplitFormat fmt = CDXFieldConstants.CDX_ALL_NAMES; testCDXServer.cdxLines = new CDXLine[] { new CDXLine(CDXLINE1, fmt) }; EasyMock.replay(request, response); cut.handleRequest(request, response); assertEquals(1, testCDXServer.capturedArgs.size()); Object[] args = testCDXServer.capturedArgs.get(0); CDXQuery query = (CDXQuery)args[0]; assertEquals("API query should not have filter by default", 0, query.getFilter().length); assertEquals(String.format("%1$s%n", CDXLINE1), sw.toString()); } /** * {@link EmbeddedCDXServerIndex#renderMementoTimemap(WaybackRequest, HttpServletRequest, HttpServletResponse)} * is a CDXServer API entry point for Memento format output. * @throws Exception */ public void testRenderMementoTimemap() throws Exception { HttpServletRequest request = EasyMock.createNiceMock(HttpServletRequest.class); // Used in MementoLinkWriter EasyMock.expect(request.getRequestURL()).andAnswer(new IAnswer<StringBuffer>() { @Override public StringBuffer answer() throws Throwable { return new StringBuffer("/timemap/memento/http://example.com/"); } }); HttpServletResponse response = EasyMock.createNiceMock(HttpServletResponse.class); StringWriter sw = new StringWriter(); EasyMock.expect(response.getWriter()).andReturn(new PrintWriter(sw)); // needs: // getMementoTimemapFormat() - passed to CDXQuery.output // getRequestUrl() - passed to CDXQuery // get(MementoConstants.PAGE_STARTS) (optional, passed to CDXQuery.from // getAccessPoint() - if getMementoTimemapFormat() == MementoConstants.FORMAT_LINK, // CDX is looked up by calling AccessPoint#queryIndex(WaybackRequest) WaybackRequest wbr = new WaybackRequest(); wbr.setRequestUrl("http://example.com/"); wbr.setMementoTimemapFormat("memento"); FieldSplitFormat fmt = CDXFieldConstants.CDX_ALL_NAMES; testCDXServer.cdxLines = new CDXLine[] { new CDXLine(CDXLINE1, fmt) }; EasyMock.replay(request, response); boolean r = cut.renderMementoTimemap(wbr, request, response); assertTrue("renderMementoTimemap returns true", r); assertEquals(1, testCDXServer.capturedArgs.size()); Object[] args = testCDXServer.capturedArgs.get(0); CDXQuery query = (CDXQuery)args[0]; assertEquals("API query should not have filter by default", 0, query.getFilter().length); // Here we only check if output *looks like* Memento format. Detailed tests // shall be done by test case for MementoLinkWriter. //System.out.println("response=" + sw.toString()); assertTrue(sw.toString().startsWith("<http://example.com/>;")); } // WaybackAuthChecker wants RedisRobotExclusionFilterFactory for // robotsExclusions. BAD, BAD, BAD! public static class ExcludeAllFilterFactory extends RedisRobotExclusionFilterFactory { @Override public ExclusionFilter get() { return new ExclusionFilter() { @Override public int filterObject(CaptureSearchResult o) { return ObjectFilter.FILTER_EXCLUDE; } }; } } // XXX CDXServer demands ZipNumCluster even though it doesn't // call methods specific to it. BAD. public static class StubZipNumCluster extends ZipNumCluster { List<String> cdxlines; public StubZipNumCluster(String... cdxlines) { this.cdxlines = Arrays.asList(cdxlines); } // method called by EmbeddedCDXServer.query(WaybackRequest) for // non-paged queries. @Override public CloseableIterator<String> getCDXIterator(String key, String start, String end, ZipNumParams params) throws IOException { return new WrappedCloseableIterator<String>(cdxlines.iterator()); } } /** * robots.txt exclusion shall be disable for embeds. * <p>TODO: This is actually testing classes in {@code wayback-cdx-server} * module. Implemented here because it takes more work to do this * in wayback-cdx-server module, and it makes little sense to do it before * planned refactoring.</p> * <p>Ref: WWM-119. A bug in {@link PrivTokenAuthChecker}.</p> * @throws Exception */ public void testIgnoreRobotsForEmbeds() throws Exception { CDXServer cdxServer = new CDXServer(); ZipNumCluster cdxSource = new StubZipNumCluster( "com,example)/style.css 20101124000000 http://example.com/style.css text/css 200" + " ABCDEFGHIJKLMNOPQRSTUVWXYZ012345 - - 2000 0 /a/a.warc.gz"); cdxServer.setZipnumSource(cdxSource); // This is the class being tested here... so AuthChecker shall no be mocked. // We cannot use PrivTokenAuthCheck class for this test, because it has no // real support for robots.txt exclusion. This is the main reason why we // cannot have this test in wayback-cdx-server project. WaybackAuthChecker authChecker = new WaybackAuthChecker(); authChecker.setRobotsExclusions(new ExcludeAllFilterFactory()); cdxServer.setAuthChecker(authChecker); cdxServer.afterPropertiesSet(); cut.setCdxServer(cdxServer); { WaybackRequest wbRequest = WaybackRequest.createReplayRequest( "http://example.com/style.css", "20140101000000", null, null); wbRequest.setCSSContext(true); // i.e. "embed" try { cut.query(wbRequest); } catch (RobotAccessControlException ex) { fail("robots.txt exclusion is not disabled for embeds"); } } // additional tests to make sure robots.txt exclusion is implemented // right, not just broken. these would have better been in a separate // test method(s), but just for now... CDX server refactoring will // break these anyways. { WaybackRequest wbRequest = WaybackRequest.createReplayRequest( "http://example.com/style.css", "20140101000000", null, null); // not embed try { cut.query(wbRequest); fail("RobotAccessControlException was not thrown"); } catch (RobotAccessControlException ex) { // expected. } } // check robots.txt exclusion is working for CDX server API entry point { HttpServletRequest httpRequest = EasyMock.createNiceMock(HttpServletRequest.class); EasyMock.expect(httpRequest.getParameter("url")).andStubReturn("http://exmaple.com/style.css"); HttpServletResponse httpResponse = EasyMock.createMock(HttpServletResponse.class); // expect error response; 403 with error header containing "Robot" final StringWriter output = new StringWriter(); EasyMock.expect(httpResponse.getWriter()).andReturn(new PrintWriter(output)); httpResponse.setContentType(EasyMock.<String>notNull()); EasyMock.expectLastCall().once(); httpResponse.setStatus(403); EasyMock.expectLastCall().once(); httpResponse.setHeader(EasyMock.eq(HttpCDXWriter.RUNTIME_ERROR_HEADER), EasyMock.matches("(?i).*Robot.*")); EasyMock.replay(httpRequest, httpResponse); cut.handleRequest(httpRequest, httpResponse); EasyMock.verify(httpResponse); } // check if robots.txt exclusion can be disabled by cookie. { final String IGNORE_ROBOTS_TOKEN = "DISABLE-ROBOTS-EXCLUSION"; authChecker.setIgnoreRobotsAccessTokens(Collections.singletonList(IGNORE_ROBOTS_TOKEN)); HttpServletRequest httpRequest = EasyMock.createNiceMock(HttpServletRequest.class); EasyMock.expect(httpRequest.getParameter("url")).andStubReturn("http://exmaple.com/style.css"); EasyMock.expect(httpRequest.getCookies()).andStubReturn( new Cookie[] { new Cookie(cdxServer.getCookieAuthToken(), IGNORE_ROBOTS_TOKEN) }); HttpServletResponse httpResponse = EasyMock.createMock(HttpServletResponse.class); // expect 200 response = robots exclusion is disabled. final StringWriter output = new StringWriter(); EasyMock.expect(httpResponse.getWriter()).andReturn(new PrintWriter(output)); httpResponse.setContentType(EasyMock.<String>notNull()); EasyMock.expectLastCall().once(); //httpResponse.setStatus(200); // this is not explicitly called //EasyMock.expectLastCall().once(); EasyMock.replay(httpRequest, httpResponse); cut.handleRequest(httpRequest, httpResponse); // if it's not working, EasyMock will report unexpected call to httpResponse.setStatus(403). EasyMock.verify(httpResponse); System.out.println(output.toString()); } } }