/*
 * Zed Attack Proxy (ZAP) and its related class files.
 *
 * ZAP is an HTTP/HTTPS proxy for assessing web application security.
 *
 * Copyright 2016 The ZAP Development Team
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.zaproxy.zap.spider.parser;

import static org.hamcrest.Matchers.contains;
import static org.hamcrest.Matchers.empty;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.is;
import static org.junit.Assert.assertThat;

import java.nio.charset.StandardCharsets;
import java.nio.file.Path;
import java.nio.file.Paths;

import org.apache.log4j.Logger;
import org.apache.log4j.varia.NullAppender;
import org.junit.BeforeClass;
import org.junit.Test;
import org.parosproxy.paros.network.HttpMessage;
import org.zaproxy.zap.spider.SpiderParam;

import net.htmlparser.jericho.Source;

/**
 * Unit test for {@link SpiderHtmlParser}.
 * <p>
 * Each test builds an {@link HttpMessage} whose response body is one of the HTML fixture files found under
 * {@link #BASE_DIR_HTML_FILES}, runs the parser on it and checks the URLs reported to a test listener.
 */
public class SpiderHtmlParserUnitTest extends SpiderParserTestUtils {

    /** Path passed to {@code canParseResource} in the tests that evaluate whether a message can be parsed. */
    private static final String ROOT_PATH = "/";

    /** Depth passed to {@code parseResource} in all parsing tests. */
    private static final int BASE_DEPTH = 0;

    /** Directory (relative to the working directory) that contains the HTML fixture files. */
    private static final Path BASE_DIR_HTML_FILES = Paths.get("test/resources/org/zaproxy/zap/spider/parser/html");

    /**
     * Adds a {@link NullAppender} to the log4j root logger before any test runs.
     * <p>
     * NOTE(review): presumably this keeps parser logging out of the test output - confirm against the log4j setup
     * used by the build.
     */
    @BeforeClass
    public static void suppressLogging() {
        Logger.getRootLogger().addAppender(new NullAppender());
    }

    @Test(expected = IllegalArgumentException.class)
    public void shouldFailToCreateParserWithUndefinedSpiderOptions() {
        // Given
        SpiderParam undefinedSpiderOptions = null;
        // When
        new SpiderHtmlParser(undefinedSpiderOptions);
        // Then = IllegalArgumentException
    }

    @Test(expected = NullPointerException.class)
    public void shouldFailToEvaluateAnUndefinedMessage() {
        // Given
        HttpMessage undefinedMessage = null;
        SpiderHtmlParser htmlParser = new SpiderHtmlParser(new SpiderParam());
        // When
        htmlParser.canParseResource(undefinedMessage, ROOT_PATH, false);
        // Then = NullPointerException
    }

    @Test
    public void shouldParseHtmlResponse() {
        // Given
        SpiderHtmlParser htmlParser = new SpiderHtmlParser(new SpiderParam());
        HttpMessage messageHtmlResponse = createMessageWith("NoURLsSpiderHtmlParser.html");
        boolean parsed = false;
        // When
        boolean canParse = htmlParser.canParseResource(messageHtmlResponse, ROOT_PATH, parsed);
        // Then
        assertThat(canParse, is(equalTo(true)));
    }

    @Test
    public void shouldParseHtmlResponseEvenIfProvidedPathIsNull() {
        // Given
        SpiderHtmlParser htmlParser = new SpiderHtmlParser(new SpiderParam());
        HttpMessage messageHtmlResponse = createMessageWith("NoURLsSpiderHtmlParser.html");
        boolean parsed = false;
        // When
        boolean canParse = htmlParser.canParseResource(messageHtmlResponse, null, parsed);
        // Then
        assertThat(canParse, is(equalTo(true)));
    }

    @Test
    public void shouldNotParseHtmlResponseIfAlreadyParsed() {
        // Given
        SpiderHtmlParser htmlParser = new SpiderHtmlParser(new SpiderParam());
        HttpMessage messageHtmlResponse = createMessageWith("NoURLsSpiderHtmlParser.html");
        boolean parsed = true;
        // When
        boolean canParse = htmlParser.canParseResource(messageHtmlResponse, ROOT_PATH, parsed);
        // Then
        assertThat(canParse, is(equalTo(false)));
    }

    @Test(expected = NullPointerException.class)
    public void shouldFailToParseAnUndefinedMessage() {
        // Given
        HttpMessage undefinedMessage = null;
        SpiderHtmlParser htmlParser = new SpiderHtmlParser(new SpiderParam());
        Source source = createSource(createMessageWith("NoURLsSpiderHtmlParser.html"));
        // When
        htmlParser.parseResource(undefinedMessage, source, BASE_DEPTH);
        // Then = NullPointerException
    }

    @Test
    public void shouldParseMessageEvenWithoutSource() {
        // Given
        Source source = null;
        SpiderHtmlParser htmlParser = new SpiderHtmlParser(new SpiderParam());
        HttpMessage messageHtmlResponse = createMessageWith("NoURLsSpiderHtmlParser.html");
        // When
        htmlParser.parseResource(messageHtmlResponse, source, BASE_DEPTH);
        // Then = No exception
    }

    @Test
    public void shouldNeverConsiderCompletelyParsed() {
        // Given
        Source source = null;
        SpiderHtmlParser htmlParser = new SpiderHtmlParser(new SpiderParam());
        HttpMessage messageHtmlResponse = createMessageWith("NoURLsSpiderHtmlParser.html");
        // When
        boolean completelyParsed = htmlParser.parseResource(messageHtmlResponse, source, BASE_DEPTH);
        // Then
        assertThat(completelyParsed, is(equalTo(false)));
    }

    @Test
    public void shouldFindUrlsInAElements() {
        // Given
        SpiderHtmlParser htmlParser = new SpiderHtmlParser(new SpiderParam());
        TestSpiderParserListener listener = createTestSpiderParserListener();
        htmlParser.addSpiderParserListener(listener);
        HttpMessage messageHtmlResponse = createMessageWith("AElementsSpiderHtmlParser.html");
        Source source = createSource(messageHtmlResponse);
        // When
        boolean completelyParsed = htmlParser.parseResource(messageHtmlResponse, source, BASE_DEPTH);
        // Then
        assertThat(completelyParsed, is(equalTo(false)));
        assertThat(listener.getNumberOfUrlsFound(), is(equalTo(7)));
        assertThat(
                listener.getUrlsFound(),
                contains(
                        "http://a.example.com/base/scheme",
                        "http://a.example.com:8000/b",
                        "https://a.example.com/c?a=b",
                        "http://example.com/sample/a/relative",
                        "http://example.com/sample/",
                        "http://example.com/a/absolute",
                        "ftp://a.example.com/"));
    }

    @Test
    public void shouldUseMessageUriIfNoBaseElement() {
        // Given
        SpiderHtmlParser htmlParser = new SpiderHtmlParser(new SpiderParam());
        TestSpiderParserListener listener = createTestSpiderParserListener();
        htmlParser.addSpiderParserListener(listener);
        HttpMessage messageHtmlResponse = createMessageWith("NoBaseWithAElementSpiderHtmlParser.html");
        Source source = createSource(messageHtmlResponse);
        // When
        boolean completelyParsed = htmlParser.parseResource(messageHtmlResponse, source, BASE_DEPTH);
        // Then
        assertThat(completelyParsed, is(equalTo(false)));
        assertThat(listener.getNumberOfUrlsFound(), is(equalTo(1)));
        assertThat(listener.getUrlsFound(), contains("http://example.com/relative/no/base"));
    }

    @Test
    public void shouldUseAbsolutePathBaseElement() {
        // Given
        SpiderHtmlParser htmlParser = new SpiderHtmlParser(new SpiderParam());
        TestSpiderParserListener listener = createTestSpiderParserListener();
        htmlParser.addSpiderParserListener(listener);
        HttpMessage messageHtmlResponse = createMessageWith("BaseWithAbsolutePathHrefAElementSpiderHtmlParser.html", "/a/b");
        Source source = createSource(messageHtmlResponse);
        // When
        boolean completelyParsed = htmlParser.parseResource(messageHtmlResponse, source, BASE_DEPTH);
        // Then
        assertThat(completelyParsed, is(equalTo(false)));
        assertThat(listener.getNumberOfUrlsFound(), is(equalTo(2)));
        assertThat(
                listener.getUrlsFound(),
                contains(
                        "http://example.com/base/absolute/path/relative/a/element",
                        "http://example.com/absolute/a/element"));
    }

    @Test
    public void shouldUseRelativePathBaseElement() {
        // Given
        SpiderHtmlParser htmlParser = new SpiderHtmlParser(new SpiderParam());
        TestSpiderParserListener listener = createTestSpiderParserListener();
        htmlParser.addSpiderParserListener(listener);
        HttpMessage messageHtmlResponse = createMessageWith("BaseWithRelativePathHrefAElementSpiderHtmlParser.html", "/a/b");
        Source source = createSource(messageHtmlResponse);
        // When
        boolean completelyParsed = htmlParser.parseResource(messageHtmlResponse, source, BASE_DEPTH);
        // Then
        assertThat(completelyParsed, is(equalTo(false)));
        assertThat(listener.getNumberOfUrlsFound(), is(equalTo(2)));
        assertThat(
                listener.getUrlsFound(),
                contains(
                        "http://example.com/a/base/relative/path/relative/a/element",
                        "http://example.com/absolute/a/element"));
    }

    @Test
    public void shouldIgnoreBaseAndUseMessageUriIfBaseElementDoesNotHaveHref() {
        // Given
        SpiderHtmlParser htmlParser = new SpiderHtmlParser(new SpiderParam());
        TestSpiderParserListener listener = createTestSpiderParserListener();
        htmlParser.addSpiderParserListener(listener);
        HttpMessage messageHtmlResponse = createMessageWith("BaseWithoutHrefAElementSpiderHtmlParser.html");
        Source source = createSource(messageHtmlResponse);
        // When
        boolean completelyParsed = htmlParser.parseResource(messageHtmlResponse, source, BASE_DEPTH);
        // Then
        assertThat(completelyParsed, is(equalTo(false)));
        assertThat(listener.getNumberOfUrlsFound(), is(equalTo(1)));
        assertThat(listener.getUrlsFound(), contains("http://example.com/relative/no/base"));
    }

    @Test
    public void shouldIgnoreBaseAndUseMessageUriIfBaseElementHaveEmptyHref() {
        // Given
        SpiderHtmlParser htmlParser = new SpiderHtmlParser(new SpiderParam());
        TestSpiderParserListener listener = createTestSpiderParserListener();
        htmlParser.addSpiderParserListener(listener);
        HttpMessage messageHtmlResponse = createMessageWith("BaseWithEmptyHrefAElementSpiderHtmlParser.html");
        Source source = createSource(messageHtmlResponse);
        // When
        boolean completelyParsed = htmlParser.parseResource(messageHtmlResponse, source, BASE_DEPTH);
        // Then
        assertThat(completelyParsed, is(equalTo(false)));
        assertThat(listener.getNumberOfUrlsFound(), is(equalTo(1)));
        assertThat(listener.getUrlsFound(), contains("http://example.com/relative/no/base"));
    }

    @Test
    public void shouldFindUrlsInAreaElements() {
        // Given
        SpiderHtmlParser htmlParser = new SpiderHtmlParser(new SpiderParam());
        TestSpiderParserListener listener = createTestSpiderParserListener();
        htmlParser.addSpiderParserListener(listener);
        HttpMessage messageHtmlResponse = createMessageWith("AreaElementsSpiderHtmlParser.html");
        Source source = createSource(messageHtmlResponse);
        // When
        boolean completelyParsed = htmlParser.parseResource(messageHtmlResponse, source, BASE_DEPTH);
        // Then
        assertThat(completelyParsed, is(equalTo(false)));
        assertThat(listener.getNumberOfUrlsFound(), is(equalTo(7)));
        assertThat(
                listener.getUrlsFound(),
                contains(
                        "http://area.example.com/base/scheme",
                        "http://area.example.com:8000/b",
                        "https://area.example.com/c?a=b",
                        "http://example.com/sample/area/relative",
                        "http://example.com/sample/",
                        "http://example.com/area/absolute",
                        "ftp://area.example.com/"));
    }

    @Test
    public void shouldFindUrlsInFrameElements() {
        // Given
        SpiderHtmlParser htmlParser = new SpiderHtmlParser(new SpiderParam());
        TestSpiderParserListener listener = createTestSpiderParserListener();
        htmlParser.addSpiderParserListener(listener);
        HttpMessage messageHtmlResponse = createMessageWith("FrameElementsSpiderHtmlParser.html");
        Source source = createSource(messageHtmlResponse);
        // When
        boolean completelyParsed = htmlParser.parseResource(messageHtmlResponse, source, BASE_DEPTH);
        // Then
        assertThat(completelyParsed, is(equalTo(false)));
        assertThat(listener.getNumberOfUrlsFound(), is(equalTo(7)));
        assertThat(
                listener.getUrlsFound(),
                contains(
                        "http://frame.example.com/base/scheme",
                        "http://frame.example.com:8000/b",
                        "https://frame.example.com/c?a=b",
                        "http://example.com/sample/frame/relative",
                        "http://example.com/sample/",
                        "http://example.com/frame/absolute",
                        "ftp://frame.example.com/"));
    }

    @Test
    public void shouldFindUrlsInIFrameElements() {
        // Given
        SpiderHtmlParser htmlParser = new SpiderHtmlParser(new SpiderParam());
        TestSpiderParserListener listener = createTestSpiderParserListener();
        htmlParser.addSpiderParserListener(listener);
        HttpMessage messageHtmlResponse = createMessageWith("IFrameElementsSpiderHtmlParser.html");
        Source source = createSource(messageHtmlResponse);
        // When
        boolean completelyParsed = htmlParser.parseResource(messageHtmlResponse, source, BASE_DEPTH);
        // Then
        assertThat(completelyParsed, is(equalTo(false)));
        assertThat(listener.getNumberOfUrlsFound(), is(equalTo(7)));
        assertThat(
                listener.getUrlsFound(),
                contains(
                        "http://iframe.example.com/base/scheme",
                        "http://iframe.example.com:8000/b",
                        "https://iframe.example.com/c?a=b",
                        "http://example.com/sample/iframe/relative",
                        "http://example.com/sample/",
                        "http://example.com/iframe/absolute",
                        "ftp://iframe.example.com/"));
    }

    @Test
    public void shouldFindUrlsInLinkElements() {
        // Given
        SpiderHtmlParser htmlParser = new SpiderHtmlParser(new SpiderParam());
        TestSpiderParserListener listener = createTestSpiderParserListener();
        htmlParser.addSpiderParserListener(listener);
        HttpMessage messageHtmlResponse = createMessageWith("LinkElementsSpiderHtmlParser.html");
        Source source = createSource(messageHtmlResponse);
        // When
        boolean completelyParsed = htmlParser.parseResource(messageHtmlResponse, source, BASE_DEPTH);
        // Then
        assertThat(completelyParsed, is(equalTo(false)));
        assertThat(listener.getNumberOfUrlsFound(), is(equalTo(7)));
        assertThat(
                listener.getUrlsFound(),
                contains(
                        "http://link.example.com/base/scheme",
                        "http://link.example.com:8000/b",
                        "https://link.example.com/c?a=b",
                        "http://example.com/sample/link/relative",
                        "http://example.com/sample/",
                        "http://example.com/link/absolute",
                        "ftp://link.example.com/"));
    }

    @Test
    public void shouldFindUrlsInScriptElements() {
        // Given
        SpiderHtmlParser htmlParser = new SpiderHtmlParser(new SpiderParam());
        TestSpiderParserListener listener = createTestSpiderParserListener();
        htmlParser.addSpiderParserListener(listener);
        HttpMessage messageHtmlResponse = createMessageWith("ScriptElementsSpiderHtmlParser.html");
        Source source = createSource(messageHtmlResponse);
        // When
        boolean completelyParsed = htmlParser.parseResource(messageHtmlResponse, source, BASE_DEPTH);
        // Then
        assertThat(completelyParsed, is(equalTo(false)));
        assertThat(listener.getNumberOfUrlsFound(), is(equalTo(7)));
        assertThat(
                listener.getUrlsFound(),
                contains(
                        "http://script.example.com/base/scheme",
                        "http://script.example.com:8000/b",
                        "https://script.example.com/c?a=b",
                        "http://example.com/sample/script/relative",
                        "http://example.com/sample/",
                        "http://example.com/script/absolute",
                        "ftp://script.example.com/"));
    }

    @Test
    public void shouldFindUrlsInImgElements() {
        // Given
        SpiderHtmlParser htmlParser = new SpiderHtmlParser(new SpiderParam());
        TestSpiderParserListener listener = createTestSpiderParserListener();
        htmlParser.addSpiderParserListener(listener);
        HttpMessage messageHtmlResponse = createMessageWith("ImgElementsSpiderHtmlParser.html");
        Source source = createSource(messageHtmlResponse);
        // When
        boolean completelyParsed = htmlParser.parseResource(messageHtmlResponse, source, BASE_DEPTH);
        // Then
        assertThat(completelyParsed, is(equalTo(false)));
        assertThat(listener.getNumberOfUrlsFound(), is(equalTo(7)));
        assertThat(
                listener.getUrlsFound(),
                contains(
                        "http://img.example.com/base/scheme",
                        "http://img.example.com:8000/b",
                        "https://img.example.com/c?a=b",
                        "http://example.com/sample/img/relative",
                        "http://example.com/sample/",
                        "http://example.com/img/absolute",
                        "ftp://img.example.com/"));
    }

    @Test
    public void shouldFindUrlsInMetaElements() {
        // Given
        SpiderHtmlParser htmlParser = new SpiderHtmlParser(new SpiderParam());
        TestSpiderParserListener listener = createTestSpiderParserListener();
        htmlParser.addSpiderParserListener(listener);
        HttpMessage messageHtmlResponse = createMessageWith("MetaElementsSpiderHtmlParser.html");
        Source source = createSource(messageHtmlResponse);
        // When
        boolean completelyParsed = htmlParser.parseResource(messageHtmlResponse, source, BASE_DEPTH);
        // Then
        assertThat(completelyParsed, is(equalTo(false)));
        assertThat(listener.getNumberOfUrlsFound(), is(equalTo(10)));
        assertThat(
                listener.getUrlsFound(),
                contains(
                        "http://meta.example.com:8443/refresh/base/scheme",
                        "https://meta.example.com/refresh",
                        "http://example.com/sample/meta/refresh/relative",
                        "http://example.com/meta/refresh/absolute",
                        "ftp://meta.example.com/refresh",
                        "http://meta.example.com:8080/location/base/scheme",
                        "https://meta.example.com/location",
                        "http://example.com/sample/meta/location/relative",
                        "http://example.com/meta/location/absolute",
                        "ftp://meta.example.com/location"));
    }

    @Test
    public void shouldFindUrlsInCommentsWithElements() {
        // AKA shouldNotFindPlainUrlsInCommentsWithElements
        // Given
        SpiderHtmlParser htmlParser = new SpiderHtmlParser(new SpiderParam());
        TestSpiderParserListener listener = createTestSpiderParserListener();
        htmlParser.addSpiderParserListener(listener);
        HttpMessage messageHtmlResponse = createMessageWith("CommentWithElementsSpiderHtmlParser.html");
        Source source = createSource(messageHtmlResponse);
        // When
        boolean completelyParsed = htmlParser.parseResource(messageHtmlResponse, source, BASE_DEPTH);
        // Then
        assertThat(completelyParsed, is(equalTo(false)));
        assertThat(listener.getNumberOfUrlsFound(), is(equalTo(9)));
        assertThat(
                listener.getUrlsFound(),
                contains(
                        "http://a.example.com/",
                        "http://area.example.com/",
                        "http://frame.example.com/",
                        "http://iframe.example.com/",
                        "http://img.example.com/",
                        "http://link.example.com/",
                        "http://meta.example.com/refresh/",
                        "http://meta.example.com/location/",
                        "http://script.example.com/"));
    }

    @Test
    public void shouldNotFindUrlsInCommentsWithElementsIfNotEnabledToParseComments() {
        // Given
        SpiderParam spiderOptions = createSpiderParamWithConfig();
        spiderOptions.setParseComments(false);
        SpiderHtmlParser htmlParser = new SpiderHtmlParser(spiderOptions);
        TestSpiderParserListener listener = createTestSpiderParserListener();
        htmlParser.addSpiderParserListener(listener);
        HttpMessage messageHtmlResponse = createMessageWith("CommentWithElementsSpiderHtmlParser.html");
        Source source = createSource(messageHtmlResponse);
        // When
        boolean completelyParsed = htmlParser.parseResource(messageHtmlResponse, source, BASE_DEPTH);
        // Then
        assertThat(completelyParsed, is(equalTo(false)));
        assertThat(listener.getNumberOfUrlsFound(), is(equalTo(0)));
        assertThat(listener.getUrlsFound(), is(empty()));
    }

    @Test
    public void shouldFindUrlsInCommentsWithoutElements() {
        // Given
        SpiderHtmlParser htmlParser = new SpiderHtmlParser(new SpiderParam());
        TestSpiderParserListener listener = createTestSpiderParserListener();
        htmlParser.addSpiderParserListener(listener);
        HttpMessage messageHtmlResponse = createMessageWith("CommentWithoutElementsSpiderHtmlParser.html");
        Source source = createSource(messageHtmlResponse);
        // When
        boolean completelyParsed = htmlParser.parseResource(messageHtmlResponse, source, BASE_DEPTH);
        // Then
        assertThat(completelyParsed, is(equalTo(false)));
        assertThat(listener.getNumberOfUrlsFound(), is(equalTo(10)));
        assertThat(
                listener.getUrlsFound(),
                contains(
                        "http://plaincomment.example.com/",
                        "http://plaincomment.example.com/z.php?x=y",
                        "http://plaincomment.example.com/c.pl?x=y",
                        "https://plaincomment.example.com/d.asp?x=y",
                        "https://plaincomment.example.com/e/e1/e2.html?x=y",
                        "https://plaincomment.example.com/surrounded/with/parenthesis",
                        "https://plaincomment.example.com/surrounded/with/brackets",
                        "https://plaincomment.example.com/surrounded/with/curly/brackets",
                        "http://plaincomment.example.com/variant1",
                        "http://plaincomment.example.com/variant2"));
    }

    @Test
    public void shouldNotFindUrlsInCommentsWithoutElementsIfNotEnabledToParseComments() {
        // Given
        SpiderParam spiderOptions = createSpiderParamWithConfig();
        spiderOptions.setParseComments(false);
        SpiderHtmlParser htmlParser = new SpiderHtmlParser(spiderOptions);
        TestSpiderParserListener listener = createTestSpiderParserListener();
        htmlParser.addSpiderParserListener(listener);
        HttpMessage messageHtmlResponse = createMessageWith("CommentWithoutElementsSpiderHtmlParser.html");
        Source source = createSource(messageHtmlResponse);
        // When
        boolean completelyParsed = htmlParser.parseResource(messageHtmlResponse, source, BASE_DEPTH);
        // Then
        assertThat(completelyParsed, is(equalTo(false)));
        assertThat(listener.getNumberOfUrlsFound(), is(equalTo(0)));
        assertThat(listener.getUrlsFound(), is(empty()));
    }

    /**
     * Creates a message for the request URI {@code "/"} whose response body is the given HTML fixture file.
     *
     * @param filename the name of the fixture file, resolved against {@link #BASE_DIR_HTML_FILES}
     * @return the message with request header, response header and response body set
     * @throws RuntimeException if the fixture cannot be read or the message cannot be built
     * @see #createMessageWith(String, String)
     */
    private static HttpMessage createMessageWith(String filename) {
        return createMessageWith(filename, "/");
    }

    /**
     * Creates a message with a {@code GET} request to {@code example.com} for the given request URI and a
     * {@code 200 OK} {@code text/html; charset=UTF-8} response whose body is the given HTML fixture file.
     *
     * @param filename the name of the fixture file, resolved against {@link #BASE_DIR_HTML_FILES}
     * @param requestUri the request target to use in the request line (for example {@code "/a/b"})
     * @return the message with request header, response header and response body set
     * @throws RuntimeException wrapping any exception raised while reading the fixture or building the message
     */
    private static HttpMessage createMessageWith(String filename, String requestUri) {
        HttpMessage message = new HttpMessage();
        try {
            String fileContents = readFile(BASE_DIR_HTML_FILES.resolve(filename));
            // Content-Length is a number of octets, not of chars; as the response declares charset=UTF-8
            // count the UTF-8 encoded bytes so fixtures with non-ASCII characters get a correct header.
            int contentLength = fileContents.getBytes(StandardCharsets.UTF_8).length;
            message.setRequestHeader("GET " + requestUri + " HTTP/1.1\r\nHost: example.com\r\n");
            message.setResponseHeader(
                    "HTTP/1.1 200 OK\r\n"
                            + "Content-Type: text/html; charset=UTF-8\r\n"
                            + "Content-Length: " + contentLength);
            message.setResponseBody(fileContents);
        } catch (Exception e) {
            // Fail the calling test with the original cause instead of forcing checked exceptions on every test.
            throw new RuntimeException(e);
        }
        return message;
    }
}