/*
* Zed Attack Proxy (ZAP) and its related class files.
*
* ZAP is an HTTP/HTTPS proxy for assessing web application security.
*
* Copyright 2016 The ZAP Development Team
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.zaproxy.zap.spider.parser;
import static org.hamcrest.Matchers.contains;
import static org.hamcrest.Matchers.empty;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.is;
import static org.junit.Assert.assertThat;

import java.nio.charset.StandardCharsets;

import org.apache.log4j.Logger;
import org.apache.log4j.varia.NullAppender;
import org.junit.BeforeClass;
import org.junit.Test;
import org.parosproxy.paros.network.HttpMessage;

import net.htmlparser.jericho.Source;
/**
 * Unit test for {@link SpiderTextParser}.
 * <p>
 * The tests exercise {@code canParseResource} (which responses the parser accepts) and
 * {@code parseResource} (which URLs are reported to the registered listener). Messages are
 * built with the {@code createMessageWith} helpers, which default to a {@code 200 OK}
 * response with content type {@code text/xyz}.
 */
public class SpiderTextParserUnitTest extends SpiderParserTestUtils {

    private static final String EMPTY_BODY = "";
    private static final String ROOT_PATH = "/";
    private static final int BASE_DEPTH = 0;

    /**
     * Attaches a {@link NullAppender} to the log4j root logger before any test runs.
     */
    @BeforeClass
    public static void suppressLogging() {
        Logger.getRootLogger().addAppender(new NullAppender());
    }

    /**
     * Evaluating a {@code null} message (not yet parsed) is expected to throw a
     * {@code NullPointerException}.
     */
    @Test(expected = NullPointerException.class)
    public void shouldFailToEvaluateAnUndefinedMessage() {
        // Given
        HttpMessage undefinedMessage = null;
        SpiderTextParser spiderParser = new SpiderTextParser();
        // When
        spiderParser.canParseResource(undefinedMessage, ROOT_PATH, false);
        // Then = NullPointerException
    }

    /**
     * A message flagged as already parsed must be rejected, even a freshly created one.
     */
    @Test
    public void shouldNotParseMessageIfAlreadyParsed() {
        // Given
        SpiderTextParser spiderParser = new SpiderTextParser();
        boolean parsed = true;
        // When
        boolean canParse = spiderParser.canParseResource(new HttpMessage(), ROOT_PATH, parsed);
        // Then
        assertThat(canParse, is(equalTo(false)));
    }

    /**
     * A response whose content type is not textual ({@code application/xyz}) must be rejected.
     */
    @Test
    public void shouldNotParseNonTextResponse() {
        // Given
        HttpMessage message = createMessageWith("application/xyz", EMPTY_BODY);
        SpiderTextParser spiderParser = new SpiderTextParser();
        boolean parsed = false;
        // When
        boolean canParse = spiderParser.canParseResource(message, ROOT_PATH, parsed);
        // Then
        assertThat(canParse, is(equalTo(false)));
    }

    /**
     * A {@code text/html} response must be rejected by the text parser.
     */
    @Test
    public void shouldNotParseTextHtmlResponse() {
        // Given
        HttpMessage message = createMessageWith("text/html", EMPTY_BODY);
        SpiderTextParser spiderParser = new SpiderTextParser();
        boolean parsed = false;
        // When
        boolean canParse = spiderParser.canParseResource(message, ROOT_PATH, parsed);
        // Then
        assertThat(canParse, is(equalTo(false)));
    }

    /**
     * A not yet parsed {@code text/xyz} response must be accepted.
     */
    @Test
    public void shouldParseTextResponse() {
        // Given
        SpiderTextParser spiderParser = new SpiderTextParser();
        HttpMessage textMessage = createMessageWith(EMPTY_BODY);
        boolean parsed = false;
        // When
        boolean canParse = spiderParser.canParseResource(textMessage, ROOT_PATH, parsed);
        // Then
        assertThat(canParse, is(equalTo(true)));
    }

    /**
     * A not yet parsed {@code text/xyz} response must be accepted even when the path is
     * {@code null}.
     */
    @Test
    public void shouldParseTextResponseEvenIfProvidedPathIsNull() {
        // Given
        SpiderTextParser spiderParser = new SpiderTextParser();
        HttpMessage textMessage = createMessageWith(EMPTY_BODY);
        boolean parsed = false;
        // When
        boolean canParse = spiderParser.canParseResource(textMessage, null, parsed);
        // Then
        assertThat(canParse, is(equalTo(true)));
    }

    /**
     * A {@code text/xyz} response that was already parsed must be rejected.
     */
    @Test
    public void shouldNotParseTextResponseIfAlreadyParsed() {
        // Given
        SpiderTextParser spiderParser = new SpiderTextParser();
        HttpMessage textMessage = createMessageWith(EMPTY_BODY);
        boolean parsed = true;
        // When
        boolean canParse = spiderParser.canParseResource(textMessage, ROOT_PATH, parsed);
        // Then
        assertThat(canParse, is(equalTo(false)));
    }

    /**
     * Parsing a {@code null} message, with a valid source, is expected to throw a
     * {@code NullPointerException}.
     */
    @Test(expected = NullPointerException.class)
    public void shouldFailToParseAnUndefinedMessage() {
        // Given
        HttpMessage undefinedMessage = null;
        SpiderTextParser spiderParser = new SpiderTextParser();
        Source source = createSource(createMessageWith(EMPTY_BODY));
        // When
        spiderParser.parseResource(undefinedMessage, source, BASE_DEPTH);
        // Then = NullPointerException
    }

    /**
     * {@code parseResource} must report {@code false} (not completely parsed) for a
     * non-empty text body.
     */
    @Test
    public void shouldNeverConsiderCompletelyParsed() {
        // Given
        SpiderTextParser spiderParser = new SpiderTextParser();
        HttpMessage message = createMessageWith("Non Empty Body...");
        Source source = createSource(message);
        // When
        boolean completelyParsed = spiderParser.parseResource(message, source, BASE_DEPTH);
        // Then
        assertThat(completelyParsed, is(equalTo(false)));
    }

    /**
     * No URL must be reported for a body that only contains a scheme-less URL, an FTP URL
     * and a protocol-relative URL.
     */
    @Test
    public void shouldNotFindUrlsIfThereIsNone() {
        // Given
        SpiderTextParser spiderParser = new SpiderTextParser();
        TestSpiderParserListener listener = createTestSpiderParserListener();
        spiderParser.addSpiderParserListener(listener);
        HttpMessage message = createMessageWith(
                body(
                        "Body with no HTTP/S URLs",
                        " ://example.com/ ",
                        "More text... ftp://ftp.example.com/ ",
                        "Even more text... //noscheme.example.com "));
        Source source = createSource(message);
        // When
        boolean completelyParsed = spiderParser.parseResource(message, source, BASE_DEPTH);
        // Then
        assertThat(completelyParsed, is(equalTo(false)));
        assertThat(listener.getNumberOfUrlsFound(), is(equalTo(0)));
        assertThat(listener.getUrlsFound(), is(empty()));
    }

    /**
     * HTTP and HTTPS URLs embedded in plain text must be reported in order of appearance,
     * without surrounding quotes/brackets, without the fragment, and with the mixed case
     * scheme and host reported in lower case (see the expected values below).
     */
    @Test
    public void shouldFindUrlsInCommentsWithoutElements() {
        // Given
        SpiderTextParser spiderParser = new SpiderTextParser();
        TestSpiderParserListener listener = createTestSpiderParserListener();
        spiderParser.addSpiderParserListener(listener);
        HttpMessage textMessage = createMessageWith(
                body(
                        "Body with HTTP/S URLs",
                        " - http://plaincomment.example.com some text not part of URL",
                        "- \"https://plaincomment.example.com/z.php?x=y\" more text not part of URL",
                        "- 'http://plaincomment.example.com/c.pl?x=y' even more text not part of URL",
                        "- <https://plaincomment.example.com/d.asp?x=y> ...",
                        "- http://plaincomment.example.com/e/e1/e2.html?x=y#stop fragment should be ignored",
                        "- (https://plaincomment.example.com/surrounded/with/parenthesis) parenthesis should not be included",
                        "- [https://plaincomment.example.com/surrounded/with/brackets] brackets should not be included",
                        "- {https://plaincomment.example.com/surrounded/with/curly/brackets} curly brackets should not be included",
                        "- mixed case URLs HtTpS://ExAmPlE.CoM/path/ should also be found"));
        Source source = createSource(textMessage);
        // When
        boolean completelyParsed = spiderParser.parseResource(textMessage, source, BASE_DEPTH);
        // Then
        assertThat(completelyParsed, is(equalTo(false)));
        assertThat(listener.getNumberOfUrlsFound(), is(equalTo(9)));
        assertThat(
                listener.getUrlsFound(),
                contains(
                        "http://plaincomment.example.com/",
                        "https://plaincomment.example.com/z.php?x=y",
                        "http://plaincomment.example.com/c.pl?x=y",
                        "https://plaincomment.example.com/d.asp?x=y",
                        "http://plaincomment.example.com/e/e1/e2.html?x=y",
                        "https://plaincomment.example.com/surrounded/with/parenthesis",
                        "https://plaincomment.example.com/surrounded/with/brackets",
                        "https://plaincomment.example.com/surrounded/with/curly/brackets",
                        "https://example.com/path/"));
    }

    /**
     * Creates a {@code 200 OK} message with content type {@code text/xyz} and the given body.
     *
     * @param body the response body, must not be {@code null}
     * @return the created message
     */
    private static HttpMessage createMessageWith(String body) {
        return createMessageWith("text/xyz", body);
    }

    /**
     * Creates a {@code 200 OK} message with the given content type and body.
     *
     * @param contentType the value used for the {@code Content-Type} response header
     * @param body the response body, must not be {@code null}
     * @return the created message
     */
    private static HttpMessage createMessageWith(String contentType, String body) {
        return createMessageWith("200 OK", contentType, body);
    }

    /**
     * Creates a message with a fixed {@code GET /} request to {@code example.com} and a
     * response built from the given status, content type and body.
     * <p>
     * The response always declares {@code charset=UTF-8}, so the {@code Content-Length} is
     * the number of UTF-8 bytes of the body (not its number of characters).
     *
     * @param statusCodeMessage the status code and reason phrase, for example {@code "200 OK"}
     * @param contentType the value used for the {@code Content-Type} response header
     * @param body the response body, must not be {@code null}
     * @return the created message
     * @throws RuntimeException if setting the headers or the body fails, wrapping the cause
     */
    private static HttpMessage createMessageWith(String statusCodeMessage, String contentType, String body) {
        HttpMessage message = new HttpMessage();
        try {
            message.setRequestHeader("GET / HTTP/1.1\r\nHost: example.com\r\n");
            // Content-Length is expressed in bytes of the declared charset, which differs
            // from String.length() as soon as the body has non-ASCII characters.
            int contentLength = body.getBytes(StandardCharsets.UTF_8).length;
            message.setResponseHeader(
                    "HTTP/1.1 " + statusCodeMessage + "\r\n" + "Content-Type: " + contentType + "; charset=UTF-8\r\n"
                            + "Content-Length: " + contentLength);
            message.setResponseBody(body);
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
        return message;
    }

    /**
     * Joins the given strings with a line feed ({@code \n}) between consecutive elements.
     * <p>
     * Empty elements are kept as empty lines, including leading ones.
     *
     * @param strings the lines of the body, might be {@code null} or empty
     * @return the joined lines, or an empty string if no strings were given
     */
    private static String body(String... strings) {
        if (strings == null || strings.length == 0) {
            return "";
        }
        StringBuilder strBuilder = new StringBuilder(strings.length * 25);
        strBuilder.append(strings[0]);
        // The separator depends on the element position, not on the accumulated length,
        // so that leading empty strings still produce their line break.
        for (int i = 1; i < strings.length; i++) {
            strBuilder.append('\n').append(strings[i]);
        }
        return strBuilder.toString();
    }
}