/*
* Zed Attack Proxy (ZAP) and its related class files.
*
* ZAP is an HTTP/HTTPS proxy for assessing web application security.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.zaproxy.zap.spider.parser;
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import net.htmlparser.jericho.Attribute;
import net.htmlparser.jericho.Element;
import net.htmlparser.jericho.FormField;
import net.htmlparser.jericho.FormFields;
import net.htmlparser.jericho.HTMLElementName;
import net.htmlparser.jericho.Source;
import org.apache.commons.httpclient.URI;
import org.parosproxy.paros.network.HtmlParameter;
import org.parosproxy.paros.network.HtmlParameter.Type;
import org.parosproxy.paros.network.HttpMessage;
import org.zaproxy.zap.model.DefaultValueGenerator;
import org.zaproxy.zap.model.ValueGenerator;
import org.zaproxy.zap.spider.SpiderParam;
import org.zaproxy.zap.spider.URLCanonicalizer;
/**
 * The Class SpiderHtmlFormParser is used for parsing HTML files for processing forms.
 * <p>
 * For every {@code form} element found in the response, the parser builds the form data set (asking the
 * configured {@link ValueGenerator} for the value of each field) and then either notifies the listeners of a
 * POST resource or processes the resulting GET URL.
 * <p>
 * All the data that is specific to one parsed message (request URI, canonical form URL and form attributes) is
 * kept in local variables and handed down explicitly, it is not stored in instance fields.
 */
public class SpiderHtmlFormParser extends SpiderParser {

    private static final String ENCODING_TYPE = "UTF-8";
    private static final String DEFAULT_EMPTY_VALUE = "";
    private static final String METHOD_POST = "POST";

    /** The spider parameters. */
    private final SpiderParam param;

    /** The generator asked for the value to submit for each form field. */
    private final ValueGenerator valueGenerator;

    /**
     * Instantiates a new spider html form parser, using a {@link DefaultValueGenerator}.
     *
     * @param param the parameters for the spider
     * @throws IllegalArgumentException if {@code param} is null.
     */
    public SpiderHtmlFormParser(SpiderParam param) {
        this(param, new DefaultValueGenerator());
    }

    /**
     * Instantiates a new spider html form parser.
     *
     * @param param the parameters for the spider
     * @param valueGenerator the generator of the values submitted for the form fields
     * @throws IllegalArgumentException if {@code param} or {@code valueGenerator} is null.
     */
    public SpiderHtmlFormParser(SpiderParam param, ValueGenerator valueGenerator) {
        super();
        if (param == null) {
            throw new IllegalArgumentException("Parameter param must not be null.");
        }
        if (valueGenerator == null) {
            throw new IllegalArgumentException("Parameter valueGenerator must not be null.");
        }
        this.param = param;
        this.valueGenerator = valueGenerator;
    }

    /**
     * Parses the forms of the given message, notifying/processing one resource per form submission found.
     *
     * @param message the message whose response is parsed
     * @param source the parsed response, or {@code null} to have it built from the response body
     * @param depth the current depth
     * @return always {@code false}
     */
    @Override
    public boolean parseResource(HttpMessage message, Source source, int depth) {
        log.debug("Parsing an HTML message for forms...");
        // If form processing is disabled, don't parse anything
        if (!param.isProcessForm()) {
            return false;
        }

        // Prepare the source, if not provided
        if (source == null) {
            source = new Source(message.getResponseBody().toString());
        }

        // Get the context (base url)
        URI uri = message.getRequestHeader().getURI();
        String baseURL = uri.toString();

        // Try to see if there's any BASE tag that could change the base URL
        Element base = source.getFirstElement(HTMLElementName.BASE);
        if (base != null) {
            if (log.isDebugEnabled()) {
                log.debug("Base tag was found in HTML: " + base.getDebugInfo());
            }
            String href = base.getAttributeValue("href");
            if (href != null && !href.isEmpty()) {
                // The canonicalizer can return null (see the check done for POST forms below), in which case
                // the request URL is kept as base instead of propagating a null base URL.
                String canonicalBase = URLCanonicalizer.getCanonicalURL(href, baseURL);
                if (canonicalBase != null) {
                    baseURL = canonicalBase;
                } else {
                    log.debug("Ignoring BASE href that could not be canonicalized: " + href);
                }
            }
        }

        // Go through the forms
        for (Element form : source.getAllElements(HTMLElementName.FORM)) {
            // Collect the attributes of this form, a fresh map is used for each form
            Map<String, String> formAttributes = new HashMap<>();
            for (Attribute att : form.getAttributes()) {
                formAttributes.put(att.getKey(), att.getValue());
            }

            // Get method and action
            String method = form.getAttributeValue("method");
            String action = form.getAttributeValue("action");
            log.debug("Found new form with method: '" + method + "' and action: " + action);

            // If no action, the form is submitted to the base URL
            if (action == null) {
                log.debug("No form 'action' defined. Using base URL: " + baseURL);
                action = baseURL;
            }

            boolean postMethod = method != null && method.trim().equalsIgnoreCase(METHOD_POST);

            // If POSTing forms is not enabled, skip processing of forms with POST method
            if (postMethod && !param.isPostForm()) {
                log.debug("Skipping form with POST method because of user settings.");
                continue;
            }

            // Clear the fragment, if any, as it does not have any relevance for the server.
            // The fragment starts at the first '#'.
            int fragmentStart = action.indexOf('#');
            if (fragmentStart != -1) {
                action = action.substring(0, fragmentStart);
            }

            // Absolute canonical URL of the form, given to the value generator and used as POST target
            String url = URLCanonicalizer.getCanonicalURL(action, baseURL);
            FormData formData = prepareFormDataSet(form.getFormFields(), uri, url, formAttributes);

            if (postMethod) {
                if (url == null) {
                    // Only this form is unusable, the remaining forms are still processed
                    log.debug("Skipping form, unable to build canonical URL from action: " + action);
                    continue;
                }
                log.debug("Canonical URL constructed using '" + action + "': " + url);

                /*
                 * Ignore encoding, as we will not POST files anyway, so using
                 * "application/x-www-form-urlencoded" is adequate
                 */
                String baseRequestBody = buildEncodedUrlQuery(formData.getFields());
                if (formData.getSubmitFields().isEmpty()) {
                    notifyPostResourceFound(message, depth, url, baseRequestBody);
                    continue;
                }

                // One request per submit field, each including the normal fields
                for (HtmlParameter submitField : formData.getSubmitFields()) {
                    notifyPostResourceFound(
                            message,
                            depth,
                            url,
                            appendEncodedUrlQueryParameter(baseRequestBody, submitField));
                }
            } else {
                // Process anything else as a GET method, choosing the separator that must follow the action
                String separator;
                if (!action.contains("?")) {
                    separator = "?";
                } else if (action.endsWith("?")) {
                    separator = "";
                } else {
                    separator = "&";
                }
                processGetForm(message, depth, action + separator, baseURL, formData);
            }
        }

        return false;
    }

    /**
     * Processes the given GET form data into, possibly, several URLs.
     * <p>
     * For each submit field present in the form data is processed one URL, which includes remaining normal fields.
     *
     * @param message the source message
     * @param depth the current depth
     * @param action the action, already ending with the separator that precedes the query parameters
     * @param baseURL the base URL
     * @param formData the GET form data
     * @see #processURL(HttpMessage, int, String, String)
     */
    private void processGetForm(HttpMessage message, int depth, String action, String baseURL, FormData formData) {
        String baseQuery = buildEncodedUrlQuery(formData.getFields());
        List<HtmlParameter> submitFields = formData.getSubmitFields();
        if (submitFields.isEmpty()) {
            log.debug("Submiting form with GET method and query with form parameters: " + baseQuery);
            processURL(message, depth, action + baseQuery, baseURL);
            return;
        }

        for (HtmlParameter submitField : submitFields) {
            String query = appendEncodedUrlQueryParameter(baseQuery, submitField);
            log.debug("Submiting form with GET method and query with form parameters: " + query);
            processURL(message, depth, action + query, baseURL);
        }
    }

    /**
     * Prepares the form data set. A form data set is a sequence of control-name/current-value pairs
     * constructed from successful controls, which will be sent with a GET/POST request for a form.
     *
     * @see <a href="https://www.w3.org/TR/REC-html40/interact/forms.html#form-data-set">HTML 4.01 Specification - 17.13.3
     *      Processing form data</a>
     * @see <a href="https://html.spec.whatwg.org/multipage/forms.html#association-of-controls-and-forms">HTML 5 - 4.10.18.3
     *      Association of controls and forms</a>
     * @param form the fields of the form
     * @param uri the URI of the request that originated the parsed response
     * @param url the canonical URL of the form, might be {@code null} if it could not be built
     * @param formAttributes the attributes of the form element
     * @return the form data, with normal and submit fields kept apart
     */
    private FormData prepareFormDataSet(FormFields form, URI uri, String url, Map<String, String> formAttributes) {
        List<HtmlParameter> formDataSet = new LinkedList<>();
        List<HtmlParameter> submitFields = new ArrayList<>();

        // Process each form field
        for (FormField field : form) {
            if (log.isDebugEnabled()) {
                log.debug("New form field: " + field.getDebugInfo());
            }

            boolean submit = field.getFormControl().getFormControlType().isSubmit();
            List<HtmlParameter> target = submit ? submitFields : formDataSet;
            for (String value : getDefaultTextValue(field, uri, url, formAttributes)) {
                target.add(new HtmlParameter(Type.form, field.getName(), value));
            }
        }
        return new FormData(formDataSet, submitFields);
    }

    /**
     * Gets the values for the given {@code field}.
     * <p>
     * For a submit field each of its predefined values is passed through the {@code ValueGenerator}, resulting in
     * one value per predefined value. For any other field a single value is returned: the one the
     * {@code ValueGenerator} provides for the default value chosen from the current/predefined values of the field.
     *
     * @param field the field
     * @param uri the URI of the request that originated the parsed response
     * @param url the canonical URL of the form, might be {@code null} if it could not be built
     * @param formAttributes the attributes of the form element that contains the field
     * @return a list with the values
     */
    private List<String> getDefaultTextValue(
            FormField field, URI uri, String url, Map<String, String> formAttributes) {
        String fieldId = field.getName();

        // Attributes of the control plus an extra "Control Type" entry with the name of its type
        Map<String, String> fieldAttributes = new HashMap<>(field.getFormControl().getAttributesMap());
        fieldAttributes.put("Control Type", field.getFormControl().getFormControlType().name());

        // Predefined values handed to the generator, only filled for non submit fields without current value
        List<String> definedValues = new ArrayList<>();

        // Handles submit fields
        if (field.getFormControl().getFormControlType().isSubmit()) {
            List<String> submitValues = new ArrayList<>();
            for (String value : field.getPredefinedValues()) {
                submitValues.add(
                        this.valueGenerator.getValue(
                                uri, url, fieldId, value, definedValues, formAttributes, fieldAttributes));
            }
            return submitValues;
        }

        // Get its value(s)
        List<String> values = field.getValues();
        if (log.isDebugEnabled()) {
            log.debug("Existing values: " + values);
        }

        String defaultValue;
        // If there are no values at all or only an empty value
        if (values.isEmpty() || (values.size() == 1 && values.get(0).isEmpty())) {
            defaultValue = DEFAULT_EMPTY_VALUE;

            // Check if we can use predefined values
            Collection<String> predefValues = field.getPredefinedValues();
            if (!predefValues.isEmpty()) {
                // Make the predefined values available to the value generator
                definedValues.addAll(predefValues);

                // Try first element
                Iterator<String> iterator = predefValues.iterator();
                defaultValue = iterator.next();
                // If there are more values, don't use the first, as it usually is a "No select" item
                if (iterator.hasNext()) {
                    defaultValue = iterator.next();
                }
            }
        } else {
            defaultValue = values.get(0);
        }

        // Let the generator decide the value that is actually submitted
        String finalValue =
                this.valueGenerator.getValue(
                        uri, url, fieldId, defaultValue, definedValues, formAttributes, fieldAttributes);
        if (log.isDebugEnabled()) {
            log.debug("Generated: " + finalValue + " For field " + fieldId);
        }

        List<String> result = new ArrayList<>(1);
        result.add(finalValue);
        return result;
    }

    /**
     * Notifies listeners that a new POST resource was found.
     *
     * @param message the source message
     * @param depth the current depth, the listeners are notified with {@code depth + 1}
     * @param url the URL of the resource
     * @param requestBody the request body
     * @see #notifyListenersPostResourceFound(HttpMessage, int, String, String)
     */
    private void notifyPostResourceFound(HttpMessage message, int depth, String url, String requestBody) {
        log.debug("Submiting form with POST method and message body with form parameters (normal encoding): " + requestBody);
        notifyListenersPostResourceFound(message, depth + 1, url, requestBody);
    }

    /**
     * Builds the query, encoded with "application/x-www-form-urlencoded".
     *
     * @see <a href="https://www.w3.org/TR/REC-html40/interact/forms.html#form-content-type">HTML 4.01 Specification - 17.13.4
     *      Form content types</a>
     * @param formDataSet the form data set
     * @return the query, empty if the form data set is empty
     */
    private String buildEncodedUrlQuery(List<HtmlParameter> formDataSet) {
        StringBuilder query = new StringBuilder();
        for (HtmlParameter parameter : formDataSet) {
            // Every appended pair contains at least the '=', thus a non empty query needs the separator
            if (query.length() > 0) {
                query.append('&');
            }
            appendEncodedPair(query, parameter);
        }
        return query.toString();
    }

    /**
     * Appends the given {@code parameter} into the given {@code query}.
     *
     * @param query the query
     * @param parameter the parameter to append
     * @return the query with the parameter appended
     */
    private static String appendEncodedUrlQueryParameter(String query, HtmlParameter parameter) {
        StringBuilder strBuilder = new StringBuilder(query);
        if (strBuilder.length() != 0) {
            strBuilder.append('&');
        }
        appendEncodedPair(strBuilder, parameter);
        return strBuilder.toString();
    }

    /**
     * Appends {@code name=value} of the given parameter, both URL encoded, to the given builder.
     * <p>
     * Name and value are encoded before anything is appended, so a failure to encode does not leave half a pair
     * in the builder. The failure is logged and the parameter is left out.
     *
     * @param target the builder to append to
     * @param parameter the parameter to encode and append
     */
    private static void appendEncodedPair(StringBuilder target, HtmlParameter parameter) {
        try {
            String name = URLEncoder.encode(parameter.getName(), ENCODING_TYPE);
            String value = URLEncoder.encode(parameter.getValue(), ENCODING_TYPE);
            target.append(name).append('=').append(value);
        } catch (UnsupportedEncodingException e) {
            log.warn("Error while encoding query for form.", e);
        }
    }

    @Override
    public boolean canParseResource(HttpMessage message, String path, boolean wasAlreadyConsumed) {
        // Fallback parser - if it's a HTML message which has not already been processed
        return !wasAlreadyConsumed && message.getResponseHeader().isHtml();
    }

    /**
     * The fields (and its values) of a HTML form.
     * <p>
     * Normal fields and submit fields are kept apart.
     */
    private static class FormData {

        private final List<HtmlParameter> fields;
        private final List<HtmlParameter> submitFields;

        public FormData(List<HtmlParameter> fields, List<HtmlParameter> submitFields) {
            this.fields = fields;
            this.submitFields = submitFields;
        }

        public List<HtmlParameter> getFields() {
            return fields;
        }

        public List<HtmlParameter> getSubmitFields() {
            return submitFields;
        }
    }
}