/* * Zed Attack Proxy (ZAP) and its related class files. * * ZAP is an HTTP/HTTPS proxy for assessing web application security. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.zaproxy.zap.httputils; import java.util.ArrayList; import java.util.Iterator; import java.util.List; import net.htmlparser.jericho.Attribute; import net.htmlparser.jericho.Element; import net.htmlparser.jericho.Source; import org.parosproxy.paros.network.HttpMessage; public class HtmlContextAnalyser { private char [] quotes = {'\'', '"'}; // Tag attributes which can contain javascript private String [] scriptAttributes = { "onBlur", "onChange", "onClick", "onDblClick", "onFocus", "onKeydown", "onKeyup", "onKeypress", "onLoad", "onMousedown", "onMouseup", "onMouseover", "onMousemove", "onMouseout", "onReset", "onSelect", "onSubmit", "onUnload" }; // Tag attributes which can contain a URL private String [] urlAttributes = { "action", "background", "cite", "classid", "codebase", "data", "formaction", "href", "icon", "longdesc", "manifest", "poster", "profile", "src", "usemap", }; // Tags which can have a 'src' attribute private String [] tagsWithSrcAttributes = { "frame", "iframe", "img", "input", // Special case - should also check to see if it has a type of 'image' "script", "src", }; private HttpMessage msg = null; private String htmlPage = null; private Source src = null; public HtmlContextAnalyser (HttpMessage msg) { this.msg = msg; this.htmlPage = msg.getResponseHeader().toString() + msg.getResponseBody().toString(); src = new Source(htmlPage); src.fullSequentialParse(); } private boolean isQuote (char chr) { for (int i=0; i < quotes.length; i++) { if (chr == quotes[i]) { return true; } } return false; } private boolean isScriptAttribute (String att) { for (int i=0; i < scriptAttributes.length; i++) { if (att.equalsIgnoreCase(scriptAttributes[i])) { return true; } } return false; } private boolean isUrlAttribute (String att) { for (int i=0; i < urlAttributes.length; i++) { if (att.equalsIgnoreCase(urlAttributes[i])) { return true; } } return false; } private boolean isInTagWithSrcAttribute (String tag) { for (int i=0; i < tagsWithSrcAttributes.length; i++) { if (tag.equalsIgnoreCase(tagsWithSrcAttributes[i])) { return true; } } return false; } public List<HtmlContext> getHtmlContexts (String target) { return this.getHtmlContexts(target, null, 0); } public List<HtmlContext> getHtmlContexts (String target, HtmlContext targetContext, int ignoreFlags) { List<HtmlContext> contexts = new ArrayList<>(); int offset = 0; while ((offset = htmlPage.indexOf(target, offset)) >= 0) { HtmlContext context = new HtmlContext(this.msg, target, offset, offset + target.length()); offset += target.length(); // Is it in quotes? char leftQuote = 0; for (int i=context.getStart()-1; i > 0; i--) { char chr = htmlPage.charAt(i); if (isQuote(chr)) { leftQuote = chr; break; } else if (chr == '>') { // end of another tag break; } } if (leftQuote != 0) { for (int i=context.getEnd(); i < htmlPage.length(); i++) { char chr = htmlPage.charAt(i); if (leftQuote == chr) { // matching quote context.setSurroundingQuote("" + leftQuote); break; } else if (isQuote(chr)) { // Another non matching quote break; } else if (chr == '<') { // start of another tag break; } } } // is it in an HTML comment? String prefix = htmlPage.substring(0, context.getStart()); if (prefix.lastIndexOf("<!--") > prefix.lastIndexOf(">")) { // Also check closing comment? context.setHtmlComment(true); } // Work out the location in the DOM Element element = src.getEnclosingElement(context.getStart()); if (element != null) { // See if its in an attribute boolean isInputTag = element.getName().equalsIgnoreCase("input"); // Special case for input src attributes boolean isImageInputTag = false; Iterator<Attribute> iter = element.getAttributes().iterator(); while (iter.hasNext()) { Attribute att = iter.next(); if (att.getValue() != null && att.getValue().toLowerCase().indexOf(target.toLowerCase()) >= 0) { // Found the injected value context.setTagAttribute(att.getName()); context.setInUrlAttribute(this.isUrlAttribute(att.getName())); context.setInScriptAttribute(this.isScriptAttribute(att.getName())); } if (isInputTag && att.getName().equalsIgnoreCase("type") && "image".equalsIgnoreCase(att.getValue())) { isImageInputTag = true; } } // record the tag hierarchy context.addParentTag(element.getName()); if (! isInputTag || isImageInputTag) { // Input tags only use the src attribute if the type is 'image' context.setInTagWithSrc(this.isInTagWithSrcAttribute(element.getName())); } while ((element = element.getParentElement()) != null) { context.addParentTag(element.getName()); } } if (targetContext == null) { // Always add contexts.add(context); } else if (targetContext.matches(context, ignoreFlags)) { // Matches the supplied context contexts.add(context); } } return contexts; } }