/*
* Zed Attack Proxy (ZAP) and its related class files.
*
* ZAP is an HTTP/HTTPS proxy for assessing web application security.
*
* Copyright 2013 The ZAP Development team
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.parosproxy.paros.core.scanner;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.lang.StringEscapeUtils;
/**
 * Variant specialized for XML request bodies ({@code text/xml},
 * {@code application/xml} and {@code application/soap+xml}).
 * <p>
 * The body is not parsed with an XML parser: it is scanned with two regular
 * expressions, one for attributes and one for elements that contain only text
 * or a single CDATA section. Every match is handed to {@code addParameter}
 * together with the offsets of its value inside the scanned content.
 *
 * @author andy
 */
public class VariantXMLQuery extends VariantAbstractRPCQuery {

    public static final String XML_CONTENT_TYPE = "text/xml";
    public static final String XML_APP_CONTENT_TYPE = "application/xml";
    public static final String SOAP2_CONTENT_TYPE = "application/soap+xml";

    // XML standard from W3C Consortium
    // ---------------------------------------------
    // STag ::= '<' Name (S Attribute)* S? '>'
    // NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] | [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
    // NameChar ::= NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | [#x203F-#x2040]
    // Name ::= NameStartChar (NameChar)*
    // S ::= (0x20 0x09 0x0d 0x0a)+ in Java (\s)
    // Attribute ::= Name Eq AttValue
    // Eq ::= S? '=' S?
    // AttValue ::= '"' ([^<&"] | Reference)* '"' | "'" ([^<&'] | Reference)* "'"
    // ----------------------------------------------

    // Group 1: attribute name, group 2: attribute value INCLUDING its quotes.
    private static final String ATT_REGEX = "(\\S+)\\s*=\\s*((?:\"[^\"\\&\\<]*\")|(?:'[^'\\&\\<]*'))";

    // Group 1: element name, group 2: element content, which is either one
    // whole CDATA section or plain text without '<' and '&'.
    // NOTE(review): '.' in the CDATA branch does not match line terminators
    // (no DOTALL flag), so multi-line CDATA sections are not matched - confirm
    // whether that is intended.
    private static final String TAG_REGEX = "\\<([\\_\\:A-Za-z][\\_\\:A-Za-z0-9\\-\\.]*)\\s*[^\\>]*\\>((?:\\<\\!\\[CDATA\\[(?:.(?<!\\]\\]>))*\\]\\]>)|(?:[^\\<\\&]*))\\<\\/[\\_\\:A-Za-z][\\_\\:A-Za-z0-9\\-\\.]*\\s*\\>";

    // Compiled once for the whole class instead of once per instance.
    private static final Pattern ATT_PATTERN = Pattern.compile(ATT_REGEX);
    private static final Pattern TAG_PATTERN = Pattern.compile(TAG_REGEX);

    // Markers wrapping a CDATA section.
    private static final String CDATA_START = "<![CDATA[";
    private static final String CDATA_END = "]]>";

    /**
     * Tells whether the given content type is one of the XML content types
     * handled by this variant.
     * <p>
     * The comparison is a case-insensitive prefix match, so trailing
     * parameters (e.g. {@code "; charset=UTF-8"}) do not prevent a match.
     *
     * @param contentType the value of the content type header, may be {@code null}
     * @return {@code true} if the content type starts with
     *         {@link #XML_CONTENT_TYPE}, {@link #SOAP2_CONTENT_TYPE} or
     *         {@link #XML_APP_CONTENT_TYPE}, {@code false} otherwise
     *         (including when {@code contentType} is {@code null})
     */
    @Override
    public boolean isValidContentType(String contentType) {
        if (contentType == null) {
            return false;
        }

        // Accepting anything that merely contains "xml" would be simpler,
        // but would match far more content types than intended.
        return startsWithIgnoreCase(contentType, XML_CONTENT_TYPE)
                || startsWithIgnoreCase(contentType, SOAP2_CONTENT_TYPE)
                || startsWithIgnoreCase(contentType, XML_APP_CONTENT_TYPE);
    }

    /**
     * Escapes the given value using {@code StringEscapeUtils.escapeXml}.
     *
     * @param value the value to escape
     * @param toQuote not used by this implementation
     * @return the XML-escaped value
     */
    @Override
    public String getEscapedValue(String value, boolean toQuote) {
        return StringEscapeUtils.escapeXml(value);
    }

    /**
     * Unescapes the given value using {@code StringEscapeUtils.unescapeXml}.
     *
     * @param value the XML-escaped value
     * @return the unescaped value
     */
    @Override
    public String getUnescapedValue(String value) {
        return StringEscapeUtils.unescapeXml(value);
    }

    /**
     * Scans the given content and calls {@code addParameter} for every
     * attribute and every text/CDATA-only element found in it.
     * <p>
     * All attribute matches are reported first, then all element matches;
     * within each pass matches are reported in document order.
     *
     * @param content the XML content to scan; nothing is done if {@code null}
     */
    @Override
    public void parseContent(String content) {
        if (content == null) {
            return;
        }

        // First pass: attributes. Group 2 includes the surrounding quotes,
        // so the offsets are moved one character inwards on both sides.
        Matcher matcher = ATT_PATTERN.matcher(content);
        while (matcher.find()) {
            addParameter(matcher.group(1), matcher.start(2) + 1, matcher.end(2) - 1, false, false);
        }

        // Second pass: elements whose content is plain text or one CDATA section.
        matcher = TAG_PATTERN.matcher(content);
        while (matcher.find()) {
            String raw = matcher.group(2);
            String value;

            if (raw.startsWith(CDATA_START) && raw.endsWith(CDATA_END)) {
                // CDATA content is literal: drop only the wrapper, do not unescape.
                value = raw.substring(CDATA_START.length(), raw.length() - CDATA_END.length());
            } else {
                value = getUnescapedValue(raw);
            }

            // The offsets still span the raw content (CDATA wrapper included),
            // while the value passed along has the wrapper/escaping removed.
            addParameter(matcher.group(1), matcher.start(2), matcher.end(2), false, value);
        }
    }

    /**
     * Case-insensitive variant of {@link String#startsWith(String)}.
     */
    private static boolean startsWithIgnoreCase(String text, String prefix) {
        return text.regionMatches(true, 0, prefix, 0, prefix.length());
    }
}