/*
* Zed Attack Proxy (ZAP) and its related class files.
*
* ZAP is an HTTP/HTTPS proxy for assessing web application security.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.zaproxy.zap.network;
import java.io.UnsupportedEncodingException;
import java.nio.ByteBuffer;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CodingErrorAction;
import java.nio.charset.StandardCharsets;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.log4j.Logger;
import org.parosproxy.paros.network.HttpBody;
/**
 * The body of an HTTP response.
 * <p>
 * When asked for a {@code String} without a known charset, the body looks for a charset declared in a (X)HTML {@code meta}
 * element of the contents; failing that it uses UTF-8 if the bytes are well-formed UTF-8, and ISO-8859-1 otherwise.
 */
public class HttpResponseBody extends HttpBody {

    private static final Logger log = Logger.getLogger(HttpResponseBody.class);

    /**
     * Matches a {@code meta} element declaring a charset; group 1 captures the (untrimmed) charset name.
     * <p>
     * Zero or more characters are allowed between {@code <meta } and {@code charset}, so that both the {@code http-equiv}
     * form ({@code content="text/html; charset=X"}) and the HTML5 short form ({@code <meta charset="X">}) are matched.
     * {@code \x22} is the double quote.
     */
    private static final Pattern patternCharset = Pattern.compile(
            "<META +[^>]*charset *= *['\\x22]?([^>'\\x22;]+)['\\x22]? *[/]?>",
            Pattern.CASE_INSENSITIVE);

    /**
     * Constructs a {@code HttpResponseBody} with no contents (that is, zero length).
     */
    public HttpResponseBody() {
        super();
    }

    /**
     * Constructs a {@code HttpResponseBody} with the given initial capacity.
     * <p>
     * The initial capacity is limited to prevent allocating big arrays.
     *
     * @param capacity the initial capacity
     * @see HttpBody#LIMIT_INITIAL_CAPACITY
     */
    public HttpResponseBody(int capacity) {
        super(capacity);
    }

    /**
     * Constructs a {@code HttpResponseBody} with the given {@code contents}, using default charset for {@code String} related
     * operations.
     * <p>
     * If the given {@code contents} are {@code null} the {@code HttpResponseBody} will have no content.
     * <p>
     * <strong>Note:</strong> If the contents are not representable with the default charset it might lead to data loss.
     *
     * @param contents the contents of the body, might be {@code null}
     * @see #HttpResponseBody(byte[])
     * @see HttpBody#DEFAULT_CHARSET
     */
    public HttpResponseBody(String contents) {
        super(contents);
    }

    /**
     * Constructs a {@code HttpResponseBody} with the given {@code contents}.
     * <p>
     * If the given {@code contents} are {@code null} the {@code HttpResponseBody} will have no content.
     *
     * @param contents the contents of the body, might be {@code null}
     * @since 2.5.0
     */
    public HttpResponseBody(byte[] contents) {
        super(contents);
    }

    /**
     * Determines the charset of the given contents from its {@code meta} charset declaration, if any.
     * <p>
     * If a declaration is present but names an illegal or unsupported charset, a warning is logged and {@code null} is
     * returned (UTF-8 is not attempted in that case). If there is no declaration, UTF-8 is returned when the contents pass
     * the UTF-8 round-trip check.
     *
     * @param contents the contents to inspect
     * @return the charset determined, or {@code null} if none could be determined
     */
    @Override
    protected Charset determineCharset(String contents) {
        String declaredCharset = findMetaCharset(contents);
        if (declaredCharset == null) {
            return isUtf8String(contents) ? StandardCharsets.UTF_8 : null;
        }

        try {
            return Charset.forName(declaredCharset);
        } catch (IllegalArgumentException e) {
            // Covers both IllegalCharsetNameException and UnsupportedCharsetException.
            log.warn("Unable to determine (valid) charset with the (X)HTML meta charset: " + e.getMessage());
            return null;
        }
    }

    /**
     * Extracts the charset name declared in the first matching {@code meta} element of the given markup.
     * <p>
     * The name is trimmed because, for unquoted attribute values, the pattern's capturing group can include the whitespace
     * that precedes the closing {@code >}, which would otherwise make a valid charset name be rejected.
     *
     * @param markup the text to search
     * @return the trimmed charset name, or {@code null} if no declaration was found
     */
    private static String findMetaCharset(String markup) {
        Matcher matcher = patternCharset.matcher(markup);
        return matcher.find() ? matcher.group(1).trim() : null;
    }

    /**
     * Tells whether the given string keeps its length after being encoded to, and decoded from, UTF-8.
     * <p>
     * NOTE(review): {@code String#getBytes(Charset)} substitutes a single replacement byte for input it cannot encode (for
     * example an unpaired surrogate), so the two lengths look like they always match - confirm whether a stricter check was
     * intended before relying on a {@code false} result.
     *
     * @param string the string to check
     * @return {@code true} if the round trip preserves the length, {@code false} otherwise
     */
    private static boolean isUtf8String(String string) {
        return new String(string.getBytes(StandardCharsets.UTF_8), StandardCharsets.UTF_8).length() == string.length();
    }

    /**
     * Creates the {@code String} representation of the body, using the given charset if there is one, otherwise detecting
     * the charset from the contents.
     *
     * @param currentCharset the charset currently set for the body, might be {@code null}
     * @return the {@code String} representation of the body
     */
    @Override
    protected String createString(Charset currentCharset) {
        if (currentCharset != null) {
            return super.createString(currentCharset);
        }
        return createStringWithMetaCharset();
    }

    /**
     * Creates the {@code String} representation of the body when no charset is known.
     * <p>
     * The charset is chosen in this order: the one declared in a {@code meta} element, UTF-8 if the bytes are well-formed
     * UTF-8, ISO-8859-1 otherwise. In the first two cases the chosen charset is also set on the body. If the declared
     * charset is not supported the ISO-8859-1 decoding is returned and the charset of the body is left untouched.
     *
     * @return the {@code String} representation of the body
     */
    private String createStringWithMetaCharset() {
        String result = null;
        String resultDefaultCharset = null;
        try {
            // ISO-8859-1 maps every byte to exactly one char, so the markup can be searched before the real charset is known.
            resultDefaultCharset = new String(getBytes(), 0, getPos(), StandardCharsets.ISO_8859_1);

            String declaredCharset = findMetaCharset(resultDefaultCharset);
            if (declaredCharset != null) {
                // Throws UnsupportedEncodingException for unknown or illegal names, before the charset is set.
                result = new String(getBytes(), 0, getPos(), declaredCharset);
                setCharset(declaredCharset);
            } else {
                String utf8 = toUTF8();
                if (utf8 != null) {
                    // assume to be UTF8
                    setCharset(StandardCharsets.UTF_8.name());
                    result = utf8;
                } else {
                    result = resultDefaultCharset;
                }
            }
        } catch (UnsupportedEncodingException e) {
            log.error("Unable to encode with the (X)HTML meta charset: " + e.getMessage());
            log.warn("Using default charset: " + DEFAULT_CHARSET);
            result = resultDefaultCharset;
        }
        return result;
    }

    /**
     * Decodes the contents of the body as UTF-8, accepting only well-formed input.
     * <p>
     * A reporting decoder is used instead of comparing re-encoded lengths: a malformed three byte sequence that gets replaced
     * by a single U+FFFD re-encodes to three bytes, which a length comparison would wrongly accept as valid UTF-8.
     *
     * @return the decoded contents, or {@code null} if the bytes are not well-formed UTF-8
     */
    private String toUTF8() {
        CharsetDecoder strictDecoder = StandardCharsets.UTF_8.newDecoder()
                .onMalformedInput(CodingErrorAction.REPORT)
                .onUnmappableCharacter(CodingErrorAction.REPORT);
        try {
            return strictDecoder.decode(ByteBuffer.wrap(getBytes(), 0, getPos())).toString();
        } catch (CharacterCodingException e) {
            // Not UTF-8; the caller falls back to the default charset.
            return null;
        }
    }
}