/*
* Zed Attack Proxy (ZAP) and its related class files.
*
* ZAP is an HTTP/HTTPS proxy for assessing web application security.
*
* Copyright 2013 The ZAP Development team
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.parosproxy.paros.core.scanner;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.StringReader;
import org.parosproxy.paros.network.HttpHeader;
import org.parosproxy.paros.network.HttpMessage;
/**
 * Sequential parser for the body of a {@code multipart/form-data} request.
 *
 * <p>The constructor validates the Content-Type header, extracts the boundary and skips
 * any preamble up to the first boundary line. Each subsequent call to
 * {@link #getNextParam()} consumes one part (its headers and its content) from the body.
 *
 * @author andy
 */
public class MultipartFormParser {

    public static final String WWW_MULTIPART_FORM_DATA = "multipart/form-data";
    public static final String DEFAULT_ENCODING = "ISO-8859-1";

    /** Name (including the '=') of the Content-Type parameter carrying the boundary. */
    private static final String BOUNDARY_PARAM = "boundary=";

    /** Message used whenever the body ends before an expected boundary line. */
    private static final String PREMATURE_END = "Corrupt form data: premature ending";

    /** Boundary as it appears in the body, i.e. already prefixed with "--". */
    private String boundary = null;

    private String encoding = DEFAULT_ENCODING;
    private BufferedReader br = null;

    /** Set once the closing delimiter ("--boundary--") has been consumed. */
    private boolean finished = false;

    /**
     * Cannot create a class without initialization!!!
     */
    private MultipartFormParser() {}

    /**
     * Creates a parser positioned just after the first boundary line of the request body.
     *
     * @param msg the message whose request header and request body are parsed
     * @throws IOException if the Content-Type is missing or is not multipart/form-data,
     *     if it declares no usable boundary, or if the body ends before a boundary line
     */
    public MultipartFormParser(HttpMessage msg) throws IOException {
        String contentType = msg.getRequestHeader().getHeader(HttpHeader.CONTENT_TYPE);

        // Media type names are case-insensitive, so "Multipart/Form-Data" must be accepted too
        if (contentType == null
                || !contentType.regionMatches(
                        true, 0, WWW_MULTIPART_FORM_DATA, 0, WWW_MULTIPART_FORM_DATA.length())) {
            throw new IOException("The request is not a " + WWW_MULTIPART_FORM_DATA + " content");
        }

        // The boundary is carried by the content type and
        // should look something like "------------------------12012133613061"
        boundary = extractBoundary(contentType);
        if (boundary == null) {
            throw new IOException("No boundary defined in the Content-type header");
        }

        br = new BufferedReader(new StringReader(msg.getRequestBody().toString()));

        // Read until we hit the boundary.
        // Some clients send a preamble (per RFC 2046), so ignore that.
        // Thanks to Ben Johnson, ben.johnson@merrillcorp.com, for pointing out
        // the need for preamble support.
        String line;
        do {
            line = br.readLine();
            if (line == null) {
                throw new IOException(PREMATURE_END);
            }
        } while (!line.startsWith(boundary));

        // A body made only of the closing delimiter contains no parts at all
        finished = isClosingDelimiter(line);
    }

    /**
     * Gets the boundary used to separate the parts.
     *
     * @return the boundary as matched against body lines (prefixed with "--")
     */
    public String getBoundary() {
        return boundary;
    }

    /**
     * Reads the next part of the body: first its header lines, then its content up to the
     * following boundary line.
     *
     * @return the next part, or {@code null} when there are no parts left (closing delimiter
     *     already consumed, end of body, an empty line where headers were expected, or the
     *     body ending inside the headers)
     * @throws IOException if the body ends inside a part's content, before the boundary
     */
    public MultipartParam getNextParam() throws IOException {
        if (finished) {
            // Anything after the closing delimiter is epilogue, not another part
            return null;
        }

        // Read the headers; they look like this (not all may be present):
        // Content-Disposition: form-data; name="field1"; filename="file1.txt"
        // Content-Type: type/subtype
        // Content-Transfer-Encoding: binary
        String line = br.readLine();
        if (line == null || line.length() == 0) {
            // Either no parts left, or the empty line IE4 on Mac sends at the end.
            // Thanks to Daniel Lemire and Henri Tourigny for this fix.
            return null;
        }

        MultipartParam param = new MultipartParam();

        // Read the header lines until we hit an empty line.
        // A line starting with whitespace is considered a continuation of the
        // previous header. Thanks to Nic Ferrier for identifying a good fix.
        while (line != null && line.length() > 0) {
            String nextLine = br.readLine();
            while (nextLine != null && (nextLine.startsWith(" ") || nextLine.startsWith("\t"))) {
                line = line + nextLine;
                nextLine = br.readLine();
            }

            param.addHeader(line);
            line = nextLine;
        }

        // If we got a null above, the body ended inside the headers
        if (line == null) {
            return null;
        }

        // Now, finally, we read the content (ends at the next boundary line)
        StringBuilder value = new StringBuilder();
        boolean firstLine = true;
        line = br.readLine();
        while (line != null && !line.startsWith(boundary)) {
            if (!firstLine) {
                value.append(HttpHeader.CRLF);
            }
            firstLine = false;
            value.append(line);
            line = br.readLine();
        }

        if (line == null) {
            // Truncated body: report it the same way the constructor does
            // instead of failing with a NullPointerException
            throw new IOException(PREMATURE_END);
        }

        finished = isClosingDelimiter(line);
        param.setContent(value.toString());
        return param;
    }

    /**
     * Tells whether the given boundary line is the closing delimiter, that is the boundary
     * immediately followed by "--".
     *
     * @param boundaryLine a line already known to start with the boundary
     * @return {@code true} if no further parts follow this line
     */
    private boolean isClosingDelimiter(String boundaryLine) {
        return boundaryLine.startsWith(boundary + "--");
    }

    /**
     * Extracts the boundary from a Content-Type header value.
     *
     * <p>Handles quoted and unquoted values, ignores any parameters following the boundary
     * and matches the parameter name case-insensitively.
     *
     * @param line the Content-Type header value
     * @return the boundary prefixed with "--", or {@code null} if the header has no boundary
     *     parameter, the value is empty, or a quoted value is not terminated
     */
    private String extractBoundary(String line) {
        // Use the last occurrence because IE 4.01 on Win98 has been known to send the
        // "boundary=" string multiple times. Thanks to David Wall for this fix.
        int index = lastIndexOfIgnoreCase(line, BOUNDARY_PARAM);
        if (index == -1) {
            return null;
        }

        String token = line.substring(index + BOUNDARY_PARAM.length()).trim();
        if (token.startsWith("\"")) {
            // The boundary is enclosed in quotes: keep what is between the opening quote
            // and the next one, so that later parameters are not swallowed
            int closingQuote = token.indexOf('"', 1);
            if (closingQuote == -1) {
                return null;
            }
            token = token.substring(1, closingQuote);
        } else {
            // Unquoted value ends at the next parameter separator, if any
            int separator = token.indexOf(';');
            if (separator != -1) {
                token = token.substring(0, separator).trim();
            }
        }

        if (token.length() == 0) {
            return null;
        }

        // In the body the boundary is always preceded by an extra "--"
        return "--" + token;
    }

    /**
     * Finds the last occurrence of {@code needle} in {@code text}, ignoring case.
     *
     * @return the index of the last match, or -1 if there is none
     */
    private static int lastIndexOfIgnoreCase(String text, String needle) {
        for (int i = text.length() - needle.length(); i >= 0; i--) {
            if (text.regionMatches(true, i, needle, 0, needle.length())) {
                return i;
            }
        }
        return -1;
    }
}