/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.olat.core.util.http;
import java.io.IOException;
import java.io.StringReader;
/**
* HTTP header value parser implementation. Parsing HTTP headers as per RFC2616
* is not always as simple as it first appears. For headers that only use tokens
* the simple approach will normally be sufficient. However, for the other
* headers, while simple code meets 99.9% of cases, there are often some edge
* cases that make things far more complicated.
*
* The purpose of this parser is to let the parser worry about the edge cases.
* It provides tolerant (where safe to do so) parsing of HTTP header values
* assuming that wrapped header lines have already been unwrapped. (The Tomcat
* header processing code does the unwrapping.)
*
*/
public class HttpParser {

    /** Number of 7-bit ASCII code points covered by the lookup tables. */
    private static final int ARRAY_SIZE = 128;

    // Lookup tables used by isToken() and isHex(), indexed by ASCII code point
    private static final boolean[] IS_TOKEN = new boolean[ARRAY_SIZE];
    private static final boolean[] IS_HEX = new boolean[ARRAY_SIZE];

    static {
        // Setup the flag arrays
        for (int i = 0; i < ARRAY_SIZE; i++) {
            // RFC 2616: CTL = octets 0 - 31 and DEL (127)
            boolean control = i < 32 || i == 127;
            boolean separator =
                    i == '(' || i == ')' || i == '<' || i == '>' || i == '@' ||
                    i == ',' || i == ';' || i == ':' || i == '\\' || i == '\"' ||
                    i == '/' || i == '[' || i == ']' || i == '?' || i == '=' ||
                    i == '{' || i == '}' || i == ' ' || i == '\t';

            // Token: any 7-bit character that is neither a control nor a separator
            IS_TOKEN[i] = !control && !separator;

            IS_HEX[i] = (i >= '0' && i <= '9') || (i >= 'A' && i <= 'F') ||
                    (i >= 'a' && i <= 'f');
        }
    }

    /**
     * Removes the surrounding double quotes from a quoted string and resolves
     * backslash escapes (the character following a backslash is copied
     * verbatim).
     *
     * <p>
     * Only the first character is checked for being a double quote; the last
     * character of the input is treated as the closing quote and dropped
     * without being checked.
     *
     * @param input the value to unquote, may be <code>null</code>
     * @return the unquoted value, or <code>input</code> unchanged if it is
     *         <code>null</code>, shorter than two characters or does not
     *         start with a double quote
     */
    public static String unquote(String input) {
        if (input == null || input.length() < 2 || input.charAt(0) != '"') {
            return input;
        }

        // Index of the (assumed) closing quote
        int end = input.length() - 1;
        StringBuilder result = new StringBuilder(end);
        for (int i = 1; i < end; i++) {
            char c = input.charAt(i);
            if (c == '\\') {
                // Escape: take the following character literally. i is at
                // most 'end' here, which is still a valid index.
                i++;
                c = input.charAt(i);
            }
            result.append(c);
        }
        return result.toString();
    }

    /**
     * @param c the character (or -1 for EOF) to test
     * @return <code>true</code> if <code>c</code> is a 7-bit character that is
     *         allowed in an HTTP token, <code>false</code> otherwise
     *         (including for values outside 0-127)
     */
    static boolean isToken(int c) {
        return c >= 0 && c < ARRAY_SIZE && IS_TOKEN[c];
    }

    /**
     * @param c the character (or -1 for EOF) to test
     * @return <code>true</code> if <code>c</code> is one of 0-9, a-f or A-F,
     *         <code>false</code> otherwise (including for values outside
     *         0-127)
     */
    static boolean isHex(int c) {
        return c >= 0 && c < ARRAY_SIZE && IS_HEX[c];
    }

    /**
     * Skips any LWS (space, tab, CR, LF) and returns the next character.
     *
     * @param input     the reader to consume from
     * @param withReset if <code>true</code> the returned character is only
     *                  peeked: the reader is left positioned on it (this uses
     *                  mark/reset and therefore replaces any existing mark).
     *                  If <code>false</code> the returned character is
     *                  consumed.
     * @return the first non-LWS character, or -1 if the end of the input was
     *         reached
     * @throws IOException if the reader has been closed
     */
    static int skipLws(StringReader input, boolean withReset) throws IOException {
        if (withReset) {
            input.mark(1);
        }
        int c = input.read();

        while (c == 32 || c == 9 || c == 10 || c == 13) {
            if (withReset) {
                input.mark(1);
            }
            c = input.read();
        }

        if (withReset) {
            input.reset();
        }
        return c;
    }

    /**
     * Skips leading LWS and then tries to consume the given constant.
     *
     * <p>
     * This method sets a mark on the reader, replacing any existing mark.
     *
     * @param input    the reader to consume from
     * @param constant the exact text expected next
     * @return {@link SkipResult#FOUND} if the constant was read (the reader is
     *         positioned directly after it), {@link SkipResult#EOF} if no data
     *         was left after the LWS, or {@link SkipResult#NOT_FOUND} if other
     *         data was found (the reader is positioned on the first non-LWS
     *         character)
     * @throws IOException if the reader has been closed
     */
    static SkipResult skipConstant(StringReader input, String constant) throws IOException {
        int len = constant.length();

        // Position on the first non-LWS character and remember that position.
        // mark()/reset() is used rather than skip(-n) since StringReader.skip()
        // is a NOP once the end of the string has been reached.
        skipLws(input, true);
        input.mark(len);
        int c = input.read();

        for (int i = 0; i < len; i++) {
            if (i == 0 && c == -1) {
                return SkipResult.EOF;
            }
            if (c != constant.charAt(i)) {
                input.reset();
                return SkipResult.NOT_FOUND;
            }
            if (i != (len - 1)) {
                c = input.read();
            }
        }
        return SkipResult.FOUND;
    }

    /**
     * Skips leading LWS and reads a token. The character terminating the token
     * is left available for the next read.
     *
     * <p>
     * This method sets a mark on the reader, replacing any existing mark.
     *
     * @param input the reader to consume from
     * @return the token if one was found, the empty string if no data was
     *         available to read or <code>null</code> if data other than a
     *         token was found
     * @throws IOException if the reader has been closed
     */
    static String readToken(StringReader input) throws IOException {
        StringBuilder result = new StringBuilder();

        skipLws(input, true);
        input.mark(1);
        int c = input.read();

        while (c != -1 && isToken(c)) {
            result.append((char) c);
            input.mark(1);
            c = input.read();
        }
        // Make the non-token character available for the next read. Use
        // mark()/reset() rather than skip(-1) since skip() is a NOP once the
        // end of the string has been reached, which would lose a terminating
        // character that is also the last character of the input.
        input.reset();

        if (c != -1 && result.length() == 0) {
            return null;
        } else {
            return result.toString();
        }
    }

    /**
     * Skips leading LWS and reads a quoted string, resolving backslash
     * escapes. The first non-LWS character is consumed even if it is not a
     * double quote.
     *
     * @param input        the reader to consume from
     * @param returnQuoted if <code>true</code> the surrounding quotes and the
     *                     escaping backslashes are kept in the result
     * @return the quoted string if one was found, null if data other than a
     *         quoted string was found or null if the end of data was reached
     *         before the quoted string was terminated
     * @throws IOException if the reader has been closed
     */
    static String readQuotedString(StringReader input, boolean returnQuoted) throws IOException {
        int c = skipLws(input, false);
        if (c != '"') {
            return null;
        }

        StringBuilder result = new StringBuilder();
        if (returnQuoted) {
            result.append('\"');
        }

        c = input.read();
        while (c != '"') {
            if (c == -1) {
                return null;
            }
            if (c == '\\') {
                c = input.read();
                if (c == -1) {
                    // Input ended directly after the escape character
                    return null;
                }
                if (returnQuoted) {
                    result.append('\\');
                }
            }
            // The cast matters: append(int) would add the decimal code point
            result.append((char) c);
            c = input.read();
        }

        if (returnQuoted) {
            result.append('\"');
        }
        return result.toString();
    }

    /**
     * Skips leading LWS and reads either a quoted string (if the next
     * character is a double quote) or a token.
     *
     * @param input        the reader to consume from
     * @param returnQuoted passed on when a quoted string is read: if
     *                     <code>true</code> quotes and escaping backslashes
     *                     are kept in the result
     * @return the result of {@link #readQuotedString(StringReader, boolean)}
     *         or {@link #readToken(StringReader)} respectively
     * @throws IOException if the reader has been closed
     */
    static String readTokenOrQuotedString(StringReader input, boolean returnQuoted)
            throws IOException {
        // Peek only, so the first non-LWS character can be read again
        int c = skipLws(input, true);

        if (c == '"') {
            return readQuotedString(input, returnQuoted);
        } else {
            return readToken(input);
        }
    }

    /**
     * Token can be read unambiguously with or without surrounding quotes so
     * this parsing method for token permits optional surrounding double quotes.
     * This is not defined in any RFC. It is a special case to handle data from
     * buggy clients (known buggy clients for DIGEST auth include Microsoft IE 8
     * and 9, Apple Safari for OSX and iOS) that add quotes to values that
     * should be tokens.
     *
     * <p>
     * For an unquoted token the terminating character is left available for
     * the next read. This method sets a mark on the reader, replacing any
     * existing mark.
     *
     * @param input the reader to consume from
     * @return the token if one was found, null if data other than a token or
     *         quoted token was found or null if the end of data was reached
     *         before a quoted token was terminated
     * @throws IOException if the reader has been closed
     */
    static String readQuotedToken(StringReader input) throws IOException {
        StringBuilder result = new StringBuilder();
        boolean quoted = false;

        int c = skipLws(input, false);

        if (c == '"') {
            quoted = true;
        } else if (c == -1 || !isToken(c)) {
            return null;
        } else {
            result.append((char) c);
        }
        input.mark(1);
        c = input.read();

        while (c != -1 && isToken(c)) {
            result.append((char) c);
            input.mark(1);
            c = input.read();
        }

        if (quoted) {
            if (c != '"') {
                return null;
            }
        } else {
            // Make the non-token character available for the next read. Use
            // mark()/reset() rather than skip(-1) since skip() is a NOP once
            // the end of the string has been reached.
            input.reset();
        }

        if (c != -1 && result.length() == 0) {
            return null;
        } else {
            return result.toString();
        }
    }

    /**
     * LHEX can be read unambiguously with or without surrounding quotes so this
     * parsing method for LHEX permits optional surrounding double quotes. Some
     * buggy clients (libwww-perl for DIGEST auth) are known to send quoted LHEX
     * when the specification requires just LHEX.
     *
     * <p>
     * LHEX are, literally, lower-case hexadecimal digits. This implementation
     * allows for upper-case digits as well, converting the returned value to
     * lower-case.
     *
     * <p>
     * For unquoted LHEX the terminating character is left available for the
     * next read. This method sets a mark on the reader, replacing any existing
     * mark.
     *
     * @param input the reader to consume from
     * @return the sequence of LHEX (minus any surrounding quotes) if any was
     *         found, or <code>null</code> if data other than LHEX was found
     * @throws IOException if the reader has been closed
     */
    static String readLhex(StringReader input) throws IOException {
        StringBuilder result = new StringBuilder();
        boolean quoted = false;

        int c = skipLws(input, false);

        if (c == '"') {
            quoted = true;
        } else if (c == -1 || !isHex(c)) {
            return null;
        } else {
            if ('A' <= c && c <= 'F') {
                // Convert to lower case
                c += ('a' - 'A');
            }
            result.append((char) c);
        }
        input.mark(1);
        c = input.read();

        while (c != -1 && isHex(c)) {
            if ('A' <= c && c <= 'F') {
                // Convert to lower case
                c += ('a' - 'A');
            }
            result.append((char) c);
            input.mark(1);
            c = input.read();
        }

        if (quoted) {
            if (c != '"') {
                return null;
            }
        } else {
            // Make the non-hex character available for the next read. Use
            // mark()/reset() rather than skip(-1) since skip() is a NOP once
            // the end of the string has been reached.
            input.reset();
        }

        if (c != -1 && result.length() == 0) {
            return null;
        } else {
            return result.toString();
        }
    }

    /**
     * Reads a quality value of the form <code>q=0.123</code>. Whitespace
     * around the <code>q</code>, the <code>=</code> and after the value is
     * tolerated. Decimal places beyond the third are read but ignored.
     *
     * <p>
     * On return the reader is positioned directly after the next delimiter,
     * or at the end of the input if there is no further delimiter.
     *
     * @param input     the reader to consume from
     * @param delimiter the character that terminates the quality value
     * @return 1 if no quality value is present, 0 if the value is malformed
     *         or greater than 1 (so that the entry gets dropped), the parsed
     *         value otherwise
     * @throws IOException if the reader has been closed
     */
    static double readWeight(StringReader input, char delimiter) throws IOException {
        int c = skipLws(input, false);
        if (c == -1 || c == delimiter) {
            // No q value just whitespace
            return 1;
        } else if (c != 'q') {
            // Malformed. Use quality of zero so it is dropped.
            skipUntil(input, c, delimiter);
            return 0;
        }
        // RFC 7231 does not allow whitespace here but be tolerant
        c = skipLws(input, false);
        if (c != '=') {
            // Malformed. Use quality of zero so it is dropped.
            skipUntil(input, c, delimiter);
            return 0;
        }

        // RFC 7231 does not allow whitespace here but be tolerant
        c = skipLws(input, false);

        // Should be no more than 3 decimal places
        StringBuilder value = new StringBuilder(5);
        // -1 means the decimal point has not been seen yet
        int decimalPlacesRead = -1;

        if (c == '0' || c == '1') {
            value.append((char) c);
            c = input.read();

            // Every character is examined before the next one is read so the
            // character that ends the number (delimiter, whitespace, EOF or
            // garbage) is never silently swallowed.
            while (true) {
                if (decimalPlacesRead == -1 && c == '.') {
                    value.append('.');
                    decimalPlacesRead = 0;
                } else if (decimalPlacesRead > -1 && c >= '0' && c <= '9') {
                    if (decimalPlacesRead < 3) {
                        value.append((char) c);
                        decimalPlacesRead++;
                    }
                } else {
                    break;
                }
                c = input.read();
            }
        } else {
            // Malformed. Use quality of zero so it is dropped and skip until
            // EOF or the next delimiter
            skipUntil(input, c, delimiter);
            return 0;
        }

        // Be tolerant of whitespace between the value and the delimiter
        if (c == 9 || c == 32) {
            c = skipLws(input, false);
        }

        // Must be at delimiter or EOF
        if (c != delimiter && c != -1) {
            // Malformed. Use quality of zero so it is dropped and skip until
            // EOF or the next delimiter
            skipUntil(input, c, delimiter);
            return 0;
        }

        double result = Double.parseDouble(value.toString());
        if (result > 1) {
            return 0;
        }
        return result;
    }

    /**
     * Skips all characters until EOF or the specified target is found. Normally
     * used to skip invalid input until the next separator.
     *
     * @param input  the reader to consume from
     * @param c      the character most recently read from <code>input</code>;
     *               it is tested before anything else is read
     * @param target the character to stop at (it is consumed)
     * @return {@link SkipResult#FOUND} if the target was found,
     *         {@link SkipResult#EOF} if the end of the input was reached
     *         first
     * @throws IOException if the reader has been closed
     */
    static SkipResult skipUntil(StringReader input, int c, char target) throws IOException {
        while (c != -1 && c != target) {
            c = input.read();
        }
        if (c == -1) {
            return SkipResult.EOF;
        } else {
            return SkipResult.FOUND;
        }
    }
}