/** * Copyright 2013, Landz and its contributors. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package z.net.http.parser; import java.nio.ByteBuffer; import static z.net.http.parser.HTTPParser.C.*; import static z.net.http.parser.HTTPParser.State.*; public class HTTPParser { /* lots of unsigned chars here, not sure what to about them, `bytes` in java suck... */ ParserType type; State state; HState header_state; boolean strict; int index; int flags; // TODO int nread; long content_length; int p_start; // updated each call to execute to indicate where the buffer was before we began calling it. /** * READ-ONLY * */ public int http_major; public int http_minor; public int status_code; /* responses only */ public HTTPMethod method; /* requests only */ /* true = Upgrade header was present and the parser has exited because of that. * false = No upgrade header present. * Should be checked when http_parser_execute() returns in addition to * error checking. */ public boolean upgrade; /** * PUBLIC * */ // TODO : this is used in c to maintain application state. // is this even necessary? we have state in java ? // consider // Object data; /* A pointer to get hook to the "connection" or "socket" object */ /* * technically we could combine all of these (except for url_mark) into one * variable, saving stack space, but it seems more clear to have them * separated. */ int header_field_mark = -1; int header_value_mark = -1; int url_mark = -1; int body_mark = -1; /** * Construct a Parser for ParserType.HTTP_BOTH, meaning it * determines whether it's parsing a request or a response. */ public HTTPParser() { this(ParserType.HTTP_BOTH); } /** * Construct a Parser and initialise it to parse either * requests or responses. */ public HTTPParser(ParserType type) { this.type = type; switch (type) { case HTTP_REQUEST: this.state = State.start_req; break; case HTTP_RESPONSE: this.state = State.start_res; break; case HTTP_BOTH: this.state = State.start_req_or_res; break; default: throw new HTTPException("can't happen, invalid ParserType enum"); } } /* * Utility to facilitate System.out.println style debugging (the way god intended) */ static void p(Object o) { System.out.println(o); } /** * Comment from C version follows * <p> * Our URL parser. * <p> * This is designed to be shared by http_parser_execute() for URL validation, * hence it has a state transition + byte-for-byte interface. In addition, it * is meant to be embedded in http_parser_parse_url(), which does the dirty * work of turning state transitions URL components for its API. * <p> * This function should only be invoked with non-space characters. It is * assumed that the caller cares about (and can detect) the transition between * URL and non-URL states by looking for these. */ public State parse_url_char(byte ch) { int chi = ch & 0xff; // utility, ch without signedness for table lookups. if (SPACE == ch) { throw new HTTPException("space as url char"); } switch (state) { case req_spaces_before_url: /* Proxied requests are followed by scheme of an absolute URI (alpha). * All methods except CONNECT are followed by '/' or '*'. */ if (SLASH == ch || STAR == ch) { return req_path; } if (isAtoZ(ch)) { return req_schema; } break; case req_schema: if (isAtoZ(ch)) { return req_schema; } if (COLON == ch) { return req_schema_slash; } break; case req_schema_slash: if (SLASH == ch) { return req_schema_slash_slash; } break; case req_schema_slash_slash: if (SLASH == ch) { return req_host_start; } break; case req_host_start: if (ch == (byte) '[') { return req_host_v6_start; } if (isHostChar(ch)) { return req_host; } break; case req_host: if (isHostChar(ch)) { return req_host; } /* FALLTHROUGH */ case req_host_v6_end: switch (ch) { case ':': return req_port_start; case '/': return req_path; case '?': return req_query_string_start; } break; case req_host_v6: if (ch == ']') { return req_host_v6_end; } /* FALLTHROUGH */ case req_host_v6_start: if (isHex(ch) || ch == ':') { return req_host_v6; } break; case req_port: switch (ch) { case '/': return req_path; case '?': return req_query_string_start; } /* FALLTHROUGH */ case req_port_start: if (isDigit(ch)) { return req_port; } break; case req_path: if (isNormalUrlChar(chi)) { return req_path; } switch (ch) { case '?': return req_query_string_start; case '#': return req_fragment_start; } break; case req_query_string_start: case req_query_string: if (isNormalUrlChar(chi)) { return req_query_string; } switch (ch) { case '?': /* allow extra '?' in query string */ return req_query_string; case '#': return req_fragment_start; } break; case req_fragment_start: if (isNormalUrlChar(chi)) { return req_fragment; } switch (ch) { case '?': return req_fragment; case '#': return req_fragment_start; } break; case req_fragment: if (isNormalUrlChar(ch)) { return req_fragment; } switch (ch) { case '?': case '#': return req_fragment; } break; default: break; } /* We should never fall out of the switch above unless there's an error */ return dead; } /** * Execute the parser with the currently available data contained in * the buffer. The buffers position() and limit() need to be set * correctly (obviously) and a will be updated approriately when the * method returns to reflect the consumed data. */ public int execute(ParserSettings settings, ByteBuffer data) { int p = data.position(); this.p_start = p; // this is used for pretty printing errors. // and returning the amount of processed bytes. // In case the headers don't provide information about the content // length, `execute` needs to be called with an empty buffer to // indicate that all the data has been send be the client/server, // else there is no way of knowing the message is complete. int len = (data.limit() - data.position()); if (0 == len) { // if (State.body_identity_eof == state) { // settings.call_on_message_complete(this); // } switch (state) { case body_identity_eof: settings.call_on_message_complete(this); return data.position() - this.p_start; case dead: case start_req_or_res: case start_res: case start_req: return data.position() - this.p_start; default: // should we really consider this an error!? throw new HTTPException("empty bytes! " + state); // error } } // in case the _previous_ call to the parser only has data to get to // the middle of certain fields, we need to update marks to point at // the beginning of the current buffer. switch (state) { case header_field: header_field_mark = p; break; case header_value: header_value_mark = p; break; case req_path: case req_schema: case req_schema_slash: case req_schema_slash_slash: case req_host_start: case req_host_v6_start: case req_host_v6: case req_host_v6_end: case req_host: case req_port_start: case req_port: case req_query_string_start: case req_query_string: case req_fragment_start: case req_fragment: url_mark = p; break; } boolean reexecute = false; int pe = 0; byte ch = 0; int chi = 0; byte c = -1; int to_read = 0; // this is where the work gets done, traverse the available data... while (data.position() != data.limit() || reexecute) { // p(state + ": r: " + reexecute + " :: " +p ); if (!reexecute) { p = data.position(); pe = data.limit(); ch = data.get(); // the current character to process. chi = ch & 0xff; // utility, ch without signedness for table lookups. c = -1; // utility variably used for up- and downcasing etc. to_read = 0; // used to keep track of how much of body, etc. is left to read if (parsing_header(state)) { ++nread; if (nread > HTTP_MAX_HEADER_SIZE) { return error(settings, "possible buffer overflow", data); } } } reexecute = false; // p(state + " ::: " + ch + " : " + (((CR == ch) || (LF == ch)) ? ch : ("'" + (char)ch + "'")) +": "+p ); switch (state) { /* * this state is used after a 'Connection: close' message * the parser will error out if it reads another message */ case dead: if (CR == ch || LF == ch) { break; } return error(settings, "Connection already closed", data); case start_req_or_res: if (CR == ch || LF == ch) { break; } flags = 0; content_length = -1; if (H == ch) { state = State.res_or_resp_H; settings.call_on_message_begin(this); } else { type = ParserType.HTTP_REQUEST; method = start_req_method_assign(ch); if (null == method) { return error(settings, "invalid method", data); } index = 1; state = State.req_method; } break; case res_or_resp_H: if (T == ch) { type = ParserType.HTTP_RESPONSE; state = State.res_HT; } else { if (E != ch) { return error(settings, "not E", data); } type = ParserType.HTTP_REQUEST; method = HTTPMethod.HTTP_HEAD; index = 2; state = State.req_method; } break; case start_res: flags = 0; content_length = -1; switch (ch) { case H: state = State.res_H; break; case CR: case LF: break; default: return error(settings, "Not H or CR/LF", data); } settings.call_on_message_begin(this); break; case res_H: if (strict && T != ch) { return error(settings, "Not T", data); } state = State.res_HT; break; case res_HT: if (strict && T != ch) { return error(settings, "Not T2", data); } state = State.res_HTT; break; case res_HTT: if (strict && P != ch) { return error(settings, "Not P", data); } state = State.res_HTTP; break; case res_HTTP: if (strict && SLASH != ch) { return error(settings, "Not '/'", data); } state = State.res_first_http_major; break; case res_first_http_major: if (!isDigit(ch)) { return error(settings, "Not a digit", data); } http_major = (int) ch - 0x30; state = State.res_http_major; break; /* major HTTP version or dot */ case res_http_major: if (DOT == ch) { state = State.res_first_http_minor; break; } if (!isDigit(ch)) { return error(settings, "Not a digit", data); } http_major *= 10; http_major += (ch - 0x30); if (http_major > 999) { return error(settings, "invalid http major version: ", data); } break; /* first digit of minor HTTP version */ case res_first_http_minor: if (!isDigit(ch)) { return error(settings, "Not a digit", data); } http_minor = (int) ch - 0x30; state = State.res_http_minor; break; /* minor HTTP version or end of request line */ case res_http_minor: if (SPACE == ch) { state = State.res_first_status_code; break; } if (!isDigit(ch)) { return error(settings, "Not a digit", data); } http_minor *= 10; http_minor += (ch - 0x30); if (http_minor > 999) { return error(settings, "invalid http minor version: ", data); } break; case res_first_status_code: if (!isDigit(ch)) { if (SPACE == ch) { break; } return error(settings, "Not a digit (status code)", data); } status_code = (int) ch - 0x30; state = State.res_status_code; break; case res_status_code: if (!isDigit(ch)) { switch (ch) { case SPACE: state = State.res_status; break; case CR: state = State.res_line_almost_done; break; case LF: state = State.header_field_start; break; default: return error(settings, "not a valid status code", data); } break; } status_code *= 10; status_code += (int) ch - 0x30; if (status_code > 999) { return error(settings, "ridiculous status code:", data); } if (status_code > 99) { settings.call_on_status_complete(this); } break; case res_status: /* the human readable status. e.g. "NOT FOUND" * we are not humans so just ignore this * we are not men, we are devo. */ if (CR == ch) { state = State.res_line_almost_done; break; } if (LF == ch) { state = State.header_field_start; break; } break; case res_line_almost_done: if (strict && LF != ch) { return error(settings, "not LF", data); } state = State.header_field_start; break; case start_req: if (CR == ch || LF == ch) { break; } flags = 0; content_length = -1; if (!isAtoZ(ch)) { return error(settings, "invalid method", data); } method = start_req_method_assign(ch); if (null == method) { return error(settings, "invalid method", data); } index = 1; state = State.req_method; settings.call_on_message_begin(this); break; case req_method: if (0 == ch) { return error(settings, "NULL in method", data); } byte[] arr = method.bytes; if (SPACE == ch && index == arr.length) { state = State.req_spaces_before_url; } else if (arr[index] == ch) { // wuhu! } else if (HTTPMethod.HTTP_CONNECT == method) { if (1 == index && H == ch) { method = HTTPMethod.HTTP_CHECKOUT; } else if (2 == index && P == ch) { method = HTTPMethod.HTTP_COPY; } } else if (HTTPMethod.HTTP_MKCOL == method) { if (1 == index && O == ch) { method = HTTPMethod.HTTP_MOVE; } else if (1 == index && E == ch) { method = HTTPMethod.HTTP_MERGE; } else if (1 == index && DASH == ch) { /* M-SEARCH */ method = HTTPMethod.HTTP_MSEARCH; } else if (2 == index && A == ch) { method = HTTPMethod.HTTP_MKACTIVITY; } } else if (1 == index && HTTPMethod.HTTP_POST == method) { if (R == ch) { method = HTTPMethod.HTTP_PROPFIND; /* or HTTP_PROPPATCH */ } else if (U == ch) { method = HTTPMethod.HTTP_PUT; /* or HTTP_PURGE */ } else if (A == ch) { method = HTTPMethod.HTTP_PATCH; } } else if (2 == index) { if (HTTPMethod.HTTP_PUT == method) { if (R == ch) { method = HTTPMethod.HTTP_PURGE; } } else if (HTTPMethod.HTTP_UNLOCK == method) { if (S == ch) { method = HTTPMethod.HTTP_UNSUBSCRIBE; } } } else if (4 == index && HTTPMethod.HTTP_PROPFIND == method && P == ch) { method = HTTPMethod.HTTP_PROPPATCH; } else { return error(settings, "Invalid HTTP method", data); } ++index; break; /******************* URL *******************/ case req_spaces_before_url: if (SPACE == ch) { break; } url_mark = p; if (HTTPMethod.HTTP_CONNECT == method) { state = req_host_start; } state = parse_url_char(ch); if (state == dead) { return error(settings, "Invalid something", data); } break; case req_schema: case req_schema_slash: case req_schema_slash_slash: case req_host_start: case req_host_v6_start: case req_host_v6: case req_port_start: switch (ch) { /* No whitespace allowed here */ case SPACE: case CR: case LF: return error(settings, "unexpected char in path", data); default: state = parse_url_char(ch); if (dead == state) { return error(settings, "unexpected char in path", data); } } break; case req_host: case req_host_v6_end: case req_port: case req_path: case req_query_string_start: case req_query_string: case req_fragment_start: case req_fragment: switch (ch) { case SPACE: settings.call_on_url(this, data, url_mark, p - url_mark); settings.call_on_path(this, data, url_mark, p - url_mark); url_mark = -1; state = State.req_http_start; break; case CR: case LF: http_major = 0; http_minor = 9; state = (CR == ch) ? req_line_almost_done : header_field_start; settings.call_on_url(this, data, url_mark, p - url_mark); //TODO check params!!! settings.call_on_path(this, data, url_mark, p - url_mark); url_mark = -1; break; default: state = parse_url_char(ch); if (dead == state) { return error(settings, "unexpected char in path", data); } } break; /******************* URL *******************/ /******************* HTTP 1.1 *******************/ case req_http_start: switch (ch) { case H: state = State.req_http_H; break; case SPACE: break; default: return error(settings, "error in req_http_H", data); } break; case req_http_H: if (strict && T != ch) { return error(settings, "unexpected char", data); } state = State.req_http_HT; break; case req_http_HT: if (strict && T != ch) { return error(settings, "unexpected char", data); } state = State.req_http_HTT; break; case req_http_HTT: if (strict && P != ch) { return error(settings, "unexpected char", data); } state = State.req_http_HTTP; break; case req_http_HTTP: if (strict && SLASH != ch) { return error(settings, "unexpected char", data); } state = req_first_http_major; break; /* first digit of major HTTP version */ case req_first_http_major: if (!isDigit(ch)) { return error(settings, "non digit in http major", data); } http_major = (int) ch - 0x30; state = State.req_http_major; break; /* major HTTP version or dot */ case req_http_major: if (DOT == ch) { state = State.req_first_http_minor; break; } if (!isDigit(ch)) { return error(settings, "non digit in http major", data); } http_major *= 10; http_major += (int) ch - 0x30; if (http_major > 999) { return error(settings, "ridiculous http major", data); } ; break; /* first digit of minor HTTP version */ case req_first_http_minor: if (!isDigit(ch)) { return error(settings, "non digit in http minor", data); } http_minor = (int) ch - 0x30; state = State.req_http_minor; break; case req_http_minor: if (ch == CR) { state = State.req_line_almost_done; break; } if (ch == LF) { state = State.header_field_start; break; } /* XXX allow spaces after digit? */ if (!isDigit(ch)) { return error(settings, "non digit in http minor", data); } http_minor *= 10; http_minor += (int) ch - 0x30; if (http_minor > 999) { return error(settings, "ridiculous http minor", data); } ; break; /* end of request line */ case req_line_almost_done: { if (ch != LF) { return error(settings, "missing LF after request line", data); } state = header_field_start; break; } /******************* HTTP 1.1 *******************/ /******************* Header *******************/ case header_field_start: { if (ch == CR) { state = headers_almost_done; break; } if (ch == LF) { /* they might be just sending \n instead of \r\n so this would be * the second \n to denote the end of headers*/ state = State.headers_almost_done; reexecute = true; break; } c = token(ch); if (0 == c) { return error(settings, "invalid char in header:", data); } header_field_mark = p; index = 0; state = State.header_field; switch (c) { case C: header_state = HState.C; break; case P: header_state = HState.matching_proxy_connection; break; case T: header_state = HState.matching_transfer_encoding; break; case U: header_state = HState.matching_upgrade; break; default: header_state = HState.general; break; } break; } case header_field: { c = token(ch); if (0 != c) { switch (header_state) { case general: break; case C: index++; header_state = (O == c ? HState.CO : HState.general); break; case CO: index++; header_state = (N == c ? HState.CON : HState.general); break; case CON: index++; switch (c) { case N: header_state = HState.matching_connection; break; case T: header_state = HState.matching_content_length; break; default: header_state = HState.general; break; } break; /* connection */ case matching_connection: index++; if (index > CONNECTION.length || c != CONNECTION[index]) { header_state = HState.general; } else if (index == CONNECTION.length - 1) { header_state = HState.connection; } break; /* proxy-connection */ case matching_proxy_connection: index++; if (index > PROXY_CONNECTION.length || c != PROXY_CONNECTION[index]) { header_state = HState.general; } else if (index == PROXY_CONNECTION.length - 1) { header_state = HState.connection; } break; /* content-length */ case matching_content_length: index++; if (index > CONTENT_LENGTH.length || c != CONTENT_LENGTH[index]) { header_state = HState.general; } else if (index == CONTENT_LENGTH.length - 1) { header_state = HState.content_length; } break; /* transfer-encoding */ case matching_transfer_encoding: index++; if (index > TRANSFER_ENCODING.length || c != TRANSFER_ENCODING[index]) { header_state = HState.general; } else if (index == TRANSFER_ENCODING.length - 1) { header_state = HState.transfer_encoding; } break; /* upgrade */ case matching_upgrade: index++; if (index > UPGRADE.length || c != UPGRADE[index]) { header_state = HState.general; } else if (index == UPGRADE.length - 1) { header_state = HState.upgrade; } break; case connection: case content_length: case transfer_encoding: case upgrade: if (SPACE != ch) header_state = HState.general; break; default: return error(settings, "Unknown Header State", data); } // switch: header_state break; } // 0 != c if (COLON == ch) { settings.call_on_header_field(this, data, header_field_mark, p - header_field_mark); header_field_mark = -1; state = State.header_value_start; break; } if (CR == ch) { state = State.header_almost_done; settings.call_on_header_field(this, data, header_field_mark, p - header_field_mark); header_field_mark = -1; break; } if (ch == LF) { settings.call_on_header_field(this, data, header_field_mark, p - header_field_mark); header_field_mark = -1; state = State.header_field_start; break; } return error(settings, "invalid header field", data); } case header_value_start: { if ((SPACE == ch) || (TAB == ch)) break; header_value_mark = p; state = State.header_value; index = 0; if (CR == ch) { settings.call_on_header_value(this, data, header_value_mark, p - header_value_mark); header_value_mark = -1; header_state = HState.general; state = State.header_almost_done; break; } if (LF == ch) { settings.call_on_header_value(this, data, header_value_mark, p - header_value_mark); header_value_mark = -1; state = State.header_field_start; break; } c = upper(ch); switch (header_state) { case upgrade: flags |= F_UPGRADE; header_state = HState.general; break; case transfer_encoding: /* looking for 'Transfer-Encoding: chunked' */ if (C == c) { header_state = HState.matching_transfer_encoding_chunked; } else { header_state = HState.general; } break; case content_length: if (!isDigit(ch)) { return error(settings, "Content-Length not numeric", data); } content_length = (int) ch - 0x30; break; case connection: /* looking for 'Connection: keep-alive' */ if (K == c) { header_state = HState.matching_connection_keep_alive; /* looking for 'Connection: close' */ } else if (C == c) { header_state = HState.matching_connection_close; } else { header_state = HState.general; } break; default: header_state = HState.general; break; } break; } // header value start case header_value: { if (CR == ch) { settings.call_on_header_value(this, data, header_value_mark, p - header_value_mark); header_value_mark = -1; state = State.header_almost_done; break; } if (LF == ch) { settings.call_on_header_value(this, data, header_value_mark, p - header_value_mark); header_value_mark = -1; state = header_almost_done; reexecute = true; break; } c = upper(ch); switch (header_state) { case general: break; case connection: case transfer_encoding: return error(settings, "Shouldn't be here", data); case content_length: if (SPACE == ch) { break; } if (!isDigit(ch)) { return error(settings, "Content-Length not numeric", data); } long t = content_length; t *= 10; t += (long) ch - 0x30; /* Overflow? */ // t will wrap and become negative ... if (t < content_length) { return error(settings, "Invalid content length", data); } content_length = t; break; /* Transfer-Encoding: chunked */ case matching_transfer_encoding_chunked: index++; if (index > CHUNKED.length || c != CHUNKED[index]) { header_state = HState.general; } else if (index == CHUNKED.length - 1) { header_state = HState.transfer_encoding_chunked; } break; /* looking for 'Connection: keep-alive' */ case matching_connection_keep_alive: index++; if (index > KEEP_ALIVE.length || c != KEEP_ALIVE[index]) { header_state = HState.general; } else if (index == KEEP_ALIVE.length - 1) { header_state = HState.connection_keep_alive; } break; /* looking for 'Connection: close' */ case matching_connection_close: index++; if (index > CLOSE.length || c != CLOSE[index]) { header_state = HState.general; } else if (index == CLOSE.length - 1) { header_state = HState.connection_close; } break; case transfer_encoding_chunked: case connection_keep_alive: case connection_close: if (SPACE != ch) header_state = HState.general; break; default: state = State.header_value; header_state = HState.general; break; } break; } // header_value case header_almost_done: if (!header_almost_done(ch)) { return error(settings, "incorrect header ending, expecting LF", data); } break; case header_value_lws: if (SPACE == ch || TAB == ch) { state = header_value_start; } else { state = header_field_start; reexecute = true; } break; case headers_almost_done: if (LF != ch) { return error(settings, "header not properly completed", data); } if (0 != (flags & F_TRAILING)) { /* End of a chunked request */ state = new_message(); settings.call_on_headers_complete(this); settings.call_on_message_complete(this); break; } state = headers_done; if (0 != (flags & F_UPGRADE) || HTTPMethod.HTTP_CONNECT == method) { upgrade = true; } /* Here we call the headers_complete callback. This is somewhat * different than other callbacks because if the user returns 1, we * will interpret that as saying that this message has no body. This * is needed for the annoying case of recieving a response to a HEAD * request. */ /* (responses to HEAD request contain a CONTENT-LENGTH header * but no content) * * Consider what to do here: I don't like the idea of the callback * interface having a different contract in the case of HEAD * responses. The alternatives would be either to: * * a.) require the header_complete callback to implement a different * interface or * * b.) provide an overridden execute(bla, bla, boolean * parsingHeader) implementation ... */ /*TODO TODO TODO TODO TODO TODO TODO TODO TODO TODO */ if (null != settings.on_headers_complete) { settings.call_on_headers_complete(this); //return; } // if (null != settings.on_headers_complete) { // switch (settings.on_headers_complete.cb(parser)) { // case 0: // break; // // case 1: // flags |= F_SKIPBODY; // break; // // default: // return p - data; /* Error */ // TODO // RuntimeException ? // } // } reexecute = true; break; case headers_done: if (strict && (LF != ch)) { return error(settings, "STRICT CHECK", data); //TODO correct error msg } nread = 0; // Exit, the rest of the connect is in a different protocol. if (upgrade) { state = new_message(); settings.call_on_message_complete(this); return data.position() - this.p_start; } if (0 != (flags & F_SKIPBODY)) { state = new_message(); settings.call_on_message_complete(this); } else if (0 != (flags & F_CHUNKED)) { /* chunked encoding - ignore Content-Length header */ state = State.chunk_size_start; } else { if (content_length == 0) { /* Content-Length header given but zero: Content-Length: 0\r\n */ state = new_message(); settings.call_on_message_complete(this); } else if (content_length != -1) { /* Content-Length header given and non-zero */ state = State.body_identity; } else { if (type == ParserType.HTTP_REQUEST || !http_message_needs_eof()) { /* Assume content-length 0 - read the next */ state = new_message(); settings.call_on_message_complete(this); } else { /* Read body until EOF */ state = State.body_identity_eof; } } } break; /******************* Header *******************/ /******************* Body *******************/ case body_identity: to_read = min(pe - p, content_length); //TODO change to use buffer? body_mark = p; if (to_read > 0) { settings.call_on_body(this, data, p, to_read); data.position(p + to_read); content_length -= to_read; if (content_length == 0) { state = message_done; reexecute = true; } } break; case body_identity_eof: to_read = pe - p; // TODO change to use buffer ? if (to_read > 0) { settings.call_on_body(this, data, p, to_read); data.position(p + to_read); } break; case message_done: state = new_message(); settings.call_on_message_complete(this); break; /******************* Body *******************/ /******************* Chunk *******************/ case chunk_size_start: if (1 != this.nread) { return error(settings, "nread != 1 (chunking)", data); } if (0 == (flags & F_CHUNKED)) { return error(settings, "not chunked", data); } c = UNHEX[chi]; if (c == -1) { return error(settings, "invalid hex char in chunk content length", data); } content_length = c; state = State.chunk_size; break; case chunk_size: if (0 == (flags & F_CHUNKED)) { return error(settings, "not chunked", data); } if (CR == ch) { state = State.chunk_size_almost_done; break; } c = UNHEX[chi]; if (c == -1) { if (SEMI == ch || SPACE == ch) { state = State.chunk_parameters; break; } return error(settings, "invalid hex char in chunk content length", data); } long t = content_length; t *= 16; t += c; if (t < content_length) { return error(settings, "invalid content length", data); } content_length = t; break; case chunk_parameters: if (0 == (flags & F_CHUNKED)) { return error(settings, "not chunked", data); } /* just ignore this shit. TODO check for overflow */ if (CR == ch) { state = State.chunk_size_almost_done; break; } break; case chunk_size_almost_done: if (0 == (flags & F_CHUNKED)) { return error(settings, "not chunked", data); } if (strict && LF != ch) { return error(settings, "expected LF at end of chunk size", data); } this.nread = 0; if (0 == content_length) { flags |= F_TRAILING; state = State.header_field_start; } else { state = State.chunk_data; } break; case chunk_data: //TODO Apply changes from C version for s_chunk_data if (0 == (flags & F_CHUNKED)) { return error(settings, "not chunked", data); } to_read = min(pe - p, content_length); if (to_read > 0) { settings.call_on_body(this, data, p, to_read); data.position(p + to_read); } if (to_read == content_length) { state = State.chunk_data_almost_done; } content_length -= to_read; break; case chunk_data_almost_done: if (0 == (flags & F_CHUNKED)) { return error(settings, "not chunked", data); } if (strict && CR != ch) { return error(settings, "chunk data terminated incorrectly, expected CR", data); } state = State.chunk_data_done; //TODO CALLBACK_DATA(body) // settings.call_on_body(this, data,p,?); break; case chunk_data_done: if (0 == (flags & F_CHUNKED)) { return error(settings, "not chunked", data); } if (strict && LF != ch) { return error(settings, "chunk data terminated incorrectly, expected LF", data); } state = State.chunk_size_start; break; /******************* Chunk *******************/ default: return error(settings, "unhandled state", data); } // switch } // while p = data.position(); /* Reaching this point assumes that we only received part of a * message, inform the callbacks about the progress made so far*/ settings.call_on_header_field(this, data, header_field_mark, p - header_field_mark); settings.call_on_header_value(this, data, header_value_mark, p - header_value_mark); settings.call_on_url(this, data, url_mark, p - url_mark); settings.call_on_path(this, data, url_mark, p - url_mark); return data.position() - this.p_start; } // execute int error(ParserSettings settings, String mes, ByteBuffer data) { settings.call_on_error(this, mes, data, this.p_start); this.state = State.dead; return data.position() - this.p_start; } public boolean http_message_needs_eof() { if (type == ParserType.HTTP_REQUEST) { return false; } /* See RFC 2616 section 4.4 */ if ((status_code / 100 == 1) || /* 1xx e.g. Continue */ (status_code == 204) || /* No Content */ (status_code == 304) || /* Not Modified */ (flags & F_SKIPBODY) != 0) { /* response to a HEAD request */ return false; } if ((flags & F_CHUNKED) != 0 || content_length != -1) { return false; } return true; } /* If http_should_keep_alive() in the on_headers_complete or * on_message_complete callback returns true, then this will be should be * the last message on the connection. * If you are the server, respond with the "Connection: close" header. * If you are the client, close the connection. */ public boolean http_should_keep_alive() { if (http_major > 0 && http_minor > 0) { /* HTTP/1.1 */ if (0 != (flags & F_CONNECTION_CLOSE)) { return false; } } else { /* HTTP/1.0 or earlier */ if (0 == (flags & F_CONNECTION_KEEP_ALIVE)) { return false; } } return !http_message_needs_eof(); } public int parse_url(ByteBuffer data, boolean is_connect, HTTPParserUrl u) { UrlFields uf = UrlFields.UF_MAX; UrlFields old_uf = UrlFields.UF_MAX; u.port = 0; u.field_set = 0; state = (is_connect ? State.req_host_start : State.req_spaces_before_url); int p_init = data.position(); int p = 0; byte ch = 0; while (data.position() != data.limit()) { p = data.position(); ch = data.get(); state = parse_url_char(ch); switch (state) { case dead: return 1; /* Skip delimeters */ case req_schema_slash: case req_schema_slash_slash: case req_host_start: case req_host_v6_start: case req_host_v6_end: case req_port_start: case req_query_string_start: case req_fragment_start: continue; case req_schema: uf = UrlFields.UF_SCHEMA; break; case req_host: case req_host_v6: uf = UrlFields.UF_HOST; break; case req_port: uf = UrlFields.UF_PORT; break; case req_path: uf = UrlFields.UF_PATH; break; case req_query_string: uf = UrlFields.UF_QUERY; break; case req_fragment: uf = UrlFields.UF_FRAGMENT; break; default: return 1; } /* Nothing's changed; soldier on */ if (uf == old_uf) { u.field_data[uf.getIndex()].len++; continue; } u.field_data[uf.getIndex()].off = p - p_init; u.field_data[uf.getIndex()].len = 1; u.field_set |= (1 << uf.getIndex()); old_uf = uf; } /* CONNECT requests can only contain "hostname:port" */ if (is_connect && u.field_set != ((1 << UrlFields.UF_HOST.getIndex()) | (1 << UrlFields.UF_PORT.getIndex()))) { return 1; } /* Make sure we don't end somewhere unexpected */ switch (state) { case req_host_v6_start: case req_host_v6: case req_host_v6_end: case req_host: case req_port_start: return 1; default: break; } if (0 != (u.field_set & (1 << UrlFields.UF_PORT.getIndex()))) { /* Don't bother with endp; we've already validated the string */ int v = strtoi(data, p_init + u.field_data[UrlFields.UF_PORT.getIndex()].off); /* Ports have a max value of 2^16 */ if (v > 0xffff) { return 1; } u.port = v; } return 0; } //hacky reimplementation of srttoul, tailored for our simple needs //we only need to parse port val, so no negative values etc int strtoi(ByteBuffer data, int start_pos) { data.position(start_pos); byte ch; String str = ""; while (data.position() < data.limit()) { ch = data.get(); if (Character.isWhitespace((char) ch)) { continue; } if (isDigit(ch)) { str = str + (char) ch; //TODO replace with something less hacky } else { break; } } return Integer.parseInt(str); } boolean isDigit(byte b) { if (b >= 0x30 && b <= 0x39) { return true; } return false; } boolean isHex(byte b) { return isDigit(b) || (lower(b) >= 0x61 /*a*/ && lower(b) <= 0x66 /*f*/); } boolean isAtoZ(byte b) { byte c = lower(b); return (c >= 0x61 /*a*/ && c <= 0x7a /*z*/); } byte lower(byte b) { return (byte) (b | 0x20); } byte upper(byte b) { char c = (char) (b); return (byte) Character.toUpperCase(c); } byte token(byte b) { if (!strict) { return (b == (byte) ' ') ? (byte) ' ' : (byte) tokens[b]; } else { return (byte) tokens[b]; } } boolean isHostChar(byte ch) { if (!strict) { return (isAtoZ(ch)) || isDigit(ch) || DOT == ch || DASH == ch || UNDER == ch; } else { return (isAtoZ(ch)) || isDigit(ch) || DOT == ch || DASH == ch; } } boolean isNormalUrlChar(int chi) { if (!strict) { return (chi > 0x80) || normal_url_char[chi]; } else { return normal_url_char[chi]; } } HTTPMethod start_req_method_assign(byte c) { switch (c) { case C: return HTTPMethod.HTTP_CONNECT; /* or COPY, CHECKOUT */ case D: return HTTPMethod.HTTP_DELETE; case G: return HTTPMethod.HTTP_GET; case H: return HTTPMethod.HTTP_HEAD; case L: return HTTPMethod.HTTP_LOCK; case M: return HTTPMethod.HTTP_MKCOL; /* or MOVE, MKACTIVITY, MERGE, M-SEARCH */ case N: return HTTPMethod.HTTP_NOTIFY; case O: return HTTPMethod.HTTP_OPTIONS; case P: return HTTPMethod.HTTP_POST; /* or PROPFIND|PROPPATCH|PUT|PATCH|PURGE */ case R: return HTTPMethod.HTTP_REPORT; case S: return HTTPMethod.HTTP_SUBSCRIBE; case T: return HTTPMethod.HTTP_TRACE; case U: return HTTPMethod.HTTP_UNLOCK; /* or UNSUBSCRIBE */ } return null; // ugh. } boolean header_almost_done(byte ch) { if (strict && LF != ch) { return false; } state = State.header_value_lws; // TODO java enums support some sort of bitflag mechanism !? switch (header_state) { case connection_keep_alive: flags |= F_CONNECTION_KEEP_ALIVE; break; case connection_close: flags |= F_CONNECTION_CLOSE; break; case transfer_encoding_chunked: flags |= F_CHUNKED; break; default: break; } return true; } // boolean headers_almost_done (byte ch, ParserSettings settings) { // } // headers_almost_done final int min(int a, int b) { return a < b ? a : b; } final int min(int a, long b) { return a < b ? a : (int) b; } /* probably not the best place to hide this ... */ public boolean HTTP_PARSER_STRICT; State new_message() { if (HTTP_PARSER_STRICT) { return http_should_keep_alive() ? start_state() : State.dead; } else { return start_state(); } } State start_state() { return type == ParserType.HTTP_REQUEST ? State.start_req : State.start_res; } boolean parsing_header(State state) { switch (state) { case chunk_data: case chunk_data_almost_done: case chunk_data_done: case body_identity: case body_identity_eof: case message_done: return false; } return true; } /* "Dial C for Constants" */ static class C { static final int HTTP_MAX_HEADER_SIZE = 80 * 1024; static final int F_CHUNKED = 1 << 0; static final int F_CONNECTION_KEEP_ALIVE = 1 << 1; static final int F_CONNECTION_CLOSE = 1 << 2; static final int F_TRAILING = 1 << 3; static final int F_UPGRADE = 1 << 4; static final int F_SKIPBODY = 1 << 5; static final byte[] UPCASE = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2d, 0x00, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x00, 0x00, 0x00, 0x00, 0x5f, 0x00, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, }; static final byte[] CONNECTION = { 0x43, 0x4f, 0x4e, 0x4e, 0x45, 0x43, 0x54, 0x49, 0x4f, 0x4e, }; static final byte[] PROXY_CONNECTION = { 0x50, 0x52, 0x4f, 0x58, 0x59, 0x2d, 0x43, 0x4f, 0x4e, 0x4e, 0x45, 0x43, 0x54, 0x49, 0x4f, 0x4e, }; static final byte[] CONTENT_LENGTH = { 0x43, 0x4f, 0x4e, 0x54, 0x45, 0x4e, 0x54, 0x2d, 0x4c, 0x45, 0x4e, 0x47, 0x54, 0x48, }; static final byte[] TRANSFER_ENCODING = { 0x54, 0x52, 0x41, 0x4e, 0x53, 0x46, 0x45, 0x52, 0x2d, 0x45, 0x4e, 0x43, 0x4f, 0x44, 0x49, 0x4e, 0x47, }; static final byte[] UPGRADE = { 0x55, 0x50, 0x47, 0x52, 0x41, 0x44, 0x45, }; static final byte[] CHUNKED = { 0x43, 0x48, 0x55, 0x4e, 0x4b, 0x45, 0x44, }; static final byte[] KEEP_ALIVE = { 0x4b, 0x45, 0x45, 0x50, 0x2d, 0x41, 0x4c, 0x49, 0x56, 0x45, }; static final byte[] CLOSE = { 0x43, 0x4c, 0x4f, 0x53, 0x45, }; /* Tokens as defined by rfc 2616. Also lowercases them. * token = 1*<any CHAR except CTLs or separators> * separators = "(" | ")" | "<" | ">" | "@" * | "," | ";" | ":" | "\" | <"> * | "/" | "[" | "]" | "?" | "=" * | "{" | "}" | SP | HT */ static final char[] tokens = { /* 0 nul 1 soh 2 stx 3 etx 4 eot 5 enq 6 ack 7 bel */ 0, 0, 0, 0, 0, 0, 0, 0, /* 8 bs 9 ht 10 nl 11 vt 12 np 13 cr 14 so 15 si */ 0, 0, 0, 0, 0, 0, 0, 0, /* 16 dle 17 dc1 18 dc2 19 dc3 20 dc4 21 nak 22 syn 23 etb */ 0, 0, 0, 0, 0, 0, 0, 0, /* 24 can 25 em 26 sub 27 esc 28 fs 29 gs 30 rs 31 us */ 0, 0, 0, 0, 0, 0, 0, 0, /* 32 sp 33 ! 34 " 35 # 36 $ 37 % 38 & 39 ' */ 0, '!', 0, '#', '$', '%', '&', '\'', /* 40 ( 41 ) 42 * 43 + 44 , 45 - 46 . 47 / */ 0, 0, '*', '+', 0, '-', '.', 0, /* 48 0 49 1 50 2 51 3 52 4 53 5 54 6 55 7 */ '0', '1', '2', '3', '4', '5', '6', '7', /* 56 8 57 9 58 : 59 ; 60 < 61 = 62 > 63 ? */ '8', '9', 0, 0, 0, 0, 0, 0, /* 64 @ 65 A 66 B 67 C 68 D 69 E 70 F 71 G */ 0, 'A', 'B', 'C', 'D', 'E', 'F', 'G', /* 72 H 73 I 74 J 75 K 76 L 77 M 78 N 79 O */ 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', /* 80 P 81 Q 82 R 83 S 84 T 85 U 86 V 87 W */ 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', /* 88 X 89 Y 90 Z 91 [ 92 \ 93 ] 94 ^ 95 _ */ 'X', 'Y', 'Z', 0, 0, 0, 0, '_', /* 96 ` 97 a 98 b 99 c 100 d 101 e 102 f 103 g */ 0, 'A', 'B', 'C', 'D', 'E', 'F', 'G', /* 104 h 105 i 106 j 107 k 108 l 109 m 110 n 111 o */ 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', /* 112 p 113 q 114 r 115 s 116 t 117 u 118 v 119 w */ 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', /* 120 x 121 y 122 z 123 { 124 | 125 } 126 ~ 127 del */ 'X', 'Y', 'Z', 0, '|', 0, '~', 0, /* hi bit set, not ascii */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,}; static final byte[] UNHEX = {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 , -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 , -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 , 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1 , -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1 , -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 , -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1 , -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 , -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 , -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 , -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 , -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 , -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 , -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 , -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 , -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 }; static final boolean[] normal_url_char = { /* 0 nul 1 soh 2 stx 3 etx 4 eot 5 enq 6 ack 7 bel */ false, false, false, false, false, false, false, false, /* 8 bs 9 ht 10 nl 11 vt 12 np 13 cr 14 so 15 si */ false, false, false, false, false, false, false, false, /* 16 dle 17 dc1 18 dc2 19 dc3 20 dc4 21 nak 22 syn 23 etb */ false, false, false, false, false, false, false, false, /* 24 can 25 em 26 sub 27 esc 28 fs 29 gs 30 rs 31 us */ false, false, false, false, false, false, false, false, /* 32 sp 33 ! 34 " 35 # 36 $ 37 % 38 & 39 ' */ false, true, true, false, true, true, true, true, /* 40 ( 41 ) 42 * 43 + 44 , 45 - 46 . 47 / */ true, true, true, true, true, true, true, true, /* 48 0 49 1 50 2 51 3 52 4 53 5 54 6 55 7 */ true, true, true, true, true, true, true, true, /* 56 8 57 9 58 : 59 ; 60 < 61 = 62 > 63 ? */ true, true, true, true, true, true, true, false, /* 64 @ 65 A 66 B 67 C 68 D 69 E 70 F 71 G */ true, true, true, true, true, true, true, true, /* 72 H 73 I 74 J 75 K 76 L 77 M 78 N 79 O */ true, true, true, true, true, true, true, true, /* 80 P 81 Q 82 R 83 S 84 T 85 U 86 V 87 W */ true, true, true, true, true, true, true, true, /* 88 X 89 Y 90 Z 91 [ 92 \ 93 ] 94 ^ 95 _ */ true, true, true, true, true, true, true, true, /* 96 ` 97 a 98 b 99 c 100 d 101 e 102 f 103 g */ true, true, true, true, true, true, true, true, /* 104 h 105 i 106 j 107 k 108 l 109 m 110 n 111 o */ true, true, true, true, true, true, true, true, /* 112 p 113 q 114 r 115 s 116 t 117 u 118 v 119 w */ true, true, true, true, true, true, true, true, /* 120 x 121 y 122 z 123 { 124 | 125 } 126 ~ 127 del */ true, true, true, true, true, true, true, false, /* hi bit set, not ascii */ /* Remainder of non-ASCII range are accepted as-is to support implicitly UTF-8 * encoded paths. This is out of spec, but clients generate this and most other * HTTP servers support it. We should, too. */ true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, }; public static final byte A = 0x41; public static final byte B = 0x42; public static final byte C = 0x43; public static final byte D = 0x44; public static final byte E = 0x45; public static final byte F = 0x46; public static final byte G = 0x47; public static final byte H = 0x48; public static final byte I = 0x49; public static final byte J = 0x4a; public static final byte K = 0x4b; public static final byte L = 0x4c; public static final byte M = 0x4d; public static final byte N = 0x4e; public static final byte O = 0x4f; public static final byte P = 0x50; public static final byte Q = 0x51; public static final byte R = 0x52; public static final byte S = 0x53; public static final byte T = 0x54; public static final byte U = 0x55; public static final byte V = 0x56; public static final byte W = 0x57; public static final byte X = 0x58; public static final byte Y = 0x59; public static final byte Z = 0x5a; public static final byte UNDER = 0x5f; public static final byte CR = 0x0d; public static final byte LF = 0x0a; public static final byte DOT = 0x2e; public static final byte SPACE = 0x20; public static final byte TAB = 0x09; public static final byte SEMI = 0x3b; public static final byte COLON = 0x3a; public static final byte HASH = 0x23; public static final byte QMARK = 0x3f; public static final byte SLASH = 0x2f; public static final byte DASH = 0x2d; public static final byte STAR = 0x2a; public static final byte NULL = 0x00; } enum State { dead, start_req_or_res, res_or_resp_H, start_res, res_H, res_HT, res_HTT, res_HTTP, res_first_http_major, res_http_major, res_first_http_minor, res_http_minor, res_first_status_code, res_status_code, res_status, res_line_almost_done, start_req, req_method, req_spaces_before_url, req_schema, req_schema_slash, req_schema_slash_slash, req_host_start, req_host_v6_start, req_host_v6, req_host_v6_end, req_host, req_port_start, req_port, req_path, req_query_string_start, req_query_string, req_fragment_start, req_fragment, req_http_start, req_http_H, req_http_HT, req_http_HTT, req_http_HTTP, req_first_http_major, req_http_major, req_first_http_minor, req_http_minor, req_line_almost_done, header_field_start, header_field, header_value_start, header_value, header_value_lws, header_almost_done, chunk_size_start, chunk_size, chunk_parameters, chunk_size_almost_done, headers_almost_done, headers_done // This space intentionally not left blank, comment from c, for orientation... // the c version uses <= s_header_almost_done in java, we list the states explicitly // in `parsing_header()` /* Important: 's_headers_done' must be the last 'header' state. All * states beyond this must be 'body' states. It is used for overflow * checking. See the PARSING_HEADER() macro. */, chunk_data, chunk_data_almost_done, chunk_data_done, body_identity, body_identity_eof, message_done } enum HState { general, C, CO, CON, matching_connection, matching_proxy_connection, matching_content_length, matching_transfer_encoding, matching_upgrade, connection, content_length, transfer_encoding, upgrade, matching_transfer_encoding_chunked, matching_connection_keep_alive, matching_connection_close, transfer_encoding_chunked, connection_keep_alive, connection_close } public enum UrlFields { UF_SCHEMA(0), UF_HOST(1), UF_PORT(2), UF_PATH(3), UF_QUERY(4), UF_FRAGMENT(5), UF_MAX(6); private final int index; private UrlFields(int index) { this.index = index; } public int getIndex() { return index; } } }