/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* To change this template, choose Tools | Templates
* and open the template in the editor.
*/
package com.facebook.infrastructure.net.http;
import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteBuffer;
/**
*
* @author kranganathan
*/
/**
* A parser for HTTP header lines.
*
*/
public class HttpHeaderParser
{
private Callback callback_;
public interface Callback
{
public void onHeader(String key, String value);
}
public HttpHeaderParser(Callback cb)
{
callback_ = cb;
}
enum HeaderParseState
{
// we are at the very beginning of the line
START_OF_HEADER_LINE,
// are at line beginning, read '\r' but ran out of bytes in this round
START_OF_HEADER_LINE_WITH_READ_SLASH_R,
// we are in the process of parsing a header key
IN_HEADER_KEY,
// eat whitespace after the ':' but before the value
PRE_HEADER_VALUE_WHITESPACE,
// we are in the process of parsing a header value
IN_HEADER_VALUE,
// were in IN_HEADER_VALUE and read '\r' but ran out of more bytes
IN_HEADER_VALUE_WITH_READ_SLASH_R,
/*
* got \r\n in the header value. now consider whether its a multilined
* value. For example,
*
* HeaderKey: HeaderValue\r\n this is still part of the value\r\n
*
* is a valid HTTP header line with value
*
* HeaderValue\r\n this is still part of the value
*
* NOTE: while all whitespace should generally be condensed into a
* single space by the HTTP standard, we will just preserve all of the
* whitespace for now
*
* TODO: consider replacing all whitespace with a single space
*
* TODO: this parser doesn't correctly preserve the \r\n, should it?
*/
CHECKING_END_OF_VALUE,
// we are just about to reset the state of the header parser
TO_RESET
}
// the current state of the parser
private HeaderParseState parseState_ = HeaderParseState.TO_RESET;
// incrementally build up this HTTP header key as we read it
private StringBuilder headerKey_ = new StringBuilder(32);
// incrementally build up this HTTP header value as we read it
private StringBuilder headerValue_ = new StringBuilder(64);
public void resetParserState()
{
headerKey_.setLength(0);
headerValue_.setLength(0);
parseState_ = HeaderParseState.START_OF_HEADER_LINE;
}
private void finishCurrentHeader_()
{
if (callback_ != null)
{
callback_.onHeader(headerKey_.toString().trim(), headerValue_
.toString().trim());
}
resetParserState();
}
public boolean onMoreBytes(InputStream in) throws IOException
{
int got;
if (parseState_ == HeaderParseState.TO_RESET)
{
resetParserState();
}
while (in.available() > 0)
{
in.mark(1);
got = in.read();
switch (parseState_)
{
case START_OF_HEADER_LINE:
switch (got)
{
case '\r':
if (in.available() > 0)
{
in.mark(1);
got = in.read();
if (got == '\n')
{
parseState_ = HeaderParseState.TO_RESET;
return true;
} // TODO: determine whether this \r-eating is valid
else
{
in.reset();
}
} // wait for more data to make this decision
else
{
in.reset();
return false;
}
break;
default:
in.reset();
parseState_ = HeaderParseState.IN_HEADER_KEY;
break;
}
break;
case IN_HEADER_KEY:
switch (got)
{
case ':':
parseState_ = HeaderParseState.PRE_HEADER_VALUE_WHITESPACE;
break;
// TODO: find out: whether to eat whitespace before a :
default:
headerKey_.append((char) got);
break;
}
break;
case PRE_HEADER_VALUE_WHITESPACE:
switch (got)
{
case ' ':
case '\t':
break;
default:
in.reset();
parseState_ = HeaderParseState.IN_HEADER_VALUE;
break;
}
break;
case IN_HEADER_VALUE:
switch (got)
{
case '\r':
if (in.available() > 0)
{
in.mark(1);
got = in.read();
if (got == '\n')
{
parseState_ = HeaderParseState.CHECKING_END_OF_VALUE;
break;
} // TODO: determine whether this \r-eating is valid
else
{
in.reset();
}
}
else
{
in.reset();
return false;
}
break;
default:
headerValue_.append((char) got);
break;
}
break;
case CHECKING_END_OF_VALUE:
switch (got)
{
case ' ':
case '\t':
in.reset();
parseState_ = HeaderParseState.IN_HEADER_VALUE;
break;
default:
in.reset();
finishCurrentHeader_();
}
break;
default:
assert false;
parseState_ = HeaderParseState.START_OF_HEADER_LINE;
break;
}
}
return false;
}
public boolean onMoreBytesNew(ByteBuffer buffer) throws IOException
{
int got;
int limit = buffer.limit();
int pos = buffer.position();
if (parseState_ == HeaderParseState.TO_RESET)
{
resetParserState();
}
while (pos < limit)
{
switch (parseState_)
{
case START_OF_HEADER_LINE:
if ((got = buffer.get(pos)) != '\r')
{
parseState_ = HeaderParseState.IN_HEADER_KEY;
break;
}
else
{
pos++;
if (pos == limit) // Need more bytes
{
buffer.position(pos);
parseState_ = HeaderParseState.START_OF_HEADER_LINE_WITH_READ_SLASH_R;
return false;
}
}
// fall through
case START_OF_HEADER_LINE_WITH_READ_SLASH_R:
// Processed "...\r\n\r\n" - headers are complete
if (((char) buffer.get(pos)) == '\n')
{
buffer.position(++pos);
parseState_ = HeaderParseState.TO_RESET;
return true;
} // TODO: determine whether this \r-eating is valid
else
{
parseState_ = HeaderParseState.IN_HEADER_KEY;
}
//fall through
case IN_HEADER_KEY:
// TODO: find out: whether to eat whitespace before a :
while (pos < limit && (got = buffer.get(pos)) != ':')
{
headerKey_.append((char) got);
pos++;
}
if (pos < limit)
{
pos++; //eating ':'
parseState_ = HeaderParseState.PRE_HEADER_VALUE_WHITESPACE;
}
break;
case PRE_HEADER_VALUE_WHITESPACE:
while ((((got = buffer.get(pos)) == ' ') || (got == '\t'))
&& (++pos < limit))
{
;
}
if (pos < limit)
{
parseState_ = HeaderParseState.IN_HEADER_VALUE;
}
break;
case IN_HEADER_VALUE:
while (pos < limit && (got = buffer.get(pos)) != '\r')
{
headerValue_.append((char) got);
pos++;
}
if (pos == limit)
{
break;
}
pos++;
if (pos == limit)
{
parseState_ = HeaderParseState.IN_HEADER_VALUE_WITH_READ_SLASH_R;
break;
//buffer.position(pos);
//return false;
}
// fall through
case IN_HEADER_VALUE_WITH_READ_SLASH_R:
if (((char) buffer.get(pos)) == '\n')
{
parseState_ = HeaderParseState.CHECKING_END_OF_VALUE;
pos++;
} // TODO: determine whether this \r-eating is valid
else
{
parseState_ = HeaderParseState.IN_HEADER_VALUE;
}
break;
case CHECKING_END_OF_VALUE:
switch ((char) buffer.get(pos))
{
case ' ':
case '\t':
parseState_ = HeaderParseState.IN_HEADER_VALUE;
break;
default:
// Processed "headerKey headerValue\r\n"
finishCurrentHeader_();
}
break;
default:
assert false;
parseState_ = HeaderParseState.START_OF_HEADER_LINE;
break;
}
}
// Need to read more bytes - get next buffer
buffer.position(pos);
return false;
}
}