/**
Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016. All rights reserved.
Contact:
SYSTAP, LLC DBA Blazegraph
2501 Calvert ST NW #106
Washington, DC 20008
licenses@blazegraph.com
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
/*
* Created on Mar 9, 2011
*/
package com.bigdata.util.httpd;
import java.util.Enumeration;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
import org.apache.log4j.Logger;
/**
 * This class provides a set of utilities for encoding and decoding
 * HTTP headers and doubles as the base class for all classes that
 * implement support for a specific HTTP header, such as {@link
 * LinkHeader}, {@link AcceptHeader}, etc.
 */
public class HTTPHeaderUtility
{

    /**
     * The {@link Logger} for HTTP header operations, including
     * parsing and serialization.  The {@link Logger} is named for
     * this class.  It should be used by all derived classes.
     */
    protected static final Logger log = Logger.getLogger
        ( HTTPHeaderUtility.class
          );

    // Grammar excerpts from HTTP/1.1 that the patterns and the
    // character tests in this class implement:
    //
    // token      := One or more of any CHAR except CTLs or separators.
    //
    // CTL        := <any US-ASCII control character (octets 0 - 31) and
    //               DEL (127)>
    //
    // separators := "(" | ")" | "<" | ">" | "@"
    //             | "," | ";" | ":" | "\" | <">
    //             | "/" | "[" | "]" | "?" | "="
    //             | "{" | "}" | SP | HT

    /**
     * Matches an HTTP <code>token</code>, which consists of one or
     * more of any CHAR except <code>CTL</code> or
     * <code>separator</code>.
     */
    final protected static String httpTokenPattern =
        "[^\\p{Cntrl}\\(\\)<>@,;:\\\\\\\"/\\[\\]\\?=\\{\\}\\s\\x09]+"
        ;

    /**
     * The text for a {@link Pattern} matching an HTTP
     * <code>quoted-string</code>.<p>
     *
     * From HTTP/1.1:<p>
     *
     * A string of text is parsed as a single word if it is quoted
     * using double-quote marks.<p>
     *
     * The backslash character '\' MAY be used as a single-character
     * quoting mechanism only within quoted-string and comment
     * constructs.<p>
     *
     <pre>
     quoted-string  = ( <"> *(qdtext | quoted-pair ) <"> )
     qdtext         = <any TEXT except <">>
     quoted-pair    = "\" CHAR
     TEXT           = <any OCTET except CTLs, but including LWS>
     </pre>
     * Note: This pattern text uses a non-capturing group to
     * encapsulate the choice between a legal character (TEXT) and an
     * escaped character (quoted-pair) within the context of the
     * quoted-string.  Any quoted-pair sequences must be reversed by
     * the consumer of the matched group, e.g., using {@link
     * #unquoteString(String)}.<p>
     */
    final protected static String httpQuotedStringPattern =
        "\\\"(?:\\\\\"|[^\\p{Cntrl}\\\"])*\\\""
        ;

    /**
     * Returns true iff the character is an HTTP <code>CTL</code>
     * character, i.e., an octet in the range 0 - 31 or DEL (127).
     *
     * @param ch The character to be tested.
     */
    static public boolean isHttpCtlChar( char ch )
    {
        // A char is never negative, so only the upper bound of the
        // 0 - 31 range has to be tested.
        return ch <= 31 || ch == 127;
    }

    /**
     * Returns true iff the character is an HTTP
     * <code>separator</code> character.
     *
     * @param ch The character to be tested.
     */
    static public boolean isHttpSeparatorChar( char ch )
    {
        switch( ch ) {
        case '(':
        case ')':
        case '<':
        case '>':
        case '@':
        case ',':
        case ';':
        case ':':
        case '\\':
        case '"':
        case '/':
        case '[':
        case ']':
        case '?':
        case '=':
        case '{':
        case '}':
        case ' ':         // ASCII space (32).
        case (char)0x09:  // Horizontal tab (9).
            return true;
        default:
            return false;
        }
    }

    /**
     * Short alias for {@link #httpTokenPattern}, used when composing
     * larger pattern text.  It is defined in terms of that constant
     * so that the two can not drift apart.
     */
    final protected static String tok = httpTokenPattern;

    /**
     * Returns true iff the {@link String} obeys the syntax rules
     * for an HTTP <code>token</code>: it is not empty and none of
     * its characters is a <code>CTL</code> or a
     * <code>separator</code>.
     *
     * @param token The text to be tested (must not be
     * <code>null</code>).
     */
    static public boolean isHttpToken( String token )
    {
        int len = token.length();
        if( len == 0 ) {
            // A token is ONE or more characters; this also agrees
            // with the trailing '+' of the token pattern text.
            return false;
        }
        for( int i=0; i<len; i++ ) {
            char ch = token.charAt( i );
            if( isHttpCtlChar( ch ) ||
                isHttpSeparatorChar( ch )
                ) {
                return false;
            }
        }
        return true;
    }

    /**
     * Short alias for {@link #httpQuotedStringPattern}, used when
     * composing larger pattern text.  It is defined in terms of that
     * constant so that the two can not drift apart.
     */
    final protected static String qs = httpQuotedStringPattern;

    /**
     * Pattern used to match the type/subtype of a MIME expression.
     * The matched groups are numbered by the opening parentheses in
     * the pattern.  The different matching groups are:<ul>
     *
     * <li> group(0) : the matched input.
     *
     * <li> group(1) : type
     *
     * <li> group(2) : subtype
     *
     * <li> group(3) : the rest of the input, to which you then apply
     * {@link #m_p2}.
     *
     * </ul>
     *
     * This field is <code>null</code> until {@link #init()} has been
     * invoked.
     */
    static protected Pattern m_p1 = null;

    /**
     * Pattern used to match the optional parameters of a MIME
     * expression.  The matched groups are numbered by the opening
     * parentheses in the pattern.  The source for the pattern is the
     * parameters as identified by {@link #m_p1}.  The different
     * matching groups are:<ul>
     *
     * <li> group( 0 ) : parameter ( attribute=value ).
     *
     * <li> group( 1 ) : attribute (parameter name).
     *
     * <li> group( 2 ) : value for that parameter.
     *
     * </ul>
     *
     * Note: You should match this pattern repeatedly until the input
     * is exhausted.<p>
     *
     * This field is <code>null</code> until {@link #init()} has been
     * invoked.
     */
    static protected Pattern m_p2 = null;

    /**
     * Initialization for the shared {@link Pattern} objects {@link
     * #m_p1} and {@link #m_p2}.  Does nothing when both patterns
     * have already been compiled; otherwise both are (re)compiled.<p>
     *
     * Note: This method is not synchronized and the pattern fields
     * are plain static fields.
     *
     * @exception AssertionError if the pattern text can not be
     * compiled.  The cause is the {@link PatternSyntaxException}.
     */
    static protected void init()
    {
        try {
            if( m_p1 != null && m_p2 != null ) return;
            // type "/" subtype, followed by the unparsed parameters.
            m_p1 = Pattern.compile
                ( "^("+tok+")/("+tok+")(.*)$"
                  );
            // ";" attribute "=" ( token | quoted-string )
            m_p2 = Pattern.compile
                ( "\\s*;\\s*("+tok+")=("+tok+"|"+qs+")\\s*"
                  );
        }
        catch( PatternSyntaxException ex ) {
            /* Masquerade this exception so that it does not show up
             * on method signatures throughout this and other
             * packages.  The decision to use java.util.regex here is
             * an implementation decision and its exception signatures
             * should not be propagated.
             */
            AssertionError err = new AssertionError
                ( "Could not compile regex patterns."
                  );
            err.initCause( ex );
            throw err;
        }
    }

    /**
     * Returns the MIME type parameter value as either an HTTP
     * <code>token</code> or HTTP <code>quoted-string</code> depending
     * on whether or not it contains any characters that need to be
     * escaped.  Every <code>CTL</code> and <code>separator</code>
     * character is escaped with a backslash.  An empty value is
     * always returned as the empty <code>quoted-string</code> since
     * an empty <code>token</code> is not legal.
     *
     * @param value The value to be serialized (must not be
     * <code>null</code>).
     *
     * @param force When true the returned value is always a
     * <code>quoted-string</code>.
     *
     * @return The value as a <code>token</code> or as a
     * <code>quoted-string</code>.  {@link #unquoteString(String)}
     * reverses the transformation.
     */
    static public String quoteString
        ( String value,
          boolean force
          )
    {
        int len = value.length();
        StringBuffer sb = new StringBuffer( len + 2 );
        // The empty string is not a token, so it has to be quoted.
        boolean mustQuote = force || len == 0;
        for( int i=0; i<len; i++ ) {
            char ch = value.charAt( i );
            if( isHttpCtlChar( ch ) ||
                isHttpSeparatorChar( ch )
                ) {
                sb.append( '\\' );
                mustQuote = true;
            }
            sb.append( ch );
        }
        return mustQuote
            ? "\""+sb.toString()+"\""
            : sb.toString()
            ;
    }

    /**
     * If the value is an HTTP <code>quoted-string</code> then we
     * strip off the quote characters now and translate any escaped
     * characters into themselves, e.g., '\"' => '"'.  Otherwise
     * returns <i>value</i>.
     *
     * @param value The value (must not be <code>null</code>).
     *
     * @return The unquoted value, or <i>value</i> itself when it
     * does not start with a quote character.
     *
     * @exception IllegalArgumentException If the value is a malformed
     * quoted string.  For example, no closing quote character or no
     * character after an escape character.
     */
    static public String unquoteString( String value )
        throws IllegalArgumentException
    {
        String originalValue = value; // save.
        /* Do nothing unless a quoted-string.
         */
        if( ! value.startsWith( "\"" ) ) {
            return value;
        }
        /* There must be a distinct closing quote character.  The
         * length test rejects a value consisting of a lone quote
         * character, which would otherwise be taken for both the
         * opening and the closing quote.
         */
        if( value.length() < 2 || ! value.endsWith( "\"" ) ) {
            throw new IllegalArgumentException
                ( "Quoted string does not end with '\"'"+
                  " : "+originalValue
                  );
        }
        // Chop off the quote characters.
        value = value.substring
            ( 1,
              value.length() - 1
              );
        // Translate escaped characters.
        StringBuffer sb = new StringBuffer();
        int len = value.length();
        for( int i=0; i<len; i++ ) {
            char ch = value.charAt( i );
            if( ch == '\\' ) {
                // Skip the escape and copy the next character as is.
                i++;
                if( i < len ) {
                    ch = value.charAt( i );
                    sb.append( ch );
                } else {
                    throw new IllegalArgumentException
                        ( "Escape character at end of string"+
                          " : "+originalValue
                          );
                }
            } else {
                sb.append( ch );
            }
        }
        return sb.toString();
    }

    /**
     * HTTP permits headers whose grammar is a comma delimited list to
     * be specified multiple times in an HTTP request.  This method
     * properly combines the specified values into a {@link String}
     * containing a comma-delimited list that preserves the order in
     * which the header values were specified.
     *
     * @param values The header values, each of which must be a
     * {@link String}.  E.g., as returned by
     * <code>javax.servlet.http.HttpServletRequest#getHeaders( String
     * name )</code>.  May be <code>null</code>.
     *
     * @param defaultValue IFF <i>values</i> is <code>null</code> or
     * an empty enumeration, then this value is returned to the
     * caller.
     *
     * @return The values joined by <code>", "</code> or
     * <i>defaultValue</i> when there are no values.
     */
    public static String combineHeaders
        ( Enumeration values,
          String defaultValue
          )
    {
        if( values == null || ! values.hasMoreElements() ) {
            return defaultValue;
        }
        StringBuffer sb = new StringBuffer();
        boolean first = true;
        while( values.hasMoreElements() ) {
            String value = (String)values.nextElement();
            if( ! first ) {
                sb.append( ", " );
            }
            sb.append( value );
            first = false;
        }
        return sb.toString();
    }

    /**
     * Splits out the elements for an HTTP header value whose grammar
     * is a comma delimited list.  Whitespace around the commas and
     * at either end of the value is discarded.<p>
     *
     * Note: The value is split on every comma, including any comma
     * that appears inside of a <code>quoted-string</code>.  Empty
     * elements can appear in the result, e.g., for an empty value or
     * for a value with a leading comma.
     *
     * @param value The header value (must not be <code>null</code>).
     *
     * @return The elements of the list.
     */
    public static String[] splitCommaList
        ( String value
          )
    {
        // Only build the log messages when they will be written.
        final boolean debug = log.isDebugEnabled();
        if( debug ) {
            log.debug
                ( "Header-Value: "+value
                  );
        }
        String[] values = value.trim().split
            ( "\\s*,\\s*"
              );
        if( debug ) {
            log.debug
                ( "Found "+values.length+" elements in list."
                  );
        }
        return values;
    }

}