/* Copyright (c) 2008 Google Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.google.gdata.util;
import com.google.gdata.client.Service;
import java.io.Serializable;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Simple class for parsing and generating Content-Type header values, per
 * RFC 2045 (MIME) and 2616 (HTTP 1.1).
 *
 * <p>The parsing constructor lower-cases the type, the subtype and the
 * attribute names. An instance stays mutable until {@link #lock()} is called;
 * every predefined constant in this class is locked.
 */
public class ContentType implements Serializable {

  /** Regular expression fragment that matches one header token. */
  private static final String TOKEN =
      "[\\p{ASCII}&&[^\\p{Cntrl} ;/=\\[\\]\\(\\)\\<\\>\\@\\,\\:\\\"\\?\\=]]+";

  // Precisely matches a token
  private static final Pattern TOKEN_PATTERN = Pattern.compile(
      "^" + TOKEN + "$");

  // Matches a media type value
  private static final Pattern TYPE_PATTERN = Pattern.compile(
      "(" + TOKEN + ")" +         // type (G1)
      "/" +                       // separator
      "(" + TOKEN + ")" +         // subtype (G2)
      "\\s*(.*)\\s*", Pattern.DOTALL);  // remainder holding the attributes (G3)

  // Matches an attribute value
  private static final Pattern ATTR_PATTERN = Pattern.compile(
      "\\s*;\\s*" +
      "(" + TOKEN + ")" +         // attr name (G1)
      "\\s*=\\s*" +
      "(?:" +
        "\"([^\"]*)\"" +          // value as quoted string (G2)
        "|" +
        "(" + TOKEN + ")?" +      // value as token (G3)
      ")"
      );

  /**
   * Name of the attribute that contains the encoding character set for
   * the content type.
   * @see #getCharset()
   */
  public static final String ATTR_CHARSET = "charset";

  /**
   * Special "*" character to match any type or subtype.
   */
  private static final String STAR = "*";

  /**
   * The UTF-8 charset encoding is used by default for all text and xml
   * based MIME types.
   */
  private static final String DEFAULT_CHARSET = ATTR_CHARSET + "=UTF-8";

  /**
   * A ContentType constant that describes the base unqualified Atom content
   * type.
   */
  public static final ContentType ATOM =
      new ContentType("application/atom+xml;" + DEFAULT_CHARSET).lock();

  /**
   * A ContentType constant that describes the qualified Atom entry content
   * type.
   *
   * @see #getAtomEntry()
   */
  public static final ContentType ATOM_ENTRY =
      new ContentType("application/atom+xml;type=entry;" + DEFAULT_CHARSET)
          .lock();

  /**
   * A ContentType constant that describes the qualified Atom feed content
   * type.
   *
   * @see #getAtomFeed()
   */
  public static final ContentType ATOM_FEED =
      new ContentType("application/atom+xml;type=feed;" + DEFAULT_CHARSET)
          .lock();

  /**
   * Returns the ContentType that should be used in contexts that expect
   * an Atom entry.
   */
  public static ContentType getAtomEntry() {
    // Use the unqualified type for v1, the qualified one for later versions
    return Service.getVersion().isCompatible(Service.Versions.V1) ?
        ATOM : ATOM_ENTRY;
  }

  /**
   * Returns the ContentType that should be used in contexts that expect
   * an Atom feed.
   */
  public static ContentType getAtomFeed() {
    // Use the unqualified type for v1, the qualified one for later versions
    return Service.getVersion().isCompatible(Service.Versions.V1) ?
        ATOM : ATOM_FEED;
  }

  /**
   * A ContentType constant that describes the Atom Service content type.
   */
  public static final ContentType ATOM_SERVICE =
      new ContentType("application/atomsvc+xml;" + DEFAULT_CHARSET).lock();

  /**
   * A ContentType constant that describes the RSS channel/item content type.
   */
  public static final ContentType RSS =
      new ContentType("application/rss+xml;" + DEFAULT_CHARSET).lock();

  /**
   * A ContentType constant that describes the JSON content type.
   */
  public static final ContentType JSON =
      new ContentType("application/json;" + DEFAULT_CHARSET).lock();

  /**
   * A ContentType constant that describes the Javascript content type.
   */
  public static final ContentType JAVASCRIPT =
      new ContentType("text/javascript;" + DEFAULT_CHARSET).lock();

  /**
   * A ContentType constant that describes the generic text/xml content type.
   */
  public static final ContentType TEXT_XML =
      new ContentType("text/xml;" + DEFAULT_CHARSET).lock();

  /**
   * A ContentType constant that describes the generic text/html content type.
   */
  public static final ContentType TEXT_HTML =
      new ContentType("text/html;" + DEFAULT_CHARSET).lock();

  /**
   * A ContentType constant that describes the generic text/plain content type.
   */
  public static final ContentType TEXT_PLAIN =
      new ContentType("text/plain;" + DEFAULT_CHARSET).lock();

  /**
   * A ContentType constant that describes the GData error content type.
   */
  public static final ContentType GDATA_ERROR =
      new ContentType("application/vnd.google.gdata.error+xml").lock();

  /**
   * A ContentType constant that describes the OpenSearch description document
   */
  public static final ContentType OPENSEARCH =
      new ContentType("application/opensearchdescription+xml").lock();

  /**
   * A ContentType constant that describes the MIME multipart/related content
   * type.
   */
  public static final ContentType MULTIPART_RELATED =
      new ContentType("multipart/related").lock();

  /**
   * A ContentType constant that describes the application/xml content
   * type.
   */
  public static final ContentType APPLICATION_XML =
      new ContentType("application/xml").lock();

  /**
   * A ContentType constant that indicates that the body contains an
   * encapsulated message, with the syntax of an RFC 822 email message.
   */
  public static final ContentType MESSAGE_RFC822 =
      new ContentType("message/rfc822").lock();

  /**
   * Wildcard content type that will match any MIME type
   */
  public static final ContentType ANY = new ContentType("*/*").lock();

  /**
   * Determines the best "Content-Type" header to use in a servlet response
   * based on the "Accept" header from a servlet request.
   *
   * <p>Entries of the "Accept" header that cannot be parsed, or whose "q"
   * value is not a number in the range (0, 1], are skipped. Among the
   * remaining entries the one with the highest "q" wins; on equal "q" the
   * actual content type that appears earlier in {@code actualContentTypes}
   * is preferred.
   *
   * @param acceptHeader       "Accept" header value from a servlet request,
   *                           or <code>null</code> if no "Accept" header
   *                           was specified
   * @param actualContentTypes actual content types in descending order of
   *                           preference (non-empty, and each entry is of the
   *                           form "type/subtype" without the wildcard char
   *                           '*')
   * @return the best content type to use (or <code>null</code> on no match).
   *         When {@code acceptHeader} is <code>null</code> the first entry of
   *         {@code actualContentTypes} is returned.
   */
  public static ContentType getBestContentType(String acceptHeader,
      List<ContentType> actualContentTypes) {

    // If no accept header is specified, return the first actual type
    if (acceptHeader == null) {
      return actualContentTypes.get(0);
    }

    // iterate over all of the accepted content types to find the best match
    float bestQ = 0;
    ContentType bestContentType = null;
    for (String acceptedTypeString : acceptHeader.split(",")) {

      // create the content type object; unparsable entries are skipped
      ContentType acceptedContentType;
      try {
        acceptedContentType = new ContentType(acceptedTypeString.trim());
      } catch (IllegalArgumentException ex) {
        continue;
      }

      // parse the "q" value (default of 1); 0 marks an unusable entry
      float curQ = parseQuality(acceptedContentType.getAttribute("q"));
      if (curQ == 0) {
        continue;
      }

      // only check it if it's at least as good ("q") as the best one so far
      if (curQ < bestQ) {
        continue;
      }

      /* iterate over the actual content types in order to find the best match
         to the current accepted content type */
      for (ContentType actualContentType : actualContentTypes) {

        /* if the "q" value is the same as the current best, only check for
           better content types; the identity comparison is intentional, it
           detects that the current best position in the list was reached */
        if (curQ == bestQ && bestContentType == actualContentType) {
          break;
        }

        /* check if the accepted content type matches the current actual
           content type */
        if (actualContentType.match(acceptedContentType)) {
          bestContentType = actualContentType;
          bestQ = curQ;
          break;
        }
      }
    }

    // bestContentType is only ever assigned together with a positive bestQ,
    // so it is still null when nothing matched
    return bestContentType;
  }

  /**
   * Parses the "q" attribute of an accepted content type.
   *
   * @param qAttr raw attribute value, or {@code null} if the attribute is
   *              absent
   * @return 1 if the attribute is absent, the parsed value if it lies in
   *         (0, 1], and 0 if the value is malformed or out of range
   */
  private static float parseQuality(String qAttr) {
    if (qAttr == null) {
      return 1;
    }
    try {
      float qValue = Float.parseFloat(qAttr);
      // Written as a positive range test so that NaN, for which every
      // comparison is false, is rejected as well.
      return (qValue > 0 && qValue <= 1) ? qValue : 0;
    } catch (NumberFormatException ex) {
      return 0;
    }
  }

  /** {@code true} if parsed input didn't contain charset encoding info */
  private boolean inferredCharset = false;

  /** If set to {@code true}, the object is immutable. */
  private boolean locked;

  /** Media type, e.g. "application". */
  private String type;

  /** Media subtype, e.g. "atom+xml". */
  private String subType;

  /**
   * Attributes of the content type, keyed by attribute name. Declared as a
   * concrete {@link HashMap}, which is itself serializable.
   */
  private HashMap<String, String> attributes = new HashMap<String, String>();

  /**
   * Constructs a new instance with the default media type
   * ("application/octet-stream") and charset ("iso-8859-1").
   */
  public ContentType() {
    this(null);
  }

  /**
   * Constructs a new instance from a content-type header value
   * parsing the MIME content type (RFC2045) format.  If the type
   * is {@code null}, then media type and charset will be
   * initialized to default values.
   *
   * <p>If the header carries no charset attribute, one is inferred from the
   * media type; such an inferred charset is reported by {@link #getCharset()}
   * but left out of {@link #toString()}.
   *
   * @param typeHeader content type value in RFC2045 header format.
   * @throws IllegalArgumentException if {@code typeHeader} is not
   *         {@code null} and does not have the form "type/subtype[;attrs]"
   */
  public ContentType(String typeHeader) {

    // If the type header is not provided, then use the HTTP defaults.
    // This charset is not flagged as inferred, so toString() emits it.
    if (typeHeader == null) {
      type = "application";
      subType = "octet-stream";
      attributes.put(ATTR_CHARSET, "iso-8859-1"); // http default
      return;
    }

    // Get type and subtype
    Matcher typeMatch = TYPE_PATTERN.matcher(typeHeader);
    if (!typeMatch.matches()) {
      throw new IllegalArgumentException("Invalid media type:" + typeHeader);
    }

    // Lower-case with a fixed locale: with the default locale a Turkish or
    // Azeri environment would turn 'I' into a dotless 'ı' and break the
    // comparisons against literals such as "application" below.
    type = typeMatch.group(1).toLowerCase(Locale.ENGLISH);
    subType = typeMatch.group(2).toLowerCase(Locale.ENGLISH);

    // Get attributes (if any); group 3 always participates in a match
    Matcher attrMatch = ATTR_PATTERN.matcher(typeMatch.group(3));
    while (attrMatch.find()) {
      String value = attrMatch.group(2);      // quoted-string form
      if (value == null) {
        value = attrMatch.group(3);           // token form
        if (value == null) {
          value = "";                         // "name=" with no value
        }
      }
      attributes.put(attrMatch.group(1).toLowerCase(Locale.ENGLISH), value);
    }

    // Infer a default charset encoding if unspecified.
    if (!attributes.containsKey(ATTR_CHARSET)) {
      inferredCharset = true;
      if (subType.endsWith("xml")) {
        if (type.equals("application")) {
          // BUGBUG: Actually have need to look at the raw stream here, but
          // if client omitted the charset for "application/xml", they are
          // ignoring the STRONGLY RECOMMEND language in RFC 3023, sec 3.2.
          // I have little sympathy.
          attributes.put(ATTR_CHARSET, "utf-8");    // best guess
        } else {
          attributes.put(ATTR_CHARSET, "us-ascii"); // RFC3023, sec 3.1
        }
      } else if (subType.equals("json")) {
        attributes.put(ATTR_CHARSET, "utf-8");    // RFC4627, sec 3
      } else {
        attributes.put(ATTR_CHARSET, "iso-8859-1"); // http default
      }
    }
  }

  /** Returns the media type, e.g. "application". */
  public String getType() {
    return type;
  }

  /**
   * Sets the media type.
   *
   * @throws IllegalStateException if this instance is locked
   */
  public void setType(String type) {
    assertNotLocked();
    this.type = type;
  }

  /** Returns the media subtype, e.g. "atom+xml". */
  public String getSubType() {
    return subType;
  }

  /**
   * Sets the media subtype.
   *
   * @throws IllegalStateException if this instance is locked
   */
  public void setSubType(String subType) {
    assertNotLocked();
    this.subType = subType;
  }

  /**
   * Returns the full media type: "type/subtype", followed by
   * ";type=value" if a "type" attribute is present.
   */
  public String getMediaType() {
    StringBuilder sb = new StringBuilder();
    sb.append(type);
    sb.append("/");
    sb.append(subType);
    if (attributes.containsKey("type")) {
      sb.append(";type=").append(attributes.get("type"));
    }
    return sb.toString();
  }

  /**
   * Makes the object immutable and returns it.
   *
   * This should at least be used when keeping a {@link ContentType} instance as
   * a static.
   */
  public ContentType lock() {
    locked = true;
    return this;
  }

  /** Throws {@link IllegalStateException} if this instance is locked. */
  private void assertNotLocked() {
    if (locked) {
      throw new IllegalStateException("Unmodifiable instance");
    }
  }

  /**
   * Returns the additional attributes of the content type. The returned map
   * is an unmodifiable view if this instance is locked, and the live internal
   * map otherwise.
   */
  public Map<String, String> getAttributes() {
    if (locked) {
      return Collections.unmodifiableMap(attributes);
    }
    return attributes;
  }

  /**
   * Returns the additional attribute by name of the content type.
   *
   * @param name attribute name; the lookup is exact, and names parsed by the
   *             constructor are stored in lower case
   * @return the attribute value, or {@code null} if there is none
   */
  public String getAttribute(String name) {
    return attributes.get(name);
  }

  /**
   * Returns the charset attribute of the content type or null if the
   * attribute has not been set.
   */
  public String getCharset() {
    return attributes.get(ATTR_CHARSET);
  }

  /**
   * Returns whether this content type is matched by the content type found in
   * the "Accept" header field of an HTTP request.
   *
   * <p>The accepted type and subtype may each be the wildcard "*".
   *
   * <p>For atom content type, this method will check the optional attribute
   * 'type'. If the type attribute is set in both this and {@code
   * acceptedContentType}, then they must be the same. That is, {@code
   * application/atom+xml} will match both {@code
   * application/atom+xml;type=feed} and {@code
   * application/atom+xml;type=entry}, but {@code
   * application/atom+xml;type=entry} will not match {@code
   * application/atom+xml;type=feed}.
   *
   * @param acceptedContentType content type found in the "Accept" header field
   *                            of an HTTP request
   */
  public boolean match(ContentType acceptedContentType) {
    String acceptedType = acceptedContentType.getType();
    String acceptedSubType = acceptedContentType.getSubType();
    // The type test needs its own parentheses: "&&" binds tighter than "||",
    // so without them a "*" type would short-circuit past the subtype and
    // atom checks.
    return (STAR.equals(acceptedType) || type.equals(acceptedType))
        && (STAR.equals(acceptedSubType) || subType.equals(acceptedSubType))
        && (!isAtom() || matchAtom(acceptedContentType));
  }

  /** Returns true if this is an atom content type. */
  private boolean isAtom() {
    return "application".equals(type) && "atom+xml".equals(subType);
  }

  /**
   * Compares the optional 'type' attribute of two content types.
   *
   * <p>This method accepts atom content type without the 'type' attribute
   * but if the types are specified, they must match.
   */
  private boolean matchAtom(ContentType acceptedContentType) {
    String atomType = getAttribute("type");
    String acceptedAtomType = acceptedContentType.getAttribute("type");

    return atomType == null || acceptedAtomType == null
        || atomType.equals(acceptedAtomType);
  }

  /**
   * Generates the Content-Type value: "type/subtype" followed by one
   * ";name=value" pair per attribute. A value that is not a plain token is
   * wrapped in double quotes, and a charset that was inferred rather than
   * parsed is omitted.
   */
  @Override
  public String toString() {
    StringBuilder sb = new StringBuilder();
    sb.append(type);
    sb.append("/");
    sb.append(subType);
    for (Map.Entry<String, String> attribute : attributes.entrySet()) {
      String name = attribute.getKey();

      // Don't include any inferred charset attribute in output.
      if (inferredCharset && ATTR_CHARSET.equals(name)) {
        continue;
      }
      String value = attribute.getValue();
      sb.append(";");
      sb.append(name);
      sb.append("=");
      if (TOKEN_PATTERN.matcher(value).matches()) {
        sb.append(value);
      } else {
        sb.append('"').append(value).append('"');
      }
    }
    return sb.toString();
  }

  /**
   * Two content types are equal if their type, subtype and attribute maps
   * are equal.
   */
  @Override
  public boolean equals(Object o) {
    if (this == o) {
      return true;
    }
    if (o == null || getClass() != o.getClass()) {
      return false;
    }
    ContentType that = (ContentType) o;
    return type.equals(that.type) && subType.equals(that.subType) && attributes
        .equals(that.attributes);
  }

  @Override
  public int hashCode() {
    return (type.hashCode() * 31 + subType.hashCode()) * 31 + attributes
        .hashCode();
  }
}