/* * ContentTypeParser.java February 2001 * * Copyright (C) 2001, Niall Gallagher <niallg@users.sf.net> * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or * implied. See the License for the specific language governing * permissions and limitations under the License. */ package org.simpleframework.http.parse; import org.simpleframework.common.KeyMap; import org.simpleframework.common.parse.ParseBuffer; import org.simpleframework.common.parse.Parser; import org.simpleframework.http.ContentType; /** * This provides access to the MIME type parts, that is the primary * type, the secondary type and an optional character set parameter. * The <code>charset</code> parameter is one of many parameters that * can be associated with a MIME type. This however this exposes this * parameter with a typed method. * <p> * The <code>getCharset</code> will return the character encoding the * content type is encoded within. This allows the user of the content * to decode it correctly. Other parameters can be acquired from this * by simply providing the name of the parameter. * * @author Niall Gallagher */ public class ContentTypeParser extends Parser implements ContentType { /** * Used to store the characters consumed for the secondary type. */ private ParseBuffer secondary; /** * Used to store the characters consumed for the primary type. */ private ParseBuffer primary; /** * Used to store the characters for the charset parameter. */ private ParseBuffer charset; /** * Used to store the characters consumed for the type. */ private ParseBuffer type; /** * Used to collect the name of a content type parameter. */ private ParseBuffer name; /** * Used to collect the value of the content type parameter. */ private ParseBuffer value; /** * Used to store the name value pairs of the parameters. */ private KeyMap<String> map; /** * The default constructor will create a <code>ContentParser</code> * that contains no charset, primary or secondary. This can be used * to extract the primary, secondary and the optional charset * parameter by using the parser's <code>parse(String)</code> * method. */ public ContentTypeParser(){ this.secondary = new ParseBuffer(); this.primary = new ParseBuffer(); this.charset = new ParseBuffer(); this.value = new ParseBuffer(); this.type = new ParseBuffer(); this.name = new ParseBuffer(); this.map = new KeyMap<String>(); } /** * This is primarily a convenience constructor. This will parse * the <code>String</code> given to extract the MIME type. This * could be achieved by calling the default no-arg constructor * and then using the instance to invoke the <code>parse</code> * method on that <code>String</code>. * * @param header <code>String</code> containing a MIME type value */ public ContentTypeParser(String header){ this(); parse(header); } /** * This method is used to get the primary and secondary parts * joined together with a "/". This is typically how a content * type is examined. Here convenience is most important, we can * easily compare content types without any parameters. * * @return this returns the primary and secondary types */ public String getType() { return type.toString(); } /** * This sets the primary type to whatever value is in the string * provided is. If the string is null then this will contain a * null string for the primary type of the parameter, which is * likely invalid in most cases. * * @param value the type to set for the primary type of this */ public void setPrimary(String value) { type.reset(value); type.append('/'); type.append(secondary); primary.reset(value); } /** * This is used to retrieve the primary type of this MIME type. The * primary type part within the MIME type defines the generic type. * For example <code>text/plain; charset=UTF-8</code>. This will * return the text value. If there is no primary type then this * will return <code>null</code> otherwise the string value. * * @return the primary type part of this MIME type */ public String getPrimary() { return primary.toString(); } /** * This sets the secondary type to whatever value is in the string * provided is. If the string is null then this will contain a * null string for the secondary type of the parameter, which is * likely invalid in most cases. * * @param value the type to set for the primary type of this */ public void setSecondary(String value) { type.reset(primary); type.append('/'); type.append(value); secondary.reset(value); } /** * This is used to retrieve the secondary type of this MIME type. * The secondary type part within the MIME type defines the generic * type. For example <code>text/html; charset=UTF-8</code>. This * will return the HTML value. If there is no secondary type then * this will return <code>null</code> otherwise the string value. * * @return the primary type part of this MIME type */ public String getSecondary(){ return secondary.toString(); } /** * This will set the <code>charset</code> to whatever value the * string contains. If the string is null then this will not set * the parameter to any value and the <code>toString</code> method * will not contain any details of the parameter. * * @param enc parameter value to add to the MIME type */ public void setCharset(String enc) { charset.reset(enc); } /** * This is used to retrieve the <code>charset</code> of this MIME * type. This is a special parameter associated with the type, if * the parameter is not contained within the type then this will * return null, which typically means the default of ISO-8859-1. * * @return the value that this parameter contains */ public String getCharset() { return charset.toString(); } /** * This is used to retrieve an arbitrary parameter from the MIME * type header. This ensures that values for <code>boundary</code> * or other such parameters are not lost when the header is parsed. * This will return the value, unquoted if required, as a string. * * @param name this is the name of the parameter to be retrieved * * @return this is the value for the parameter, or null if empty */ public String getParameter(String name) { return map.get(name); } /** * This will add a named parameter to the content type header. If * a parameter of the specified name has already been added to the * header then that value will be replaced by the new value given. * Parameters such as the <code>boundary</code> as well as other * common parameters can be set with this method. * * @param name this is the name of the parameter to be added * @param value this is the value to associate with the name */ public void setParameter(String name, String value) { map.put(name, value); } /** * This will initialize the parser when it is ready to parse * a new <code>String</code>. This will reset the parser to a * ready state. The init method is invoked by the parser when * the <code>Parser.parse</code> method is invoked. */ protected void init(){ if(count > 0) { pack(); } clear(); } /** * This is used to clear all previously collected tokens. This * allows the parser to be reused when there are multiple source * strings to be parsed. Clearing of the tokens is performed * when the parser is initialized. */ private void clear() { primary.clear(); secondary.clear(); charset.clear(); name.clear(); value.clear(); type.clear(); map.clear(); off = 0; } /** * Reads and parses the MIME type from the given <code>String</code> * object. This uses the syntax defined by RFC 2616 for the media-type * syntax. This parser is only concerned with one parameter, the * <code>charset</code> parameter. The syntax for the media type is * <pre> * media-type = token "/" token *( ";" parameter ) * parameter = token | literal * </pre> */ protected void parse(){ primary(); off++; secondary(); parameters(); } /** * This is used to remove all whitespace characters from the * <code>String</code> excluding the whitespace within literals. * The definition of a literal can be found in RFC 2616. * <p> * The definition of a literal for RFC 2616 is anything between 2 * quotes but excluding quotes that are prefixed with the backward * slash character. */ private void pack() { char old = buf[0]; int len = count; int seek = 0; int pos = 0; while(seek < len){ char ch = buf[seek++]; if(ch == '"' && old != '\\'){ /* qd-text*/ buf[pos++] = ch; while(seek < len){ old = buf[seek-1]; ch = buf[seek++]; buf[pos++] = ch; if(ch =='"'&& old!='\\'){ /*qd-text*/ break; } } }else if(!space(ch)){ old = buf[seek - 1]; buf[pos++] = old; } } count = pos; } /** * This reads the type from the MIME type. This will fill the * type <code>ParseBuffer</code>. This will read all chars * upto but not including the first instance of a '/'. The type * of a media-type as defined by RFC 2616 is * <code>type/subtype;param=val;param2=val</code>. */ private void primary(){ while(off < count){ if(buf[off] =='/'){ type.append('/'); break; } type.append(buf[off]); primary.append(buf[off]); off++; } } /** * This reads the subtype from the MIME type. This will fill the * subtype <code>ParseBuffer</code>. This will read all chars * upto but not including the first instance of a ';'. The subtype * of a media-type as defined by RFC 2616 is * <code>type/subtype;param=val;param2=val</code>. */ private void secondary(){ while(off < count){ if(buf[off] ==';'){ break; } type.append(buf[off]); secondary.append(buf[off]); off++; } } /** * This will read the parameters from the MIME type. This will search * for the <code>charset</code> parameter within the set of parameters * which are given to the type. The <code>charset</code> param is the * only parameter that this parser will tokenize. * <p> * This will remove any parameters that preceed the charset parameter. * Once the <code>charset</code> is retrived the MIME type is considered * to be parsed. */ private void parameters(){ while(skip(";")){ if(skip("charset=")){ charset(); break; }else{ parameter(); insert(); } } } /** * This will add the name and value tokens to the parameters map. * If any previous value of the given name has been inserted * into the map then this will overwrite that value. This is * used to ensure that the string value is inserted to the map. */ private void insert() { insert(name, value); name.clear(); value.clear(); } /** * This will add the given name and value to the parameters map. * If any previous value of the given name has been inserted * into the map then this will overwrite that value. This is * used to ensure that the string value is inserted to the map. * * @param name this is the name of the value to be inserted * @param value this is the value of a that is to be inserted */ private void insert(ParseBuffer name, ParseBuffer value) { map.put(name.toString(), value.toString()); } /** * This is a parameter as defined by RFC 2616. The parameter is added to a * MIME type e.g. <code>type/subtype;param=val</code> etc. The parameter * name and value are not stored. This is used to simply update the read * offset past the parameter. The reason for reading the parameters is to * search for the <code>charset</code> parameter which will indicate the * encoding. */ private void parameter(){ name(); off++; /* = */ value(); } /** * This will simply read all characters from the buffer before the first '=' * character. This represents a parameter name (see RFC 2616 for token). The * parameter name is not buffered it is simply read from the buffer. This will * not cause an <code>IndexOutOfBoundsException</code> as each offset * is checked before it is acccessed. */ private void name(){ while(off < count){ if(buf[off] =='='){ break; } name.append(buf[off]); off++; } } /** * This is used to read a parameters value from the buf. This will read all * <code>char</code>'s upto but excluding the first terminal <code>char</code> * encountered from the off within the buf, or if the value is a literal * it will read a literal from the buffer (literal is any data between * quotes except if the quote is prefixed with a backward slash character). */ private void value(){ if(quote(buf[off])){ for(off++; off < count;){ if(quote(buf[off])){ if(buf[++off-2]!='\\'){ break; } } value.append(buf[off++]); } }else{ while(off < count){ if(buf[off] ==';') { break; } value.append(buf[off]); off++; } } } /** * This method is used to determine if the specified character is a quote * character. The quote character is typically used as a boundary for the * values within the header. This accepts a single or double quote. * * @param ch the character to determine if it is a quotation * * @return true if the character provided is a quotation character */ private boolean quote(char ch) { return ch == '\'' || ch == '"'; } /** * This is used to read the value from the <code>charset</code> param. * This will fill the <code>charset</code> <code>ParseBuffer</code> and with * the <code>charset</code> value. This will read a literal or a token as * the <code>charset</code> value. If the <code>charset</code> is a literal * then the quotes will be read as part of the charset. */ private void charset(){ if(buf[off] == '"'){ charset.append('"'); for(off++; off < count;){ charset.append(buf[off]); if(buf[off++]=='"') if(buf[off-2]!='\\'){ break; } } }else{ while(off < count){ if(buf[off]==';') { break; } charset.append(buf[off]); off++; } } } /** * This will return the value of the MIME type as a string. This * will concatenate the primary and secondary type values and * add the <code>charset</code> parameter to the type which will * recreate the content type. * * @return this returns the string representation of the type */ private String encode() { StringBuilder text = new StringBuilder(); if(primary != null) { text.append(primary); text.append("/"); text.append(secondary); } if(charset.length() > 0) { text.append("; charset="); text.append(charset); } return encode(text); } /** * This will return the value of the MIME type as a string. This * will concatenate the primary and secondary type values and * add the <code>charset</code> parameter to the type which will * recreate the content type. * * @param text this is the buffer to encode the parameters to * * @return this returns the string representation of the type */ private String encode(StringBuilder text) { for(String name : map) { String value = map.get(name); text.append("; "); text.append(name); if(value != null) { text.append("="); text.append(value);; } } return text.toString(); } /** * This will return the value of the MIME type as a string. This * will concatenate the primary and secondary type values and * add the <code>charset</code> parameter to the type which will * recreate the content type. * * @return this returns the string representation of the type */ public String toString() { return encode(); } }