/*
* Copyright 2010 FatWire Corporation. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package tools.gsf.url;
import com.fatwire.cs.core.uri.Assembler;
import com.fatwire.cs.core.uri.Util;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.UnsupportedEncodingException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.Collection;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Properties;
/**
 * Lightweight abstract assembler that handles property management, provides a
 * logger, handles encoding and decoding and query string processing. Much
 * lighter in weight than
 * <code>com.fatwire.cs.core.uri.AbstractAssembler</code>.
 *
 * @author Tony Field
 * @since Sep 27, 2008
 */
public abstract class LightweightAbstractAssembler implements Assembler {

    /**
     * Logger for use by sub-classes.
     */
    protected static final Logger LOG = LoggerFactory.getLogger("tools.gsf.url.LightweightAbstractAssembler");

    /** Lower-case name of the query parameter that may override the decoding charset. */
    private static final String CHARSET_LOWER = "_charset_";

    /** Upper-case name of the query parameter that may override the decoding charset. */
    private static final String CHARSET_UPPER = "_CHARSET_";

    /**
     * Encoding handed to <code>Util.encode</code>/<code>Util.decode</code>.
     * <code>null</code> when UTF-8 turned out to be unsupported at
     * construction time.
     */
    private final String encoding;

    /** Properties copied in through {@link #setProperties(Properties)}. */
    private final Map<String, String> properties = new HashMap<String, String>();

    /**
     * Constructor. Upon object construction, support for UTF-8 encoding is
     * tested, and the result is cached for future use in the encode() and
     * decode() methods.
     * <p>
     * UTF-8 is the recommended URLEncoding:
     * <ul>
     * <li><a
     * href="http://www.w3.org/TR/html40/appendix/notes.html#non-ascii-chars"
     * >http://www.w3.org/TR/html40/appendix/notes.html#non-ascii-chars</a></li>
     * <li><a
     * href="http://java.sun.com/j2ee/1.4/docs/tutorial/doc/WebI18N5.html"
     * >http://java.sun.com/j2ee/1.4/docs/tutorial/doc/WebI18N5.html</a></li>
     * <li><a
     * href="http://www.ietf.org/rfc/rfc2396.txt">http://www.ietf.org/rfc/
     * rfc2396.txt</a></li>
     * </ul>
     */
    protected LightweightAbstractAssembler() {
        String enc = "UTF-8";
        try {
            // Probe only: the encoded result is discarded, we just want to
            // know whether the encoding is accepted.
            Util.encode("fake string", enc);
        } catch (UnsupportedEncodingException e) {
            LOG.warn("UTF-8 encoding not supported by this platform. Using the platform's default encoding as the URL encoding.");
            // A null encoding is passed on to Util from here on; per the
            // warning above it is expected to mean "platform default".
            enc = null;
        }
        this.encoding = enc;
    }

    /**
     * Copies every property exposed by <code>props.propertyNames()</code>
     * into this assembler so that it can later be read through
     * {@link #getProperty(String, String)}. Existing entries with the same
     * name are overwritten.
     *
     * @param props properties to import; must not be null
     */
    public void setProperties(Properties props) {
        Enumeration<?> en = props.propertyNames();
        while (en.hasMoreElements()) {
            String pName = (String) en.nextElement();
            String pValue = props.getProperty(pName);
            this.properties.put(pName, pValue);
        }
    }

    /**
     * Convenience method to get a property value set into the assembler from
     * the configuration files.
     *
     * @param name name of property to import
     * @param dephault default value of property - returned if the property
     *            value is not specified
     * @return property value or dephault value
     */
    protected String getProperty(String name, String dephault) {
        String result = properties.get(name);
        return result == null ? dephault : result;
    }

    /**
     * URLEncodes a string using the encoding specified by this class.
     *
     * @param string the string to encode
     * @return encoded string, or null if the input was null
     * @throws IllegalStateException if UTF-8 encoding is not supported and the
     *             platform's default encoding is not supported. The
     *             underlying exception is attached as the cause.
     */
    protected final String encode(String string) {
        if (string == null) {
            return null;
        }
        try {
            return Util.encode(string, encoding);
        } catch (UnsupportedEncodingException ex) {
            String msg = "Unexpected failure encoding string '" + string + "'using an encoding (" + encoding
                    + "). Exception: " + ex;
            // Chain the cause so the original stack trace is not lost.
            throw new IllegalStateException(msg, ex);
        }
    }

    /**
     * URLDecodes a string using the encoding specified by this class.
     *
     * @param string encoded string
     * @return decoded string, or null if the input was null
     * @throws IllegalStateException if UTF-8 encoding is not supported and the
     *             platform's default encoding is not supported.
     * @throws IllegalArgumentException if the string is not well-formed for
     *             decoding.
     */
    protected final String decode(String string) {
        return decode(string, null);
    }

    /**
     * URLDecodes a string using the encoding specified.
     *
     * @param string encoded string
     * @param encoding the encoding to use to decode the string. If null is
     *            specified, the decoding specified by this class shall be used.
     * @return decoded string, or null if the input was null
     * @throws IllegalStateException if the encoding specified is not supported,
     *             or if UTF-8 encoding is not supported and the platform's
     *             default encoding is not supported.
     * @throws IllegalArgumentException if the string is not well-formed for
     *             decoding.
     */
    protected final String decode(String string, String encoding) {
        if (string == null) {
            return null;
        }
        final String effectiveEncoding = encoding == null ? this.encoding : encoding;
        try {
            return Util.decode(string, effectiveEncoding);
        } catch (IllegalArgumentException iae) {
            // Re-thrown with context (the offending string and encoding);
            // the original exception is kept as the cause.
            throw new IllegalArgumentException("Failure decoding string '" + string + "' using encoding '"
                    + effectiveEncoding + "'. (" + iae.getMessage() + ")", iae);
        } catch (UnsupportedEncodingException ex) {
            // This is not expected to ever occur.
            throw new IllegalStateException("Unexpected failure decoding string '" + string + "'using encoding '"
                    + effectiveEncoding + "'. (" + ex + ")", ex);
        }
    }

    /**
     * The multi-arg <code>java.net.URI</code> constructors quote illegal
     * characters. However, this class requires that the query string already be
     * properly URLEncoded. As a result, we can't use the multi-arg URI
     * constructor because all of our % symbols and the + symbol will end up
     * getting double-encoded. So, we need to construct a full URL ourselves so
     * we can use the single-arg URI constructor, because it does not quote
     * anything.
     * <p>
     * There are multiple variants of combinations of these parameters to create
     * a valid URL. Consult the URI specification for what is allowed and what
     * is not. The URI constructor will throw a URISyntaxException if required
     * components are missing for a given combination.
     *
     * @param scheme the URI scheme (protocol)
     * @param authority the URI authority (host:port)
     * @param path the path for the URI (servlet context path, servlet name,
     *            pathinfo)
     * @param quotedQueryString the query string, with illegal characters
     *            already quoted.
     * @param fragment the fragment (anchor)
     * @return the valid URI with proper encoding
     * @throws URISyntaxException if there is a problem with what is passed in
     */
    protected static final URI constructURI(final String scheme, final String authority, final String path,
            final String quotedQueryString, final String fragment) throws URISyntaxException {
        // Update, Feb 25, 2005 by Tony Field
        StringBuilder bf = new StringBuilder();
        if (scheme != null) {
            bf.append(scheme).append(':'); // nothing legal can be quoted
        }
        if (authority != null) {
            // nothing legal to quote until I18N URLs work
            bf.append("//").append(authority);
        }
        // Path needs quoting though, so let a throw-away URI object do it for
        // us and take its raw (quoted) path.
        if (path != null) {
            bf.append(new URI(null, null, path, null, null).getRawPath());
        }
        if (quotedQueryString != null) {
            bf.append('?').append(quotedQueryString); // already quoted
        }
        // The fragment needs quoting too. A URI consisting only of a fragment
        // renders as "#fragment", so the separator comes for free. Use
        // toASCIIString() because we need the quoted value (toString() is
        // really just for readability and debugging).
        if (fragment != null) {
            bf.append(new URI(null, null, null, null, fragment).toASCIIString());
        }
        URI uri = new URI(bf.toString());
        // Each log statement is guarded by its own level so the message is
        // only built when it will actually be written.
        if (LOG.isTraceEnabled()) {
            LOG.trace("Constructing new URI using the following components: \n" + "scheme=" + scheme + " \n"
                    + "authority=" + authority + " \n" + "path=" + path + " \n" + "query=" + quotedQueryString + " \n"
                    + "fragment=" + fragment);
        }
        if (LOG.isDebugEnabled()) {
            LOG.debug("Assembled URI: " + uri.toASCIIString());
        }
        return uri;
    }

    /**
     * Parse a query string and put the parameters into a map. Input parameters
     * will be URLDecoded prior to their addition into the resultant map.
     * <p>
     * Note that the map returned contains a <em><code>String[]</code> as the
     * value, not a single <code>String</code> value</em>. This provides support
     * for query strings with multiple values for a given parameter name.
     * <p>
     * Parsing rules:
     * <ul>
     * <li>Pairs are separated by <code>&amp;</code>; within a pair the name
     * and value are separated by the first <code>=</code>.</li>
     * <li>Surrounding whitespace is trimmed from raw names and values.</li>
     * <li>Tokens without a <code>=</code> (including empty tokens produced by
     * <code>&amp;&amp;</code>) are ignored.</li>
     * <li>An empty value (<code>name=</code>) is kept as an empty string,
     * regardless of where the pair appears in the query string.</li>
     * <li>Parameters appear in the returned map in the order in which they
     * were first seen.</li>
     * </ul>
     * <p>
     * This decoding method is smart enough to be able to interpret the
     * <code>_charset_</code> URL parameter that is often used by IE. An empty
     * <code>_charset_</code> value is treated as if the parameter had not been
     * specified.
     *
     * @param qry string value for query
     * @return mutable map containing <code>String</code>/<code>String[]</code>
     *         pairs; never null.
     * @throws IllegalArgumentException if there are mistakes in the string that
     *             make it impossible to parse.
     * @throws IllegalStateException if more than one <code>_charset_</code>
     *             value is present, or the charset is not supported.
     */
    protected final Map<String, String[]> parseQueryString(String qry) {
        if (qry == null || qry.length() == 0) {
            return new LinkedHashMap<String, String[]>();
        }
        if (LOG.isTraceEnabled()) {
            LOG.trace("Parsing query string: " + qry);
        }

        final Map<String, String[]> rawPairs = splitRawPairs(qry);

        // Figure out which encoding to use to decode the params
        final String charset = resolveCharsetOverride(rawPairs);

        // Decode the raw pairs using the proper encoding and set them into the
        // result map. Values are appended rather than put so that two raw
        // names that decode to the same name do not overwrite each other.
        Map<String, String[]> res = new LinkedHashMap<String, String[]>(rawPairs.size());
        for (Map.Entry<String, String[]> rawPair : rawPairs.entrySet()) {
            final String rawKey = rawPair.getKey();
            final String key = decode(rawKey, charset);
            for (String rawVal : rawPair.getValue()) {
                final String val = decode(rawVal, charset);
                if (LOG.isTraceEnabled()) {
                    StringBuilder bf = new StringBuilder("Parsing query string. Found raw pair [name]=[value]: ");
                    bf.append('[').append(rawKey).append(']').append('=').append('[').append(rawVal).append(']');
                    bf.append(" decoded to: ");
                    bf.append('[').append(key).append(']').append('=').append('[').append(val).append(']');
                    LOG.trace(bf.toString());
                }
                appendValue(res, key, val);
            }
        }
        return res;
    }

    /**
     * Splits a non-empty query string into its raw (still encoded) name/value
     * pairs, preserving the order of first appearance.
     */
    private static Map<String, String[]> splitRawPairs(String qry) {
        final Map<String, String[]> rawPairs = new LinkedHashMap<String, String[]>();
        final int length = qry.length();
        int tokenStart = 0;
        while (tokenStart < length) {
            int tokenEnd = qry.indexOf('&', tokenStart);
            if (tokenEnd == -1) {
                tokenEnd = length;
            }
            final String token = qry.substring(tokenStart, tokenEnd);
            final int equalsAt = token.indexOf('=');
            if (equalsAt != -1) {
                // trim to deal with accidental odd chars in the URL
                final String name = token.substring(0, equalsAt).trim();
                final String value = token.substring(equalsAt + 1).trim();
                appendValue(rawPairs, name, value);
            }
            // tokens without '=' carry no value and are skipped
            tokenStart = tokenEnd + 1;
        }
        return rawPairs;
    }

    /**
     * Adds a value to the array stored under the given name, creating the
     * array if the name has not been seen yet.
     */
    private static void appendValue(Map<String, String[]> target, String name, String value) {
        final String[] existing = target.get(name);
        if (existing == null) {
            target.put(name, new String[] { value });
        } else {
            // param specified more than once.
            final String[] grown = new String[existing.length + 1];
            System.arraycopy(existing, 0, grown, 0, existing.length);
            grown[existing.length] = value;
            target.put(name, grown);
        }
    }

    /**
     * Looks up the <code>_charset_</code> override among the raw pairs (the
     * lower-case name wins over the upper-case one) and returns the encoding
     * to decode with, or null when the class default should be used.
     */
    private static String resolveCharsetOverride(Map<String, String[]> rawPairs) {
        String[] charset = rawPairs.get(CHARSET_LOWER);
        if (charset == null) {
            charset = rawPairs.get(CHARSET_UPPER);
        }
        if (charset == null) {
            return null; // try to follow the spec
        }
        switch (charset.length) {
            case 0:
                throw new IllegalStateException(
                        "Somehow an empty _charst_ param made it into our map. Impossible...");
            case 1:
                // url contains an override for the spec; an empty value
                // names no encoding, so fall back to the default.
                return charset[0].length() == 0 ? null : charset[0];
            default:
                throw new IllegalStateException("Too many values of _charset_ found in the URL");
        }
    }

    /**
     * Given an input map of name-value pairs, construct a query string. This
     * supports multiple values for any given parameter. Names and values are
     * properly encoded. Null and empty values are skipped. Pairs are emitted
     * in the iteration order of the supplied map.
     *
     * @param parameters parameters to encode and place in the query string;
     *            must not be null
     * @return the query string, or null if no values needed to be added.
     * @see #encode(String)
     */
    protected final String constructQueryString(Map<String, String[]> parameters) {
        StringBuilder qryStr = new StringBuilder();
        for (Map.Entry<String, String[]> parameter : parameters.entrySet()) {
            final String[] vals = parameter.getValue();
            if (vals == null) {
                continue;
            }
            final String key = parameter.getKey();
            // Loop through the values for the parameter
            for (String val : vals) {
                if (val == null || val.length() == 0) {
                    continue;
                }
                // Encode once and reuse for both the trace output and the
                // query string itself.
                final String encodedKey = encode(key);
                final String encodedVal = encode(val);
                // Append the correct separator
                if (qryStr.length() > 0) {
                    qryStr.append('&');
                }
                if (LOG.isTraceEnabled()) {
                    StringBuilder bf = new StringBuilder("About to add [key]=[value] to url [" + key + "]=["
                            + val + "]");
                    bf.append(" after encoding: [").append(encodedKey).append("]=[").append(encodedVal)
                            .append("]");
                    LOG.trace(bf.toString());
                }
                // Append the name and value to the URL
                qryStr.append(encodedKey).append('=').append(encodedVal);
            }
        }
        // prepare result
        return qryStr.length() > 0 ? qryStr.toString() : null;
    }

    /**
     * Given an array of query-string-like packed arguments, eliminate the
     * specified parameters and return the packedargs parameter with the values
     * stripped. The remaining parameters keep their original relative order.
     *
     * @param origPackedargsStrings array of query string-like packed args.
     * @param toExclude list of args to remove from the packed args.
     * @return array the same length as the original array, containing the same
     *         values, except the <code>toExclude</code> parameters are removed.
     *         If all params end up getting removed, the packedargs string ends
     *         up being null. The array returned is never null though.
     * @throws IllegalArgumentException if the input args or the input list are
     *             null.
     */
    protected final String[] excludeFromPackedargs(String[] origPackedargsStrings, Collection<String> toExclude) {
        if (origPackedargsStrings == null) {
            throw new IllegalArgumentException("OrigPackedArgsStrings must not be null");
        }
        if (toExclude == null) {
            throw new IllegalArgumentException("ToExclude list may not be null");
        }
        final String[] newPackedargsStrings = new String[origPackedargsStrings.length];
        for (int i = 0; i < origPackedargsStrings.length; i++) {
            final Map<String, String[]> oldPacked = parseQueryString(origPackedargsStrings[i]);
            final Map<String, String[]> newPacked = new LinkedHashMap<String, String[]>();
            for (Map.Entry<String, String[]> packed : oldPacked.entrySet()) {
                final String opK = packed.getKey();
                if (LOG.isTraceEnabled()) {
                    LOG.trace("checking to see if a param should be excluded from packedargs: " + opK);
                }
                if (!toExclude.contains(opK)) {
                    newPacked.put(opK, packed.getValue());
                }
            }
            newPackedargsStrings[i] = constructQueryString(newPacked);
        }
        return newPackedargsStrings;
    }
}