package org.dcache.util; import com.google.common.base.CharMatcher; import java.net.URI; import java.net.URISyntaxException; /** * This class provides utility methods for encoding UTF-8 String data using * various formats. The methods either provide missing functionality or * contain work-arounds for buggy library implementations. * * Most methods provide a method with a signature that takes a StringBuilder * to which the encoded form is appended, and a method with the same name * but that returns the encoded value as a String. */ public class StringMarkup { // See RFC 822 for definition of quoted-string special private static final CharMatcher QUOTE_STRING_SPECIAL = CharMatcher.anyOf("\\\""); private static final String SCHEME_FILE = "file"; private static final int SCHEME_FILE_LENGTH = SCHEME_FILE.length(); /** * Provides the quoted-string markup, as defined in RFC 822. This is * a simple markup where '\' before any character makes that character * a literal. * * Any occurrence of a backslash or double-quote character is marked up and * the resulting string is placed in double-quotes. * @param sb The StringBuilder to append the marked-up value * @param src The unencoded string. * @return the StringBuilder. */ public static StringBuilder quotedString(StringBuilder sb, String src) { sb.append('\"'); for(int i = 0; i < src.length(); i++) { char c = src.charAt(i); if(QUOTE_STRING_SPECIAL.matches(c)) { sb.append('\\'); } sb.append(c); } sb.append('\"'); return sb; } /** * Provides the quoted-string markup, as defined in RFC 822. This is * a simple markup where '\' before any character makes that character * a literal. * * Any occurrence of a backslash or double-quote character is marked up and * the resulting string is placed in double-quotes. * @param src The unencoded string. * @return the encoded string. */ public static String quotedString(String src) { return quotedString(new StringBuilder(), src).toString(); } /** * The string is encoded by mapping the characters to bytes using UTF-8 * and any reserved characters are marked up using percent symbol followed * by two hexadecimal digits from the set {'0'-'9', 'A'-'F'}. * This is in accordance with RFC 3986. * @param sb The StringBuilder to append the marked-up value * @param src The unencoded string. * @return the StringBuilder. * @throws RuntimeException if the path is somehow illegal. */ public static StringBuilder percentEncode(StringBuilder sb, String src) { return sb.append(percentEncode(src)); } /** * The string is encoded by mapping the characters to bytes using UTF-8 * and any reserved characters are marked up using percent symbol followed * by two hexadecimal digits from the set {'0'-'9', 'A'-'F'}. * This is in accordance with RFC 3986. * @param sb The StringBuilder to append the marked-up value * @param src The unencoded string. * @return the StringBuilder. * @throws RuntimeException if the path is somehow illegal. */ public static String percentEncode(String src) { URI uri; /* * This method contains a work-around for a JRE bug: * * https://bugs.openjdk.java.net/show_bug.cgi?id=100223 * * We should be able to use the four-argument constructor to obtain * the encoded form of the path element: * * uri = new URI(null, null, path, null); * uri.toASCIIString() * * However, this can fail if the path contains a colon. Instead, we * use the "file" scheme and ensure the path is absolute. The code * then strips off the initial "file:/" to obtain the encoded path. */ try { uri = new URI(SCHEME_FILE, null, '/' + src, null); } catch (URISyntaxException e) { throw new RuntimeException("illegal path element: " + e.getMessage(), e); } String encoded = uri.toASCIIString(); int idx = SCHEME_FILE_LENGTH +2; // +2 for ':/' in 'file:/' return encoded.substring(idx, encoded.length()); } }