UriComponent.java example

Explorer
etk-component-master
package org.etk.core.rest.impl.uri;

import java.io.ByteArrayOutputStream;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.List;

import javax.ws.rs.core.MultivaluedMap;
import javax.ws.rs.core.PathSegment;

import org.etk.core.rest.impl.MultivaluedMapImpl;

public final class UriComponent {
  
  /**
   * Constructor.
   */
  private UriComponent() {
  }

  // Components of URI, see http://gbiv.com/protocols/uri/rfc/rfc3986.htm
  /**
   * Scheme URI component.
   */
  public static final int       SCHEME       = 0;

  /**
   * UserInfo URI component.
   */
  public static final int       USER_INFO    = 1;

  /**
   * Host URI component.
   */
  public static final int       HOST         = 2;

  /**
   * Port URI component.
   */
  public static final int       PORT         = 3;

  /**
   * Path segment URI sub-component, it can't contains '/'.
   */
  public static final int       PATH_SEGMENT = 4;

  /**
   * Path URI components, consists of path-segments.
   */
  public static final int       PATH         = 5;

  /**
   * Query string.
   */
  public static final int       QUERY        = 6;

  /**
   * Fragment.
   */
  public static final int       FRAGMENT     = 7;

  /**
   * Scheme-specific part.
   */
  public static final int       SSP          = 8;

  // very mess :( part

  /**
   * The letters of the basic Latin alphabet.
   */
  private static final String   ALPHA        = fillTable("A-Z") + fillTable("a-z");

  /**
   * Digits.
   */
  private static final String   DIGIT        = fillTable("0-9");

  /**
   * Characters that are allowed in a URI but do not have a reserved purpose are
   * called unreserved. These include uppercase and lowercase letters, decimal
   * digits, hyphen, period, underscore, and tilde.
   * <p>
   * Unreserved = ALPHA | DIGIT | '-' | '.' | '_' | '~'
   */
  private static final String   UNRESERVED   = ALPHA + DIGIT + "-._~";

  /**
   * The subset of the reserved characters (gen-delims) is used as delimiters of
   * the generic URI components.
   */
  private static final String   GEN_DELIM    = ":/?#[]@";

  /**
   * Sub-delims characters.
   */
  private static final String   SUB_DELIM    = "!$&'()*+,;=";

  // --------------------

  /**
   * Characters that used for percent encoding.
   */
  private static final String   HEX_DIGITS   = "0123456789ABCDEF";

  /**
   * Array of legal characters for each component of URI.
   */
  private static final String[] ENCODING     = new String[9];

  // fill table
  static {
    ENCODING[SCHEME] = ALPHA + DIGIT + "+-.";
    ENCODING[USER_INFO] = UNRESERVED + SUB_DELIM + ":";
    ENCODING[HOST] = UNRESERVED + SUB_DELIM;
    ENCODING[PORT] = DIGIT;
    ENCODING[PATH_SEGMENT] = UNRESERVED + SUB_DELIM + ":@";
    ENCODING[PATH] = ENCODING[PATH_SEGMENT] + "/";
    ENCODING[QUERY] = ENCODING[PATH] + "?";
    ENCODING[FRAGMENT] = ENCODING[QUERY];
    ENCODING[SSP] = UNRESERVED + SUB_DELIM + GEN_DELIM;
  }

  /**
   * UTF-8 Charset.
   */
  private static final Charset  UTF8         = Charset.forName("UTF-8");

  /**
   * For processing statements such as 'a-z', '0-9', etc.
   * 
   * @param statement statement
   * @return string abcd...zABCD...Z0123456789
   */
  private static String fillTable(String statement) {
    StringBuffer sb = new StringBuffer();
    if (statement.length() != 3 || statement.charAt(1) != '-')
      throw new IllegalArgumentException("Illegal format of source string, e. g. A-Z");

    char end = statement.charAt(2);

    for (char c = statement.charAt(0); c <= end; c++)
      sb.append(c);

    return sb.toString();
  }

  /**
   * Encode given URI string.
   * 
   * @param str the URI string
   * @param containsUriParams true if the source string contains URI parameters
   * @param component component of URI, scheme, host, port, etc
   * @return encoded string
   */
  // TODO encoding for IPv6
  public static String encode(String str, int component, boolean containsUriParams) {
    if (str == null)
      throw new IllegalArgumentException();
    return encodingInt(str, component, containsUriParams, false);
  }
  
  /**
   * Validate content of percent-encoding string.
   * 
   * @param str the string which must be validate
   * @param component component of URI, scheme, host, port, etc
   * @param containsUriParams true if the source string contains URI parameters
   * @return the source string
   */
  // TODO validation for IPv6
  public static String validate(String str, int component, boolean containsUriParams) {
    for (int i = 0; i < str.length(); i++) {
      char ch = str.charAt(i);

      if ((ch < 128 && !needEncode(ch, component))
          || ((ch == '{' || ch == '}') && containsUriParams) || ch == '%')
        continue;

      throw new IllegalArgumentException("Illegal character, index " + i + ": " + str);
    }

    return str;
  }

  /**
   * Check string and if it does not contains any '%' characters validate it for
   * contains only valid characters. If it contains '%' then check does
   * following two character is valid hex numbers, if not then encode '%' to
   * '%25' otherwise keep characters without change, there is no double encoding.
   * 
   * @param str source string
   * @param component part of URI, e. g. schema, host, path
   * @param containsUriParams does string may contains URI templates
   * @return valid string
   */
  public static String recognizeEncode(String str, int component, boolean containsUriParams) {
    if (str == null) 
      throw new IllegalArgumentException();
    return encodingInt(str, component, containsUriParams, true);
  }
  
  /**
   * @param str source string
   * @param component part of URI, e. g. schema, host, path
   * @param containsUriParams does string may contains URI templates
   * @param recognizeEncoded must check string to avoid double encoding
   * @return valid string
   */
  private static String encodingInt(String str,
                                    int component,
                                    boolean containsUriParams,
                                    boolean recognizeEncoded) {
    StringBuffer sb = null;
    int l = str.length();
    for (int i = 0; i < l; i++) {
      char ch = str.charAt(i);

      if (ch == '%' && recognizeEncoded) {
        if (UriComponent.checkHexCharacters(str, i)) {

          if (sb != null)
            sb.append(ch).append(str.charAt(++i)).append(str.charAt(++i));

        } else {

          if (sb == null) {
            sb = new StringBuffer();
            sb.append(str.substring(0, i));
          }
          addPercentEncoded(ch, sb); // in fact add '%25'

        }
      } else if (ch < 128 && !needEncode(ch, component)) {

        if (sb != null)
          sb.append(ch);

      } else {

        if ((ch == '{' || ch == '}') && containsUriParams) {

          if (sb != null)
            sb.append(ch);

        } else {

          if (sb == null) {
            sb = new StringBuffer();
            sb.append(str.substring(0, i));
          }

          if (ch < 128)
            addPercentEncoded(ch, sb);
          else
            addUTF8Encoded(ch, sb);

        }

      }
    }

    return sb != null ? sb.toString() : str;
  }
  
  /**
   * Decode percent encoded URI string.
   * 
   * @param str the source percent encoded string
   * @param component component of URI, scheme, host, port, etc. NOTE type of
   *          component is not used currently but will be used for decoding IPv6
   *          addresses
   * @return decoded string
   */
  // TODO decoding for IPv6
  public static String decode(String str, int component) {
    if (str == null)
      throw new IllegalArgumentException("Decoded string is null");

    int p = 0;
    int l = str.length();
    StringBuffer sb = new StringBuffer();
    
    /* NOTE spaces can be encoded with '+' */
//    if ((p = str.indexOf('%')) < 0)
//      return str; // nothing to do

//    if (l < 3)
    if (l < 3 && str.indexOf('%') > 0)
      throw new IllegalArgumentException("Mailformed string " + str);

//    if ((p = str.lastIndexOf('%')) > l - 3)
    p = str.lastIndexOf('%');
    if (p > 0 && p > l - 3)
      throw new IllegalArgumentException("Mailformed string at index " + p);

    ByteArrayOutputStream out = new ByteArrayOutputStream();
    p = 0; // reset pointer
    while (p < l) {
      char c = str.charAt(p);

      if (c != '%') {

        // NOTE can be potential problem but we can't ignore this
        if (c == '+')
          sb.append(' ');
        else
          sb.append(c);

        p++;

      } else {

        p = percentDecode(str, p, out);

        byte[] buff = out.toByteArray();

        if (buff.length == 1 && (buff[0] & 0xFF) < 128)
          sb.append((char) buff[0]);
        else
          addUTF8Decoded(buff, sb);

        out.reset();
      }
    }
    return sb.toString();
  }

  /**
   * Check must charter be encoded.
   * 
   * @param ch character
   * @param component the URI component
   * @return true if character must be encoded false otherwise
   */
  private static boolean needEncode(char ch, int component) {
    return ENCODING[component].indexOf(ch) == -1;
  }

  /**
   * Append percent encoded character in StringBuffer.
   * 
   * @param c character which must be encoded
   * @param sb StringBuffer to add character
   */
  private static void addPercentEncoded(int c, StringBuffer sb) {
    sb.append('%');
    sb.append(HEX_DIGITS.charAt(c >> 4));
    sb.append(HEX_DIGITS.charAt(c & 0x0F));
  }

  /**
   * Append UTF-8 encoded character in StringBuffer.
   * 
   * @param c character which must be encoded
   * @param sb StringBuffer to add character
   */
  private static void addUTF8Encoded(char c, StringBuffer sb) {
    ByteBuffer buf = UTF8.encode("" + c);
    while (buf.hasRemaining())
      addPercentEncoded(buf.get() & 0xFF, sb);
  }

  /**
   * Decode percent encoded string.
   * 
   * @param str the source string
   * @param p start position in string
   * @param out output buffer for decoded characters
   * @return current position in source string
   */
  private static int percentDecode(String str, int p, ByteArrayOutputStream out) {
    int l = str.length();
    for (;;) {
      char hc = getHexCharacter(str, ++p); // higher char
      char lc = getHexCharacter(str, ++p); // lower char

      int r = (Character.isDigit(hc) ? hc - '0' : hc - 'A' + 10) << 4
          | (Character.isDigit(lc) ? lc - '0' : lc - 'A' + 10);

      out.write((byte) r);
      p++;

      if (p == l || str.charAt(p) != '%')
        break;
    }

    return p;
  }
  
  /**
   * Check does two next characters after '%' represent percent-encoded
   * character.
   * 
   * @param s source string
   * @param p position of character in string
   * @return true is two characters after '%' represent percent-encoded
   *         character false otherwise
   */
  public static boolean checkHexCharacters(String s, int p) {
    if (p > s.length() - 3)
      return false;
    try {
      getHexCharacter(s, ++p);
      getHexCharacter(s, ++p);
      return true;
    } catch (IllegalArgumentException e) {
      return false;
    }
  }

  /**
   * Extract character from given string and check is it one of valid for hex
   * sequence.
   * 
   * @param s source string
   * @param p position of character in string
   * @return character
   */
  private static char getHexCharacter(String s, int p) {
    char c = s.charAt(p);
    if (Character.isLetter(c))
      c = Character.toUpperCase(c);

    if (HEX_DIGITS.indexOf(c) == -1)
      throw new IllegalArgumentException("Mailformed string at index " + p);

    return c;
  }

  /**
   * Decodes bytes to characters using the UTF-8 decoding and add them to a
   * StringBuffer.
   * 
   * @param buff source bytes
   * @param sb StringBuffer for append characters
   */
  private static void addUTF8Decoded(byte[] buff, StringBuffer sb) {
    CharBuffer cbuff = UTF8.decode(ByteBuffer.wrap(buff));
    sb.append(cbuff.toString());
  }

  /**
   * Parse path segments.
   * 
   * @param path the relative path
   * @param decode true if character must be decoded false otherwise
   * @return List of {@link PathSegment}
   */
  public static List<PathSegment> parsePathSegments(String path, boolean decode) {
    List<PathSegment> l = new ArrayList<PathSegment>();
    if (path == null || path.length() == 0)
      return l;
  
    // remove leading slash
    if (path.charAt(0) == '/')
      path = path.substring(1);
  
    int p = 0;
    int n = 0;
    while (n < path.length()) {
      n = path.indexOf('/', p);
      if (n == -1)
        n = path.length();
  
      l.add(PathSegmentImpl.fromString(path.substring(p, n), decode));
      p = n + 1;
  
    }
  
    return l;
  }

  /**
   * Parse encoded query string.
   * 
   * @param rawQuery source query string
   * @param decode if true then query parameters will be decoded
   * @return {@link MultivaluedMap} with query parameters
   */
  public static MultivaluedMap<String, String> parseQueryString(String rawQuery, boolean decode) {
    MultivaluedMap<String, String> m = new MultivaluedMapImpl();
    if (rawQuery == null || rawQuery.length() == 0)
      return m;

    int p = 0;
    int n = 0;
    while (n < rawQuery.length()) {
      n = rawQuery.indexOf('&', p);
      if (n == -1)
        n = rawQuery.length();

      String pair = rawQuery.substring(p, n);
      if (pair.length() == 0)
        continue;

      String name;
      String value = ""; // default value
      int eq = pair.indexOf('=');
      if (eq == -1) // no value, default is ""
        name = pair;
      else {
        name = pair.substring(0, eq);
        value = pair.substring(eq + 1);
      }

      m.add(decode ? decode(name, QUERY) : name, decode ? decode(value, QUERY) : value);

      p = n + 1;
    }

    return m;
  }

}