SafeHtmlUtils.java example

Explorer
google-web-toolkit-svnmirror-master
/*
 * Copyright 2010 Google Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */
package com.google.gwt.safehtml.shared;

import com.google.gwt.regexp.shared.RegExp;

/**
 * Utility class containing static methods for escaping and sanitizing strings.
 */
public final class SafeHtmlUtils {

  private static final String HTML_ENTITY_REGEX = "[a-z]+|#[0-9]+|#x[0-9a-fA-F]+";

  /**
   * An empty String.
   */
  public static final SafeHtml EMPTY_SAFE_HTML = new SafeHtmlString("");

  private static final RegExp AMP_RE = RegExp.compile("&", "g");
  private static final RegExp GT_RE = RegExp.compile(">", "g");
  private static final RegExp LT_RE = RegExp.compile("<", "g");
  private static final RegExp SQUOT_RE = RegExp.compile("\'", "g");
  private static final RegExp QUOT_RE = RegExp.compile("\"", "g");

  /**
   * Returns a {@link SafeHtml} constructed from a safe string, i.e., without escaping
   * the string.
   *
   * <p>
   * <b>Important</b>: For this method to be able to honor the {@link SafeHtml}
   * contract, all uses of this method must satisfy the following constraints:
   *
   * <ol>
   *
   * <li>The argument expression must be fully determined at compile time.
   *
   * <li>The value of the argument must end in "inner HTML" context and not
   * contain incomplete HTML tags. I.e., the following is not a correct use of
   * this method, because the {@code <a>} tag is incomplete:
   *
   * <pre class="code">
   * {@code shb.appendConstantHtml("<a href='").append(url)}</pre>
   *
   * </ol>
   *
   * <p>
   * The first constraint provides a sufficient condition that the argument (and
   * any HTML markup contained in it) originates from a trusted source. The
   * second constraint ensures the composability of {@link SafeHtml} values.
   *
   * <p>
   * When executing client-side in Development Mode, or server side with
   * assertions enabled, the argument is HTML-parsed and validated to satisfy
   * the second constraint (the server-side check can also be enabled
   * programmatically, see
   * {@link SafeHtmlHostedModeUtils#maybeCheckCompleteHtml(String)} for
   * details). For performance reasons, this check is not performed in
   * Production Mode on the client, and with assertions disabled on the server.
   *
   * @param s the string to be wrapped as a {@link SafeHtml}
   * @return {@code s}, wrapped as a {@link SafeHtml}
   * @throws IllegalArgumentException if not running in Production Mode and
   *           {@code html} violates the second constraint
   */
  public static SafeHtml fromSafeConstant(String s) {
    SafeHtmlHostedModeUtils.maybeCheckCompleteHtml(s);
    return new SafeHtmlString(s);
  }

  /**
   * Returns a {@link SafeHtml} containing the escaped string.
   *
   * @param s the input String
   * @return a {@link SafeHtml} instance
   */
  public static SafeHtml fromString(String s) {
    return new SafeHtmlString(htmlEscape(s));
  }

  /**
   * Returns a {@link SafeHtml} constructed from a trusted string, i.e., without
   * escaping the string. No checks are performed. The calling code should be
   * carefully reviewed to ensure the argument meets the {@link SafeHtml} contract.
   *
   * @param s the input String
   * @return a {@link SafeHtml} instance
   */
  public static SafeHtml fromTrustedString(String s) {
    return new SafeHtmlString(s);
  }

  /**
   * HTML-escapes a character. HTML meta characters will be escaped as follows:
   * 
   * <pre>
   * & - &amp;
   * < - &lt;
   * > - &gt;
   * " - &quot;
   * ' - &#39;
   * </pre>
   *
   * @param c the character to be escaped
   * @return a string containing either the input character
   *     or an equivalent HTML Entity Reference
   */
  public static String htmlEscape(char c) {
    switch (c) {
      case '&':
        return "&";
      case '<':
        return "<";
      case '>':
        return ">";
      case '"':
        return """;
      case '\'':
        return "'";
      default:
        return "" + c;
    }
  }

  /**
   * HTML-escapes a string.
   *
   * Note: The following variants of this function were profiled on FF36,
   * Chrome6, IE8:
   * <ol>
   * <li>For each case, check indexOf, then use s.replace(regex, string)</li>
   * <li>For each case, check indexOf, then use s.replaceAll()</li>
   * <li>Check if any metachar is present using a regex, then use #1</li>
   * <li>For each case, use s.replace(regex, string)</li>
   * </ol>
   *
   * #1 was found to be the fastest, and is used below.
   *
   * @param s the string to be escaped
   * @return the input string, with all occurrences of HTML meta-characters
   *         replaced with their corresponding HTML Entity References
   */
  public static String htmlEscape(String s) {
    if (s.indexOf("&") != -1) {
      s = AMP_RE.replace(s, "&");
    }
    if (s.indexOf("<") != -1) {
      s = LT_RE.replace(s, "<");
    }
    if (s.indexOf(">") != -1) {
      s = GT_RE.replace(s, ">");
    }
    if (s.indexOf("\"") != -1) {
      s = QUOT_RE.replace(s, """);
    }
    if (s.indexOf("'") != -1) {
      s = SQUOT_RE.replace(s, "'");
    }
    return s;
  }

  /**
   * HTML-escapes a string, but does not double-escape HTML-entities already
   * present in the string.
   *
   * @param text the string to be escaped
   * @return the input string, with all occurrences of HTML meta-characters
   *         replaced with their corresponding HTML Entity References, with the
   *         exception that ampersand characters are not double-escaped if they
   *         form the start of an HTML Entity Reference
   */
  public static String htmlEscapeAllowEntities(String text) {
    StringBuilder escaped = new StringBuilder();

    boolean firstSegment = true;
    for (String segment : text.split("&", -1)) {
      if (firstSegment) {
        /*
         * The first segment is never part of an entity reference, so we always
         * escape it.
         * Note that if the input starts with an ampersand, we will get an empty
         * segment before that.
         */
        firstSegment = false;
        escaped.append(htmlEscape(segment));
        continue;
      }

      int entityEnd = segment.indexOf(';');
      if (entityEnd > 0 && segment.substring(0, entityEnd).matches(HTML_ENTITY_REGEX)) {
        // Append the entity without escaping.
        escaped.append("&").append(segment.substring(0, entityEnd + 1));

        // Append the rest of the segment, escaped.
        escaped.append(htmlEscape(segment.substring(entityEnd + 1)));
      } else {
        // The segment did not start with an entity reference, so escape the
        // whole segment.
        escaped.append("&").append(htmlEscape(segment));
      }
    }

    return escaped.toString();
  }

  // prevent instantiation
  private SafeHtmlUtils() {
  }
}