/* * Copyright 2010 Google Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); you may not * use this file except in compliance with the License. You may obtain a copy of * the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the * License for the specific language governing permissions and limitations under * the License. */ package com.google.gwt.safehtml.shared; import com.google.gwt.regexp.shared.RegExp; /** * Utility class containing static methods for escaping and sanitizing strings. */ public final class SafeHtmlUtils { private static final String HTML_ENTITY_REGEX = "[a-z]+|#[0-9]+|#x[0-9a-fA-F]+"; /** * An empty String. */ public static final SafeHtml EMPTY_SAFE_HTML = new SafeHtmlString(""); private static final RegExp AMP_RE = RegExp.compile("&", "g"); private static final RegExp GT_RE = RegExp.compile(">", "g"); private static final RegExp LT_RE = RegExp.compile("<", "g"); private static final RegExp SQUOT_RE = RegExp.compile("\'", "g"); private static final RegExp QUOT_RE = RegExp.compile("\"", "g"); /** * Returns a {@link SafeHtml} constructed from a safe string, i.e., without escaping * the string. * * <p> * <b>Important</b>: For this method to be able to honor the {@link SafeHtml} * contract, all uses of this method must satisfy the following constraints: * * <ol> * * <li>The argument expression must be fully determined at compile time. * * <li>The value of the argument must end in "inner HTML" context and not * contain incomplete HTML tags. I.e., the following is not a correct use of * this method, because the {@code <a>} tag is incomplete: * * <pre class="code"> * {@code shb.appendConstantHtml("<a href='").append(url)}</pre> * * </ol> * * <p> * The first constraint provides a sufficient condition that the argument (and * any HTML markup contained in it) originates from a trusted source. The * second constraint ensures the composability of {@link SafeHtml} values. * * <p> * When executing client-side in Development Mode, or server side with * assertions enabled, the argument is HTML-parsed and validated to satisfy * the second constraint (the server-side check can also be enabled * programmatically, see * {@link SafeHtmlHostedModeUtils#maybeCheckCompleteHtml(String)} for * details). For performance reasons, this check is not performed in * Production Mode on the client, and with assertions disabled on the server. * * @param s the string to be wrapped as a {@link SafeHtml} * @return {@code s}, wrapped as a {@link SafeHtml} * @throws IllegalArgumentException if not running in Production Mode and * {@code html} violates the second constraint */ public static SafeHtml fromSafeConstant(String s) { SafeHtmlHostedModeUtils.maybeCheckCompleteHtml(s); return new SafeHtmlString(s); } /** * Returns a {@link SafeHtml} containing the escaped string. * * @param s the input String * @return a {@link SafeHtml} instance */ public static SafeHtml fromString(String s) { return new SafeHtmlString(htmlEscape(s)); } /** * Returns a {@link SafeHtml} constructed from a trusted string, i.e., without * escaping the string. No checks are performed. The calling code should be * carefully reviewed to ensure the argument meets the {@link SafeHtml} contract. * * @param s the input String * @return a {@link SafeHtml} instance */ public static SafeHtml fromTrustedString(String s) { return new SafeHtmlString(s); } /** * HTML-escapes a character. HTML meta characters will be escaped as follows: * * <pre> * & - &amp; * < - &lt; * > - &gt; * " - &quot; * ' - &#39; * </pre> * * @param c the character to be escaped * @return a string containing either the input character * or an equivalent HTML Entity Reference */ public static String htmlEscape(char c) { switch (c) { case '&': return "&"; case '<': return "<"; case '>': return ">"; case '"': return """; case '\'': return "'"; default: return "" + c; } } /** * HTML-escapes a string. * * Note: The following variants of this function were profiled on FF36, * Chrome6, IE8: * <ol> * <li>For each case, check indexOf, then use s.replace(regex, string)</li> * <li>For each case, check indexOf, then use s.replaceAll()</li> * <li>Check if any metachar is present using a regex, then use #1</li> * <li>For each case, use s.replace(regex, string)</li> * </ol> * * #1 was found to be the fastest, and is used below. * * @param s the string to be escaped * @return the input string, with all occurrences of HTML meta-characters * replaced with their corresponding HTML Entity References */ public static String htmlEscape(String s) { if (s.indexOf("&") != -1) { s = AMP_RE.replace(s, "&"); } if (s.indexOf("<") != -1) { s = LT_RE.replace(s, "<"); } if (s.indexOf(">") != -1) { s = GT_RE.replace(s, ">"); } if (s.indexOf("\"") != -1) { s = QUOT_RE.replace(s, """); } if (s.indexOf("'") != -1) { s = SQUOT_RE.replace(s, "'"); } return s; } /** * HTML-escapes a string, but does not double-escape HTML-entities already * present in the string. * * @param text the string to be escaped * @return the input string, with all occurrences of HTML meta-characters * replaced with their corresponding HTML Entity References, with the * exception that ampersand characters are not double-escaped if they * form the start of an HTML Entity Reference */ public static String htmlEscapeAllowEntities(String text) { StringBuilder escaped = new StringBuilder(); boolean firstSegment = true; for (String segment : text.split("&", -1)) { if (firstSegment) { /* * The first segment is never part of an entity reference, so we always * escape it. * Note that if the input starts with an ampersand, we will get an empty * segment before that. */ firstSegment = false; escaped.append(htmlEscape(segment)); continue; } int entityEnd = segment.indexOf(';'); if (entityEnd > 0 && segment.substring(0, entityEnd).matches(HTML_ENTITY_REGEX)) { // Append the entity without escaping. escaped.append("&").append(segment.substring(0, entityEnd + 1)); // Append the rest of the segment, escaped. escaped.append(htmlEscape(segment.substring(entityEnd + 1))); } else { // The segment did not start with an entity reference, so escape the // whole segment. escaped.append("&").append(htmlEscape(segment)); } } return escaped.toString(); } // prevent instantiation private SafeHtmlUtils() { } }