/*
* Copyright 2010 Google Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.google.template.soy.shared.restricted;
import static java.nio.charset.StandardCharsets.UTF_8;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import com.google.common.base.Function;
import com.google.common.base.Joiner;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.lang.reflect.Modifier;
import java.util.Arrays;
import java.util.List;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import junit.framework.AssertionFailedError;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TestName;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;
import org.mozilla.javascript.Context;
import org.mozilla.javascript.ContextFactory;
import org.mozilla.javascript.ScriptableObject;
/**
* Make sure that the escapers preserve containment consistently in both Java and JavaScript.
*
* <p>
*/
@RunWith(JUnit4.class)
public class EscapingConventionsTest {
@Rule public final TestName testName = new TestName();
@Test
public void testAllEscapersIterated() {
// Make sure that all Escapers are present in getAllEscapers().
Set<String> actual = Sets.newLinkedHashSet();
Set<String> expected = Sets.newLinkedHashSet();
for (EscapingConventions.CrossLanguageStringXform directive :
EscapingConventions.getAllEscapers()) {
expected.add(directive.getClass().getSimpleName());
}
for (Class<?> clazz : EscapingConventions.class.getClasses()) {
if (EscapingConventions.CrossLanguageStringXform.class.isAssignableFrom(clazz)
&& !Modifier.isAbstract(clazz.getModifiers())) {
actual.add(clazz.getSimpleName());
}
}
assertEquals(expected, actual);
}
@Test
public void testJavaScriptStringDirective() throws Exception {
assertEscaping(
// The {$s} in the below is replaced with a bunch of malicious strings.
"var x = '{$s}', y = \"{$s}\";\n/* foo */",
// But the untrusted strings are defanged by the escape directive.
"escapeJsString",
// A lexer is applied to the below.
JS_LEXER,
// And we should get these tokens back, but with any possible value for the nulls, since
// the actual escaped strings depend on the untrusted string.
"var",
" ",
"x",
" ",
"=",
" ",
null,
",",
" ",
"y",
" ",
"=",
" ",
null,
";",
"\n",
"/* foo */");
}
@Test
public void testJavaRegexStringDirective() throws Exception {
assertEscaping(
"var x = /foo-{$s}/; x.test('foo-bar');",
"escapeJsRegex",
JS_LEXER,
"var",
" ",
"x",
" ",
"=",
" ",
null,
";",
" ",
"x",
".",
"test",
"(",
"'foo-bar'",
");");
}
@Test
public void testHtmlDirective() throws Exception {
assertEscaping(
"<div><!-- {$s} --></div>", "escapeHtml", HTML_LEXER, "<div", ">", null, "</div", ">");
}
@Test
public void testHtmlRcdataDirective() throws Exception {
assertEscaping(
"<textarea>'{$s}'</textarea>",
"escapeHtmlRcdata",
HTML_LEXER,
"<textarea",
">",
null,
"</textarea",
">");
}
@Test
public void testHtmlAttributeDirective() throws Exception {
assertEscaping(
"<div title=\"{$s}\" class='{$s}'>",
"escapeHtmlAttribute",
HTML_LEXER,
"<div",
" ",
"title=",
null,
" ",
"class=",
null,
">");
}
@Test
public void testHtmlAttributeNospaceDirective() throws Exception {
assertEscaping(
"<div title=\"{$s}\" class='{$s}' id=x{$s}>",
"escapeHtmlAttributeNospace",
HTML_LEXER,
"<div",
" ",
"title=",
null,
" ",
"class=",
null,
" ",
"id=",
null,
">");
}
@Test
public void testFilterHtmlElementNameDirective() throws Exception {
assertEscaping(
"<h{$s} id=foo onclick='foo()'>",
"filterHtmlElementName",
HTML_LEXER,
(String) null,
" ",
"id=",
"foo",
" ",
null,
"'foo()'",
">");
}
@Test
public void testFilterHtmlAttributeDirective() throws Exception {
assertEscaping(
"<h1 id=foo on{$s}='foo()'>",
"filterHtmlAttributes",
HTML_LEXER,
"<h1",
" ",
"id=",
"foo",
" ",
null,
"'foo()'",
">");
}
@Test
public void testCssDirective() throws Exception {
assertEscaping(
"div { font-family: \"{$s}\", '{$s}';\n"
+ " background-image: url('{$s}'); border-image: url(\"${s}\") }",
"escapeCssString",
CSS_LEXER,
"div",
" ",
"{",
" ",
"font-family",
":",
" ",
null,
",",
" ",
null,
";",
"\n ",
"background-image",
":",
" ",
null,
";",
" ",
"border-image",
":",
" ",
null,
" ",
"}");
}
@Test
public void testCssValueDirective() throws Exception {
assertEscaping(
"div#id-{$s}.class-{$s} { color: red; border-color: #33f; margin: 0 -2px 4.5 .25in }",
"filterCssValue",
CSS_LEXER,
UNTRUSTED_VALUES,
"div",
null,
null,
" ",
"{",
" ",
"color",
":",
" ",
"red",
";",
" ",
"border-color",
":",
" ",
"#33f",
";",
" ",
"margin",
":",
" ",
"0",
" ",
"-2px",
" ",
"4.5",
" ",
".25in",
" ",
"}");
}
@Test
public void testUriDirective() throws Exception {
assertEscaping(
"http://foo{$s}/bar{$s}?foo={$s}&{$s}=bar#{$s}1",
"escapeUri",
URI_LEXER,
"http",
"://",
null,
"/",
null,
"?",
"foo",
"=",
null,
"&",
null,
"=",
"bar",
"#",
null);
// Test containment in HTML.
assertEscaping(
"<a href={$s}.html><a href='{$s}.html'><a href=\"{$s}.html\">",
"escapeUri",
HTML_LEXER,
"<a",
" ",
"href=",
null,
">",
"<a",
" ",
"href=",
null,
">",
"<a",
" ",
"href=",
null,
">");
// Test containment in CSS.
assertEscaping(
"border-image: url({$s}) url('{$s}') url(\"{$s}\");",
"escapeUri",
CSS_LEXER,
"border-image",
":",
" ",
null,
" ",
null,
" ",
null,
";");
}
@Test
public void testNormUriDirective() throws Exception {
assertEscaping(
"{$s}?foo=bar#s={$s}",
"normalizeUri",
URI_LEXER,
ImmutableList.of("http://www.google.com/O'Leary"),
"http",
"://",
"www.google.com",
"/",
"O%27Leary",
"?",
"foo",
"=",
"bar",
"#",
"s=http://www.google.com/O%27Leary");
}
@Test
public void testTestFramework() throws Exception {
// Make sure that a lexer can fail.
// Using |escapeHtml on an unquoted attribute is not allowed.
try {
assertEscaping(
"<div title={$s}>",
"escapeHtml",
HTML_LEXER,
ImmutableList.of("foo onclick=alert(42)"),
"<div",
" ",
"title=",
null,
">");
} catch (AssertionError err) {
return;
}
fail("Expected failure.");
}
@Test
public void testEscaperInterface() throws Exception {
// Test the escape method.
assertEquals("Hello", EscapingConventions.EscapeUri.INSTANCE.escape("Hello"));
assertEquals(
"%0Aletters%C2%85%E1%88%B4%E2%80%A8",
EscapingConventions.EscapeUri.INSTANCE.escape("\nletters\u0085\u1234\u2028"));
StringBuilder sb;
// And the Appendable version.
sb = new StringBuilder();
EscapingConventions.EscapeUri.INSTANCE
.escape(sb)
.append("Hello")
.append("\nletters\u0085\u1234\u2028");
assertEquals("Hello%0Aletters%C2%85%E1%88%B4%E2%80%A8", sb.toString());
// And the Appendable substring version.
sb = new StringBuilder();
EscapingConventions.EscapeUri.INSTANCE
.escape(sb)
.append("--Hello--", 2, 7)
.append("--\nletters\u0085\u1234\u2028--", 2, 13);
assertEquals("Hello%0Aletters%C2%85%E1%88%B4%E2%80%A8", sb.toString());
// And the Appendable char version.
sb = new StringBuilder();
EscapingConventions.EscapeUri.INSTANCE
.escape(sb)
.append('H')
.append('i')
.append('\n')
.append('\u0085')
.append('\u1234');
assertEquals("Hi%0A%C2%85%E1%88%B4", sb.toString());
}
private static final String SUBSTITUTION_POINT = "{$s}";
/**
* Create a lexer used by unittests to check that maliciously injected values can't violate the
* boundaries of string literals, comments, identifiers, etc. in template code.
*/
private static Function<String, List<String>> makeLexer(final String... regexParts) {
return new Function<String, List<String>>() {
@Override
public List<String> apply(String src) {
ImmutableList.Builder<String> tokens = ImmutableList.builder();
Pattern token = Pattern.compile(Joiner.on("").join(regexParts), Pattern.DOTALL);
while (src.length() != 0) {
Matcher m = token.matcher(src);
if (m.find()) {
tokens.add(m.group());
src = src.substring(m.end());
} else {
throw new IllegalArgumentException("Cannot lex `" + src + "`");
}
}
return tokens.build();
}
};
}
/** Glosses over regexular expression, number, and punctuation boundaries. */
private static final Function<String, List<String>> JS_LEXER =
makeLexer(
"^(?:",
// A double quoted string not containing a newline.
"\"(?:[^\\\\\"\r\n\u2028\u2029]|\\\\.)*\"|",
// A single quoted string not containing a newline.
"'(?:[^\\\\\'\r\n\u2028\u2029]|\\\\.)*'|",
// A C style block comment.
"/\\*.*?\\*/|",
// A C++ style line comment.
"//[^\r\n\u2028\u2029]*|",
// Space.
"\\s+|",
// A run of word characters or numbers.
"\\w+|", // A simplification of numbers.
// A run of punctuation.
"[^\\s\"\'/\\w]+|",
// A division operator.
"/=?(?![\\S])|", // A simplification of div ops vs regexs.
// A regular expression literal.
"/(?:",
// Regular expression character other than an escape or charset.
"[^\r\n\u2028\u2029\\\\/\\[]|",
// An escape sequence.
"\\\\[^\r\n\u2028\u2029]|",
// A charset.
"\\[",
"(?:",
// A charset member.
"[^\\]\r\b\u2028\u2029\\\\]|",
// An escape.
"\\\\[^\r\n\u2028\u2029]",
")*",
"\\]",
")*/",
")");
private static final Function<String, List<String>> HTML_LEXER =
makeLexer(
"^(?:",
// Beginning of a tag including its name.
"</?[\\w:-]+|",
// An HTML style comment.
"<!--[^<>\"']*-->|",
// Spaces.
"\\s+|",
// End of a tag.
"/?>|",
// An attribute name and equal sign.
"[\\w:-]+=|",
// A double quoted attribute value.
"\"[^\"<>]*\"|",
// A single quoted attribute value.
"\'[^\'<>]*\'|",
// An IE back quoted attribute value.
"`[^`]*`|",
// Raw HTML text (excl. quotes), or unquoted attribute value.
"(?:[^\\s<>\"'`=]|[\\w:-](?!=))+",
")");
private static final Function<String, List<String>> CSS_LEXER =
makeLexer(
"^(?i:", // CSS is case insensitive
// Escaping text span start or end. Allowed in CSS.
"<!--|",
"-->|",
// A double quoted string.
"\"(?:[^\\\\\"\r\n\f]|\\\\.)*\"|",
// A single quoted string.
"'(?:[^\\\\\'\r\n\f]|\\\\.)*'|",
// An identifier (other than url), hash color literal, or quantity
"(?!url\\b)[.#@!]?(?:[\\w-]|\\.[0-9]|\\\\[0-9a-f]+[ \t\r\n\f]?)+%?|",
// A C style comments. Line comments are non-standard in CSS.
"/\\*.*?\\*+/|",
// Punctuation.
"[:{}();,~]|",
// A url literal.
"url\\(\\s*(?:",
// Double quoted.
"\"(?:[^\\\\\"\r\n\f]|\\\\.)*\"|",
// Single quoted.
"'(?:[^\\\\\'\r\n\f]|\\\\.)*'|",
// Unquoted.
"(?:[!#$%&*-\\[\\]-~\u0080-\uffff]|\\\\[0-9a-f]+[ \t\r\n\f]?)*",
")\\s*\\)|",
// Space.
"\\s+",
")");
/** Lexes URIs returning each of the parts defined in RFC3986 for hierarchical URIs separately. */
private static final Function<String, List<String>> URI_LEXER =
new Function<String, List<String>>() {
@Override
public List<String> apply(String s) {
Matcher m =
Pattern.compile( // Pattern from RFC 3986 Appendix B.
"^(?:([^:/?#]+):)?(?://([^/?#]*))?([^?#]*)(?:\\?([^#]*))?(?:#(.*))?",
Pattern.DOTALL)
.matcher(s);
assertTrue(m.find());
String scheme = m.group(1);
String authority = m.group(2);
String path = m.group(3);
String query = m.group(4);
String fragment = m.group(5);
ImmutableList.Builder<String> out = ImmutableList.builder();
if (scheme != null) {
out.add(scheme);
out.add("://");
}
if (authority != null) {
out.add(authority);
}
if (path != null) {
int pos = 0;
int queryLen = path.length();
for (int i = 0; i < queryLen; ++i) {
if (path.charAt(i) == '/') {
if (pos != i) {
out.add(path.substring(pos, i));
}
pos = i + 1;
out.add(path.substring(i, i + 1));
}
}
if (pos != queryLen) {
out.add(path.substring(pos));
}
}
if (query != null) {
out.add("?");
int pos = 0;
int queryLen = query.length();
for (int i = 0; i < queryLen; ++i) {
if (query.charAt(i) == '&' || query.charAt(i) == '=') {
out.add(query.substring(pos, i));
pos = i + 1;
out.add(query.substring(i, i + 1));
}
}
out.add(query.substring(pos));
}
if (fragment != null) {
out.add("#");
out.add(fragment);
}
return out.build();
}
};
/** Problematic strings to escape that should stress token boundaries. */
private static final ImmutableList<String> UNTRUSTED_VALUES =
ImmutableList.of(
"",
"foo",
"Foo",
"foo-BAR",
"h1",
// Some HTML boundaries.
"123",
"<script>",
"</script>",
"<!--",
"-->",
"<\0script",
"<![CDATA[",
"]]>",
"<div>",
">",
" />",
// Some newlines
"\n",
"\r\n",
"\r",
"\f",
"\b",
"\u2028",
"\u2029",
// String and attribute boundaries and problem characters.
"\"",
"'",
"`",
"\\",
"/i, ",
// JS and CSS comment boundaries
"/*",
"*/",
"//",
// Unquoted attribute boundaries.
" ",
"\u00A0",
// More
"\"'`/*\\*/\r\n<!-</ScRipt</style <-->",
":/?=();@././../",
"'' onclick=alert(1337)",
") expression(alert(1337)");
/**
* For the named directive, check that containment holds, by doing simple template substitution in
* each of the Java and JavaScript modes, and then lexing the result in a way that would expose
* differences in string, comment, and tag boundaries..
*
* @param templateText Text in the escaping directive's output language that contains the {@link
* #SUBSTITUTION_POINT substitution point}.
* @param directiveName The name of the escape directive to test in both Java and JavaScript.
* @param lexer Used to lex the result of running the escaping directive.
* @param expectedTokens The expected tokens from lexing templateText after replacing the
* substitution point with dynamic content escaped using directive. If a value is null, then
* it will match any token.
*/
private void assertEscaping(
String templateText,
String directiveName,
Function<String, List<String>> lexer,
String... expectedTokens)
throws Exception {
assertEscaping(templateText, directiveName, lexer, UNTRUSTED_VALUES, expectedTokens);
}
/**
* For the named directive, check that containment holds, by doing simple template substitution in
* each of the Java and JavaScript modes, and then lexing the result in a way that would expose
* differences in string, comment, and tag boundaries..
*
* @param templateText Text in the escaping directive's output language that contains the {@link
* #SUBSTITUTION_POINT substitution point}.
* @param directiveName The name of the escape directive to test in both Java and JavaScript.
* @param lexer Used to lex the result of running the escaping directive.
* @param expectedTokens The expected tokens from lexing templateText after replacing the
* substitution point with dynamic content escaped using directive. If a value is null, then
* it will match any token.
*/
private void assertEscaping(
String templateText,
String directiveName,
Function<String, List<String>> lexer,
Iterable<String> untrustedValues,
String... expectedTokens)
throws Exception {
assertTrue(templateText, templateText.contains(SUBSTITUTION_POINT));
assertTrue(untrustedValues.iterator().hasNext()); // not empty
checkEscaping(
templateText,
applyDirectiveClosure(directiveName, untrustedValues),
directiveName + ":javascript",
lexer,
Arrays.asList(expectedTokens));
}
/**
* Apply the named directive to the given strings by loading {@code soyutils_usegoog.js} into
* Rhino.
*
* @return Even elements are the raw strings, and odd elements are the corresponding escaped
* versions.
*/
private List<String> applyDirectiveClosure(String directiveName, Iterable<String> toEscape)
throws Exception {
return applyDirectiveInRhino(directiveName, toEscape, getSoyUtilsUseGoogPath());
}
private List<String> applyDirectiveInRhino(
String directiveName, Iterable<String> toEscape, String soyUtilsPath) throws Exception {
List<String> output = Lists.newArrayList();
Context context = new ContextFactory().enterContext();
context.setOptimizationLevel(-1); // Only running once.
ScriptableObject globalScope = context.initStandardObjects();
globalScope.defineProperty(
"navigator", Context.javaToJS(new Navigator(), globalScope), ScriptableObject.DONTENUM);
Reader soyutils = new InputStreamReader(new FileInputStream(soyUtilsPath), UTF_8);
try {
String basename = soyUtilsPath.substring(soyUtilsPath.lastIndexOf('/') + 1);
context.evaluateReader(globalScope, soyutils, basename, 1, null);
} finally {
soyutils.close();
}
globalScope.defineProperty(
"test_toEscape", ImmutableList.copyOf(toEscape), ScriptableObject.DONTENUM);
globalScope.defineProperty("test_output", output, ScriptableObject.DONTENUM);
context.evaluateString(
globalScope,
Joiner.on('\n')
.join(
"(function () {",
" if (typeof goog !== 'undefined') {",
// Make sure we get the innocuous value from filters and not an exception.
" goog.asserts.ENABLE_ASSERTS = goog.DEBUG = false;",
" }",
" for (var i = 0, n = test_toEscape.size(); i < n; ++i) {",
" var raw = String(test_toEscape.get(i));",
" var escaped = String(soy.$$" + directiveName + "(raw));",
" test_output.add(raw);",
" test_output.add(escaped);",
" }",
"})()"),
getClass() + ":" + testName.getMethodName(), // File name for JS traces.
1,
null);
return output;
}
/**
* Does some simple template substitution, and checks that string, comment, tag, and other token
* boundaries do not differ based on the string that was escaped.
*/
private static void checkEscaping(
String templateText,
List<String> strings,
String directiveVersion,
Function<String, List<String>> lexer,
List<String> expectedTokens) {
int numStrings = strings.size();
assertTrue(directiveVersion, numStrings != 0);
for (int i = 0; i < numStrings; i += 2) {
String unescaped = strings.get(i);
String escaped = strings.get(i + 1);
String outputCode = templateText.replace(SUBSTITUTION_POINT, escaped);
try {
List<String> tokens = lexer.apply(outputCode);
int minLen = Math.min(expectedTokens.size(), tokens.size());
for (int j = 0; j < minLen; ++j) {
String expected = expectedTokens.get(j);
String actual = tokens.get(j);
if (expected != null && !expected.equals(actual)) {
fail(
"Bad escaping `"
+ outputCode
+ "` of `"
+ unescaped
+ "` for "
+ directiveVersion
+ ". Expected `"
+ expected
+ "` but got `"
+ actual
+ "`");
}
}
if (expectedTokens.size() != minLen) {
fail("Missing tokens " + expectedTokens.subList(minLen, expectedTokens.size()));
} else if (tokens.size() != minLen) {
fail("Extra tokens " + tokens.subList(minLen, tokens.size()));
}
} catch (AssertionFailedError err) {
throw err;
} catch (Exception ex) {
AssertionFailedError err =
new AssertionFailedError(
"Failed to escape `"
+ unescaped
+ "` with "
+ directiveVersion
+ ", got `"
+ outputCode
+ "`");
err.initCause(ex);
throw err;
}
}
}
/** So we can run soyutils in Rhino. */
public static final class Navigator {
public final String userAgent = "testzilla";
}
private static String getSoyUtilsUseGoogPath() {
return "testdata/javascript/soy_usegoog_lib.js";
}
}