/*
 * Copyright (C) 2010 Google Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.google.clearsilver.jsilver.autoescape;

import static com.google.clearsilver.jsilver.autoescape.EscapeMode.ESCAPE_AUTO_ATTR;
import static com.google.clearsilver.jsilver.autoescape.EscapeMode.ESCAPE_AUTO_ATTR_CSS;
import static com.google.clearsilver.jsilver.autoescape.EscapeMode.ESCAPE_AUTO_ATTR_JS;
import static com.google.clearsilver.jsilver.autoescape.EscapeMode.ESCAPE_AUTO_ATTR_UNQUOTED_JS;
import static com.google.clearsilver.jsilver.autoescape.EscapeMode.ESCAPE_AUTO_ATTR_URI;
import static com.google.clearsilver.jsilver.autoescape.EscapeMode.ESCAPE_AUTO_ATTR_URI_START;
import static com.google.clearsilver.jsilver.autoescape.EscapeMode.ESCAPE_AUTO_HTML;
import static com.google.clearsilver.jsilver.autoescape.EscapeMode.ESCAPE_AUTO_JS;
import static com.google.clearsilver.jsilver.autoescape.EscapeMode.ESCAPE_AUTO_JS_UNQUOTED;
import static com.google.clearsilver.jsilver.autoescape.EscapeMode.ESCAPE_AUTO_STYLE;
import static com.google.clearsilver.jsilver.autoescape.EscapeMode.ESCAPE_AUTO_UNQUOTED_ATTR;
import static com.google.clearsilver.jsilver.autoescape.EscapeMode.ESCAPE_AUTO_UNQUOTED_ATTR_CSS;
import static com.google.clearsilver.jsilver.autoescape.EscapeMode.ESCAPE_AUTO_UNQUOTED_ATTR_JS;
import static com.google.clearsilver.jsilver.autoescape.EscapeMode.ESCAPE_AUTO_UNQUOTED_ATTR_UNQUOTED_JS;
import static com.google.clearsilver.jsilver.autoescape.EscapeMode.ESCAPE_AUTO_UNQUOTED_ATTR_URI;
import static com.google.clearsilver.jsilver.autoescape.EscapeMode.ESCAPE_AUTO_UNQUOTED_ATTR_URI_START;

import com.google.clearsilver.jsilver.exceptions.JSilverAutoEscapingException;
import com.google.streamhtmlparser.ExternalState;
import com.google.streamhtmlparser.HtmlParser;
import com.google.streamhtmlparser.HtmlParserFactory;
import com.google.streamhtmlparser.ParseException;

import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;

/**
 * Encapsulates auto escaping logic.
 *
 * <p>
 * An instance wraps a single {@link HtmlParser}. Template text is fed to the parser through
 * {@link #parseData} and {@link #insertText}, and the parser's resulting state is translated into
 * an {@link AutoEscapeState} that names the escaping function to apply at that point.
 */
public class AutoEscapeContext {
  /**
   * Map of content-type to corresponding {@code HtmlParser.Mode}, used by {@code setContentType} to
   * specify the content type of provided input. The map is unmodifiable. Valid values and the
   * corresponding mode are: <br>
   * <table>
   * <tr>
   * <td>text/html</td>
   * <td>HtmlParser.Mode.HTML</td>
   * </tr>
   * <tr>
   * <td>text/plain</td>
   * <td>HtmlParser.Mode.HTML</td>
   * </tr>
   * <tr>
   * <td>application/javascript</td>
   * <td>HtmlParser.Mode.JS</td>
   * </tr>
   * <tr>
   * <td>application/json</td>
   * <td>HtmlParser.Mode.JS</td>
   * </tr>
   * <tr>
   * <td>text/javascript</td>
   * <td>HtmlParser.Mode.JS</td>
   * </tr>
   * <tr>
   * <td>text/css</td>
   * <td>HtmlParser.Mode.CSS</td>
   * </tr>
   * </table>
   *
   * @see #setContentType
   */
  public static final Map<String, HtmlParser.Mode> CONTENT_TYPE_LIST;

  // These options are used to provide extra information to HtmlParserFactory.createParserInMode or
  // HtmlParserFactory.createParserInAttribute, which is required for certain modes.
  private static final HashSet<HtmlParserFactory.AttributeOptions> QUOTED_JS_ATTRIBUTE_OPTION;
  private static final HashSet<HtmlParserFactory.AttributeOptions> PARTIAL_URL_ATTRIBUTE_OPTION;
  private static final HashSet<HtmlParserFactory.ModeOptions> JS_MODE_OPTION;

  static {
    QUOTED_JS_ATTRIBUTE_OPTION = new HashSet<HtmlParserFactory.AttributeOptions>();
    QUOTED_JS_ATTRIBUTE_OPTION.add(HtmlParserFactory.AttributeOptions.JS_QUOTED);

    PARTIAL_URL_ATTRIBUTE_OPTION = new HashSet<HtmlParserFactory.AttributeOptions>();
    PARTIAL_URL_ATTRIBUTE_OPTION.add(HtmlParserFactory.AttributeOptions.URL_PARTIAL);

    JS_MODE_OPTION = new HashSet<HtmlParserFactory.ModeOptions>();
    JS_MODE_OPTION.add(HtmlParserFactory.ModeOptions.JS_QUOTED);

    Map<String, HtmlParser.Mode> contentTypes = new HashMap<String, HtmlParser.Mode>();
    contentTypes.put("text/html", HtmlParser.Mode.HTML);
    contentTypes.put("text/plain", HtmlParser.Mode.HTML);
    contentTypes.put("application/javascript", HtmlParser.Mode.JS);
    contentTypes.put("application/json", HtmlParser.Mode.JS);
    contentTypes.put("text/javascript", HtmlParser.Mode.JS);
    contentTypes.put("text/css", HtmlParser.Mode.CSS);
    // Publish a read-only view so callers cannot alter the shared, class-wide table.
    CONTENT_TYPE_LIST = Collections.unmodifiableMap(contentTypes);
  }

  /** Parser whose state drives every escaping decision made by this context. */
  private final HtmlParser htmlParser;

  /**
   * Name of resource being auto escaped. Will be used in error and display messages. May be
   * {@code null}.
   */
  private final String resourceName;

  /**
   * Create a new context in the {@code EscapeMode.ESCAPE_AUTO} state with no resource name.
   */
  public AutoEscapeContext() {
    this(EscapeMode.ESCAPE_AUTO, null);
  }

  /**
   * Create a new context in the state represented by mode.
   *
   * @param mode EscapeMode object.
   */
  public AutoEscapeContext(EscapeMode mode) {
    this(mode, null);
  }

  /**
   * Create a new context in the state represented by mode. If a non-null resourceName is provided,
   * it will be used in displaying error messages.
   *
   * @param mode The initial EscapeMode for this context
   * @param resourceName Name of the resource being auto escaped.
   * @throws JSilverAutoEscapingException if a parser cannot be started in {@code mode}.
   */
  public AutoEscapeContext(EscapeMode mode, String resourceName) {
    this(resourceName, createHtmlParser(mode, resourceName));
  }

  /**
   * Wraps an already constructed parser. Used by {@link #cloneCurrentEscapeContext} so that a copy
   * does not first build a throw-away parser.
   *
   * @param resourceName Name of the resource being auto escaped, may be {@code null}.
   * @param htmlParser Parser this context will own.
   */
  private AutoEscapeContext(String resourceName, HtmlParser htmlParser) {
    this.resourceName = resourceName;
    this.htmlParser = htmlParser;
  }

  /**
   * Create a new context that is a copy of the current state of this context.
   *
   * @return New {@code AutoEscapeContext} that is a snapshot of the current state of this context.
   */
  public AutoEscapeContext cloneCurrentEscapeContext() {
    return new AutoEscapeContext(resourceName, HtmlParserFactory.createParser(htmlParser));
  }

  /**
   * Sets the current position in the resource being auto escaped. Useful for generating detailed
   * error messages.
   *
   * @param line line number.
   * @param column column number within line.
   */
  public void setCurrentPosition(int line, int column) {
    htmlParser.setLineNumber(line);
    htmlParser.setColumnNumber(column);
  }

  /**
   * Returns the name of the resource currently being auto escaped.
   */
  public String getResourceName() {
    return resourceName;
  }

  /**
   * Returns the current line number within the resource being auto escaped.
   */
  public int getLineNumber() {
    return htmlParser.getLineNumber();
  }

  /**
   * Returns the current column number within the resource being auto escaped.
   */
  public int getColumnNumber() {
    return htmlParser.getColumnNumber();
  }

  /**
   * Builds a parser positioned as if it had already consumed the input that leads to {@code mode}.
   * The trailing comment on each case shows an example of such input.
   *
   * @param mode Escape mode the new parser should start in.
   * @param resourceName Resource name to report in the exception, may be {@code null}.
   * @return A new parser in the requested mode.
   * @throws JSilverAutoEscapingException if {@code mode} is unsupported or not an auto escape mode.
   */
  private static HtmlParser createHtmlParser(EscapeMode mode, String resourceName) {
    switch (mode) {
      case ESCAPE_AUTO:
      case ESCAPE_AUTO_HTML:
        return HtmlParserFactory.createParser();

      case ESCAPE_AUTO_JS_UNQUOTED:
        // <script>START HERE
        return HtmlParserFactory.createParserInMode(HtmlParser.Mode.JS, null);

      case ESCAPE_AUTO_JS:
        // <script> var a = 'START HERE
        return HtmlParserFactory.createParserInMode(HtmlParser.Mode.JS, JS_MODE_OPTION);

      case ESCAPE_AUTO_STYLE:
        // <style>START HERE
        return HtmlParserFactory.createParserInMode(HtmlParser.Mode.CSS, null);

      case ESCAPE_AUTO_ATTR:
        // <input text="START HERE
        return HtmlParserFactory.createParserInAttribute(HtmlParser.ATTR_TYPE.REGULAR, true, null);

      case ESCAPE_AUTO_UNQUOTED_ATTR:
        // <input text=START HERE
        return HtmlParserFactory.createParserInAttribute(HtmlParser.ATTR_TYPE.REGULAR, false, null);

      case ESCAPE_AUTO_ATTR_URI:
        // <a href="http://www.google.com/a?START HERE
        return HtmlParserFactory.createParserInAttribute(HtmlParser.ATTR_TYPE.URI, true,
            PARTIAL_URL_ATTRIBUTE_OPTION);

      case ESCAPE_AUTO_UNQUOTED_ATTR_URI:
        // <a href=http://www.google.com/a?START HERE
        return HtmlParserFactory.createParserInAttribute(HtmlParser.ATTR_TYPE.URI, false,
            PARTIAL_URL_ATTRIBUTE_OPTION);

      case ESCAPE_AUTO_ATTR_URI_START:
        // <a href="START HERE
        return HtmlParserFactory.createParserInAttribute(HtmlParser.ATTR_TYPE.URI, true, null);

      case ESCAPE_AUTO_UNQUOTED_ATTR_URI_START:
        // <a href=START HERE
        return HtmlParserFactory.createParserInAttribute(HtmlParser.ATTR_TYPE.URI, false, null);

      case ESCAPE_AUTO_ATTR_JS:
        // <input onclick="doClick('START HERE
        return HtmlParserFactory.createParserInAttribute(HtmlParser.ATTR_TYPE.JS, true,
            QUOTED_JS_ATTRIBUTE_OPTION);

      case ESCAPE_AUTO_ATTR_UNQUOTED_JS:
        // <input onclick="doClick(START HERE
        return HtmlParserFactory.createParserInAttribute(HtmlParser.ATTR_TYPE.JS, true, null);

      case ESCAPE_AUTO_UNQUOTED_ATTR_JS:
        // <input onclick=doClick('START HERE
        throw new JSilverAutoEscapingException(
            "Attempting to start HTML parser in unsupported mode: " + mode, resourceName);

      case ESCAPE_AUTO_UNQUOTED_ATTR_UNQUOTED_JS:
        // <input onclick=doClick(START HERE
        return HtmlParserFactory.createParserInAttribute(HtmlParser.ATTR_TYPE.JS, false, null);

      case ESCAPE_AUTO_ATTR_CSS:
        // <input style="START HERE
        return HtmlParserFactory.createParserInAttribute(HtmlParser.ATTR_TYPE.STYLE, true, null);

      case ESCAPE_AUTO_UNQUOTED_ATTR_CSS:
        // <input style=START HERE
        return HtmlParserFactory.createParserInAttribute(HtmlParser.ATTR_TYPE.STYLE, false, null);

      default:
        throw new JSilverAutoEscapingException(
            "Attempting to start HTML parser in invalid mode: " + mode, resourceName);
    }
  }

  /**
   * Parse the given data and update internal state accordingly.
   *
   * @param data Input to parse, usually the contents of a template.
   * @throws JSilverAutoEscapingException if the underlying parser rejects the input.
   */
  public void parseData(String data) {
    try {
      htmlParser.parse(data);
    } catch (ParseException e) {
      // ParseException displays the proper position, so do not store line and column
      // number here.
      // NOTE(review): only the text of e is kept; chaining it as the cause would need a
      // JSilverAutoEscapingException constructor that accepts one - confirm whether one exists.
      throw new JSilverAutoEscapingException("Error in HtmlParser: " + e, resourceName);
    }
  }

  /**
   * Lets the AutoEscapeContext know that some input was skipped.
   *
   * This method will usually be called for variables in the input stream. The AutoEscapeContext is
   * told that the input stream contained some additional data but does not get to see the data. It
   * can adjust its internal state accordingly.
   *
   * @throws JSilverAutoEscapingException if the underlying parser reports an error.
   */
  public void insertText() {
    try {
      htmlParser.insertText();
    } catch (ParseException e) {
      throw new JSilverAutoEscapingException("Error during insertText(): " + e, resourceName,
          htmlParser.getLineNumber(), htmlParser.getColumnNumber());
    }
  }

  /**
   * Determines whether an included template that begins in state {@code start} is allowed to end in
   * state {@code end}. Usually included templates are only allowed to end in the same context they
   * begin in. This lets auto escaping parse the remainder of the parent template without needing to
   * know the ending context of the included template. However, there is one exception where auto
   * escaping will allow a different ending context: if the included template is a URI attribute
   * value, it is allowed to change context from {@code ATTR_URI_START} to {@code ATTR_URI}. This
   * does not cause any issues because the including template will call {@code insertText} when it
   * encounters the include command, and {@code insertText} will cause the HTML parser to switch its
   * internal state in the same way.
   *
   * @param start State in which the included template begins.
   * @param end State in which the included template ends.
   * @return {@code true} if ending in {@code end} is acceptable.
   */
  public boolean isPermittedStateChangeForIncludes(AutoEscapeState start, AutoEscapeState end) {
    if (start.equals(end)) {
      return true;
    }
    // Quoted and unquoted variants of the URI_START -> URI transition.
    if (start.equals(AutoEscapeState.ATTR_URI_START)) {
      return end.equals(AutoEscapeState.ATTR_URI);
    }
    return start.equals(AutoEscapeState.UNQUOTED_ATTR_URI_START)
        && end.equals(AutoEscapeState.UNQUOTED_ATTR_URI);
  }

  /**
   * Determine the correct escaping to apply for a variable.
   *
   * Looks at the current state of the htmlParser, and determines what escaping to apply to a
   * variable in this state.
   *
   * @return Name of escaping function to use in this state.
   */
  public String getEscapingFunctionForCurrentState() {
    return getCurrentState().getFunctionName();
  }

  /**
   * Returns the EscapeMode which will bring AutoEscapeContext into this state.
   *
   * Initializing a new AutoEscapeContext with this EscapeMode will bring it into the state that the
   * current AutoEscapeContext object is in.
   *
   * @return An EscapeMode object.
   */
  public EscapeMode getEscapeModeForCurrentState() {
    return getCurrentState().getEscapeMode();
  }

  /**
   * Calls the HtmlParser API to determine current state.
   *
   * This function is mostly a wrapper around the HtmlParser API. It gathers all the necessary
   * information using that API and returns a single enum representing the current state.
   *
   * @return AutoEscapeState enum representing the current state.
   * @throws JSilverAutoEscapingException if the parser reports a state this method does not handle.
   */
  public AutoEscapeState getCurrentState() {
    ExternalState state = htmlParser.getState();
    String tag = htmlParser.getTag();

    // Currently we do not do any escaping inside CSS blocks, so ignore them.
    if (state.equals(HtmlParser.STATE_CSS_FILE) || "style".equals(tag)) {
      return AutoEscapeState.STYLE;
    }

    // Handle variables inside <script> tags.
    if (htmlParser.inJavascript() && !state.equals(HtmlParser.STATE_VALUE)) {
      if (htmlParser.isJavascriptQuoted()) {
        // <script> var a = "<?cs var: Blah ?>"; </script>
        return AutoEscapeState.JS;
      } else {
        // <script> var a = <?cs var: Blah ?>; </script>
        // No quotes around the variable, hence it can inject arbitrary javascript.
        // So severely restrict the values it may contain.
        return AutoEscapeState.JS_UNQUOTED;
      }
    }

    // Inside an HTML tag or attribute name
    if (state.equals(HtmlParser.STATE_ATTR) || state.equals(HtmlParser.STATE_TAG)) {
      return AutoEscapeState.ATTR;
      // TODO: Need a strict validation function for tag and attribute names.
    } else if (state.equals(HtmlParser.STATE_VALUE)) {
      // Inside an HTML attribute value
      return getCurrentAttributeState();
    } else if (state.equals(HtmlParser.STATE_COMMENT) || state.equals(HtmlParser.STATE_TEXT)) {
      // Default is assumed to be HTML body
      // <b>Hello <?cs var: UserName ?></b> :
      return AutoEscapeState.HTML;
    }

    throw new JSilverAutoEscapingException("Invalid state received from HtmlParser: "
        + state.toString(), resourceName, htmlParser.getLineNumber(), htmlParser.getColumnNumber());
  }

  /**
   * Maps the parser's current attribute type and quoting to an {@code AutoEscapeState}. Called by
   * {@link #getCurrentState} when the parser is inside an attribute value.
   *
   * @return State matching the attribute type, whether the attribute value is quoted and, for URI
   *         and JS attributes, the URL-start / javascript-quoted flags reported by the parser.
   * @throws JSilverAutoEscapingException if the parser reports an attribute type not handled here.
   */
  private AutoEscapeState getCurrentAttributeState() {
    HtmlParser.ATTR_TYPE type = htmlParser.getAttributeType();
    boolean attrQuoted = htmlParser.isAttributeQuoted();

    switch (type) {
      case REGULAR:
        // <input value="<?cs var: Blah ?>"> :
        return attrQuoted ? AutoEscapeState.ATTR : AutoEscapeState.UNQUOTED_ATTR;

      case URI:
        if (htmlParser.isUrlStart()) {
          // <a href="<?cs var: X ?>">
          return attrQuoted
              ? AutoEscapeState.ATTR_URI_START
              : AutoEscapeState.UNQUOTED_ATTR_URI_START;
        }
        // <a href="http://www.google.com/a?x=<?cs var: X ?>">
        // TODO: Html escaping because that is what Clearsilver does right now.
        // May change this to url escaping soon.
        return attrQuoted ? AutoEscapeState.ATTR_URI : AutoEscapeState.UNQUOTED_ATTR_URI;

      case JS:
        if (htmlParser.isJavascriptQuoted()) {
          /*
           * Note: js_escape() hex encodes all html metacharacters. Therefore it is safe to not do
           * an HTML escape around this.
           */
          // quoted attr:   <input onclick="alert('<?cs var:Blah ?>');">
          // unquoted attr: <input onclick=alert('<?cs var: Blah ?>');>
          return attrQuoted ? AutoEscapeState.ATTR_JS : AutoEscapeState.UNQUOTED_ATTR_JS;
        }
        // quoted attr:   <input onclick="alert(<?cs var:Blah ?>);">
        // unquoted attr: <input onclick=alert(<?cs var:Blah ?>);>
        return attrQuoted
            ? AutoEscapeState.ATTR_UNQUOTED_JS
            : AutoEscapeState.UNQUOTED_ATTR_UNQUOTED_JS;

      case STYLE:
        // <input style="border:<?cs var: FancyBorder ?>"> :
        return attrQuoted ? AutoEscapeState.ATTR_CSS : AutoEscapeState.UNQUOTED_ATTR_CSS;

      default:
        throw new JSilverAutoEscapingException("Invalid attribute type in HtmlParser: " + type,
            resourceName, htmlParser.getLineNumber(), htmlParser.getColumnNumber());
    }
  }

  /**
   * Resets the state of the underlying html parser to a state consistent with the {@code
   * contentType} provided. This method should be used when the starting auto escaping context of a
   * resource cannot be determined from its contents - for example, a CSS stylesheet or a javascript
   * source file.
   *
   * @param contentType MIME type header representing the content being parsed.
   * @throws JSilverAutoEscapingException if {@code contentType} is not a key of
   *         {@link #CONTENT_TYPE_LIST}.
   * @see #CONTENT_TYPE_LIST
   */
  public void setContentType(String contentType) {
    HtmlParser.Mode mode = CONTENT_TYPE_LIST.get(contentType);
    if (mode == null) {
      throw new JSilverAutoEscapingException("Invalid content type specified: " + contentType,
          resourceName, htmlParser.getLineNumber(), htmlParser.getColumnNumber());
    }
    htmlParser.resetMode(mode);
  }

  /**
   * Enum representing states of the data being parsed.
   *
   * This enumeration lists all the states in which autoescaping would have some effect. Each
   * constant carries the name of the escaping function to apply in that state and the
   * {@code EscapeMode} that recreates the state in a fresh {@code AutoEscapeContext}.
   */
  public static enum AutoEscapeState {
    HTML("html", ESCAPE_AUTO_HTML),
    JS("js", ESCAPE_AUTO_JS),
    STYLE("css", ESCAPE_AUTO_STYLE),
    JS_UNQUOTED("js_check_number", ESCAPE_AUTO_JS_UNQUOTED),
    ATTR("html", ESCAPE_AUTO_ATTR),
    UNQUOTED_ATTR("html_unquoted", ESCAPE_AUTO_UNQUOTED_ATTR),
    ATTR_URI("html", ESCAPE_AUTO_ATTR_URI),
    UNQUOTED_ATTR_URI("html_unquoted", ESCAPE_AUTO_UNQUOTED_ATTR_URI),
    ATTR_URI_START("url_validate", ESCAPE_AUTO_ATTR_URI_START),
    UNQUOTED_ATTR_URI_START("url_validate_unquoted", ESCAPE_AUTO_UNQUOTED_ATTR_URI_START),
    ATTR_JS("js", ESCAPE_AUTO_ATTR_JS),
    ATTR_UNQUOTED_JS("js_check_number", ESCAPE_AUTO_ATTR_UNQUOTED_JS),
    UNQUOTED_ATTR_JS("js_attr_unquoted", ESCAPE_AUTO_UNQUOTED_ATTR_JS),
    UNQUOTED_ATTR_UNQUOTED_JS("js_check_number", ESCAPE_AUTO_UNQUOTED_ATTR_UNQUOTED_JS),
    ATTR_CSS("css", ESCAPE_AUTO_ATTR_CSS),
    UNQUOTED_ATTR_CSS("css_unquoted", ESCAPE_AUTO_UNQUOTED_ATTR_CSS);

    /** Name of the escaping function to apply in this state. */
    private final String functionName;

    /** Mode that starts a new context in this state. */
    private final EscapeMode escapeMode;

    private AutoEscapeState(String functionName, EscapeMode mode) {
      this.functionName = functionName;
      this.escapeMode = mode;
    }

    /** Returns the name of the escaping function to apply in this state. */
    public String getFunctionName() {
      return functionName;
    }

    /** Returns the {@code EscapeMode} that starts a new context in this state. */
    public EscapeMode getEscapeMode() {
      return escapeMode;
    }
  }
}