/*
* Copyright (c) JForum Team
* All rights reserved.
*
* Redistribution and use in source and binary forms,
* with or without modification, are permitted provided
* that the following conditions are met:
*
* 1) Redistributions of source code must retain the above
* copyright notice, this list of conditions and the
* following disclaimer.
* 2) Redistributions in binary form must reproduce the
* above copyright notice, this list of conditions and
* the following disclaimer in the documentation and/or
* other materials provided with the distribution.
* 3) Neither the name of "Rafael Steil" nor
* the names of its contributors may be used to endorse
* or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT
* HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING,
* BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
* THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
* OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
* IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE
*
* This file creation date: 27/09/2004 23:59:10
* The JForum Project
* http://www.jforum.net
*/
package net.jforum.util;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Locale;
import java.util.Set;
import java.util.Vector;
import net.jforum.exceptions.ForumException;
import net.jforum.util.preferences.ConfigKeys;
import net.jforum.util.preferences.SystemGlobals;
import net.jforum.view.forum.common.ViewCommon;
import org.htmlparser.Attribute;
import org.htmlparser.Node;
import org.htmlparser.Tag;
import org.htmlparser.lexer.Lexer;
import org.htmlparser.nodes.TextNode;
/**
* Process text with html and remove possible malicious tags and attributes.
* Work based on tips from Amit Klein and the following documents:
* <br>
* <li>http://ha.ckers.org/xss.html
* <li>http://quickwired.com/kallahar/smallprojects/php_xss_filter_function.php
* <br>
* @author Rafael Steil
* @version $Id: SafeHtml.java,v 1.25 2007/09/19 14:08:57 rafaelsteil Exp $
*/
public class SafeHtml
{
private static Set welcomeTags;
private static Set welcomeAttributes;
private static Set allowedProtocols;
static {
welcomeTags = new HashSet();
welcomeAttributes = new HashSet();
allowedProtocols = new HashSet();
splitAndTrim(ConfigKeys.HTML_TAGS_WELCOME, welcomeTags);
splitAndTrim(ConfigKeys.HTML_ATTRIBUTES_WELCOME, welcomeAttributes);
splitAndTrim(ConfigKeys.HTML_LINKS_ALLOW_PROTOCOLS, allowedProtocols);
}
private static void splitAndTrim(String s, Set data)
{
String s1 = SystemGlobals.getValue(s);
if (s1 == null) {
return;
}
String[] tags = s1.toUpperCase().split(",");
for (int i = 0; i < tags.length; i++) {
data.add(tags[i].trim());
}
}
/**
* Given an input, analyze each HTML tag and remove unsecure attributes from them.
* @param contents The content to verify
* @return the content, secure.
*/
public String ensureAllAttributesAreSafe(String contents)
{
StringBuffer sb = new StringBuffer(contents.length());
try {
Lexer lexer = new Lexer(contents);
Node node;
while ((node = lexer.nextNode()) != null) {
if (node instanceof Tag) {
Tag tag = (Tag)node;
this.checkAndValidateAttributes(tag, false);
sb.append(tag.toHtml());
}
else {
sb.append(node.toHtml());
}
}
}
catch (Exception e) {
throw new ForumException("Problems while parsing HTML: " + e, e);
}
return sb.toString();
}
/**
* Given an input, makes it safe for HTML displaying.
* Removes any not allowed HTML tag or attribute, as well
* unwanted Javascript statements inside the tags.
* @param contents the input to analyze
* @return the modified and safe string
*/
public String makeSafe(String contents)
{
if (contents == null || contents.length() == 0) {
return contents;
}
StringBuffer sb = new StringBuffer(contents.length());
try {
Lexer lexer = new Lexer(contents);
Node node;
while ((node = lexer.nextNode()) != null) {
boolean isTextNode = node instanceof TextNode;
if (isTextNode) {
// Text nodes are raw data, so we just
// strip off all possible html content
String text = node.toHtml();
if (text.indexOf('>') > -1 || text.indexOf('<') > -1) {
StringBuffer tmp = new StringBuffer(text);
ViewCommon.replaceAll(tmp, "<", "<");
ViewCommon.replaceAll(tmp, ">", ">");
ViewCommon.replaceAll(tmp, "\"", """);
node.setText(tmp.toString());
}
}
if (isTextNode || (node instanceof Tag && this.isTagWelcome(node))) {
sb.append(node.toHtml());
}
else {
StringBuffer tmp = new StringBuffer(node.toHtml());
ViewCommon.replaceAll(tmp, "<", "<");
ViewCommon.replaceAll(tmp, ">", ">");
sb.append(tmp.toString());
}
}
}
catch (Exception e) {
throw new ForumException("Error while parsing HTML: " + e, e);
}
return sb.toString();
}
/**
* Returns true if a given tag is allowed.
* Also, it checks and removes any unwanted attribute the tag may contain.
* @param node The tag node to analyze
* @return true if it is a valid tag.
*/
private boolean isTagWelcome(Node node)
{
Tag tag = (Tag)node;
if (!welcomeTags.contains(tag.getTagName())) {
return false;
}
this.checkAndValidateAttributes(tag, true);
return true;
}
/**
* Given a tag, check its attributes, removing those unwanted or not secure
* @param tag The tag to analyze
* @param checkIfAttributeIsWelcome true if the attribute name should be matched
* against the list of welcome attributes, set in the main configuration file.
*/
private void checkAndValidateAttributes(Tag tag, boolean checkIfAttributeIsWelcome)
{
Vector newAttributes = new Vector();
for (Iterator iter = tag.getAttributesEx().iterator(); iter.hasNext(); ) {
Attribute a = (Attribute)iter.next();
String name = a.getName();
if (name == null) {
newAttributes.add(a);
}
else {
name = name.toUpperCase();
if (a.getValue() == null) {
newAttributes.add(a);
continue;
}
String value = a.getValue().toLowerCase();
if (checkIfAttributeIsWelcome && !this.isAttributeWelcome(name)) {
continue;
}
if (!this.isAttributeSafe(name, value)) {
continue;
}
if (a.getValue().indexOf("") > -1) {
a.setValue(a.getValue().replaceAll("", "&#"));
}
newAttributes.add(a);
}
}
tag.setAttributesEx(newAttributes);
}
/**
* Check if the given attribute name is in the list of allowed attributes
* @param name the attribute name
* @return true if it is an allowed attribute name
*/
private boolean isAttributeWelcome(String name)
{
return welcomeAttributes.contains(name);
}
/**
* Check if the attribute is safe, checking either its name and value.
* @param name the attribute name
* @param value the attribute value
* @return true if it is a safe attribute
*/
private boolean isAttributeSafe(String name, String value)
{
if (name.length() >= 2 && name.charAt(0) == 'O' && name.charAt(1) == 'N') {
return false;
}
if (value.indexOf('\n') > -1 || value.indexOf('\r') > -1 || value.indexOf('\0') > -1) {
return false;
}
if (("HREF".equals(name) || "SRC".equals(name))) {
if (!this.isHrefValid(value)) {
return false;
}
}
else if ("STYLE".equals(name)) {
// It is much more a try to not allow constructions
// like style="background-color: url(javascript:xxxx)" than anything else
if (value.indexOf('(') > -1) {
return false;
}
}
return true;
}
/**
* Checks if a given address is valid
* @param href The address to check
* @return true if it is valid
*/
private boolean isHrefValid(String href)
{
if (SystemGlobals.getBoolValue(ConfigKeys.HTML_LINKS_ALLOW_RELATIVE)
&& href.length() > 0
&& href.charAt(0) == '/') {
return true;
}
for (Iterator iter = allowedProtocols.iterator(); iter.hasNext(); ) {
String protocol = iter.next().toString().toLowerCase();
if (href.startsWith(protocol)) {
return true;
}
}
return false;
}
}