// SafeHtml.java example
//
// Explorer
// trydone-master
/*
 * Copyright (c) JForum Team
 * All rights reserved.
 * 
 * Redistribution and use in source and binary forms, 
 * with or without modification, are permitted provided 
 * that the following conditions are met:
 * 
 * 1) Redistributions of source code must retain the above 
 * copyright notice, this list of conditions and the 
 * following  disclaimer.
 * 2)  Redistributions in binary form must reproduce the 
 * above copyright notice, this list of conditions and 
 * the following disclaimer in the documentation and/or 
 * other materials provided with the distribution.
 * 3) Neither the name of "Rafael Steil" nor 
 * the names of its contributors may be used to endorse 
 * or promote products derived from this software without 
 * specific prior written permission.
 * 
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT 
 * HOLDERS AND CONTRIBUTORS "AS IS" AND ANY 
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, 
 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR 
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL 
 * THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE 
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES 
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, 
 * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER 
 * IN CONTRACT, STRICT LIABILITY, OR TORT 
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 
 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE
 * 
 * This file creation date: 27/09/2004 23:59:10
 * The JForum Project
 * http://www.jforum.net
 */
package net.jforum.util;

import java.util.HashSet;
import java.util.Iterator;
import java.util.Locale;
import java.util.Set;
import java.util.Vector;

import net.jforum.exceptions.ForumException;
import net.jforum.util.preferences.ConfigKeys;
import net.jforum.util.preferences.SystemGlobals;
import net.jforum.view.forum.common.ViewCommon;

import org.htmlparser.Attribute;
import org.htmlparser.Node;
import org.htmlparser.Tag;
import org.htmlparser.lexer.Lexer;
import org.htmlparser.nodes.TextNode;

/**
 * Processes text containing HTML and removes possibly malicious tags and attributes.
 * The work is based on tips from Amit Klein and on the following documents:
 * <ul>
 * <li>http://ha.ckers.org/xss.html</li>
 * <li>http://quickwired.com/kallahar/smallprojects/php_xss_filter_function.php</li>
 * </ul>
 * @author Rafael Steil
 * @version $Id: SafeHtml.java,v 1.25 2007/09/19 14:08:57 rafaelsteil Exp $
 */
public class SafeHtml 
{
	private static Set welcomeTags;
	private static Set welcomeAttributes;
	private static Set allowedProtocols;
	
	static {
		welcomeTags = new HashSet();
		welcomeAttributes = new HashSet();
		allowedProtocols = new HashSet();
		
		splitAndTrim(ConfigKeys.HTML_TAGS_WELCOME, welcomeTags);
		splitAndTrim(ConfigKeys.HTML_ATTRIBUTES_WELCOME, welcomeAttributes);
		splitAndTrim(ConfigKeys.HTML_LINKS_ALLOW_PROTOCOLS, allowedProtocols);
	}
	
	private static void splitAndTrim(String s, Set data)
	{
		String s1 = SystemGlobals.getValue(s);
		
		if (s1 == null) {
			return;
		}
		
		String[] tags = s1.toUpperCase().split(",");

		for (int i = 0; i < tags.length; i++) {
			data.add(tags[i].trim());
		}
	}
	
	/**
	 * Given an input, analyze each HTML tag and remove unsecure attributes from them. 
	 * @param contents The content to verify
	 * @return the content, secure. 
	 */
	public String ensureAllAttributesAreSafe(String contents) 
	{
		StringBuffer sb = new StringBuffer(contents.length());
		
		try {
			Lexer lexer = new Lexer(contents);
			Node node;
			
			while ((node = lexer.nextNode()) != null) {
				if (node instanceof Tag) {
					Tag tag = (Tag)node;
					
					this.checkAndValidateAttributes(tag, false);
					
					sb.append(tag.toHtml());
				}
				else {
					sb.append(node.toHtml());
				}
			}
		}
		catch (Exception e) {
			throw new ForumException("Problems while parsing HTML: " + e, e);
		}
		
		return sb.toString();
	}
	
	/**
	 * Given an input, makes it safe for HTML displaying. 
	 * Removes any not allowed HTML tag or attribute, as well
	 * unwanted Javascript statements inside the tags. 
	 * @param contents the input to analyze
	 * @return the modified and safe string
	 */
	public String makeSafe(String contents)
	{
		if (contents == null || contents.length() == 0) {
			return contents;
		}
		
		StringBuffer sb = new StringBuffer(contents.length());
		
		try {
			Lexer lexer = new Lexer(contents);
			Node node;
			
			while ((node = lexer.nextNode()) != null) {
				boolean isTextNode = node instanceof TextNode;
				
				if (isTextNode) {
					// Text nodes are raw data, so we just
					// strip off all possible html content
					String text = node.toHtml();
					
					if (text.indexOf('>') > -1 || text.indexOf('<') > -1) {
						StringBuffer tmp = new StringBuffer(text);
						
						ViewCommon.replaceAll(tmp, "<", "<");
						ViewCommon.replaceAll(tmp, ">", ">");
						ViewCommon.replaceAll(tmp, "\"", """);
						
						node.setText(tmp.toString());
					}
				}
				
				if (isTextNode || (node instanceof Tag && this.isTagWelcome(node))) {
					sb.append(node.toHtml());
				}
				else {
					StringBuffer tmp = new StringBuffer(node.toHtml());
					
					ViewCommon.replaceAll(tmp, "<", "<");
					ViewCommon.replaceAll(tmp, ">", ">");
					
					sb.append(tmp.toString());
				}
			}
		}
		catch (Exception e) {
			throw new ForumException("Error while parsing HTML: " + e, e);
		}
		
		return sb.toString();
	}
	
	/**
	 * Returns true if a given tag is allowed. 
	 * Also, it checks and removes any unwanted attribute the tag may contain. 
	 * @param node The tag node to analyze
	 * @return true if it is a valid tag. 
	 */
	private boolean isTagWelcome(Node node)
	{
		Tag tag = (Tag)node;

		if (!welcomeTags.contains(tag.getTagName())) {
			return false;
		}
		
		this.checkAndValidateAttributes(tag, true);
		
		return true;
	}
	
	/**
	 * Given a tag, check its attributes, removing those unwanted or not secure 
	 * @param tag The tag to analyze
	 * @param checkIfAttributeIsWelcome true if the attribute name should be matched
	 * against the list of welcome attributes, set in the main configuration file. 
	 */
	private void checkAndValidateAttributes(Tag tag, boolean checkIfAttributeIsWelcome)
	{
		Vector newAttributes = new Vector();
		
		for (Iterator iter = tag.getAttributesEx().iterator(); iter.hasNext(); ) {
			Attribute a = (Attribute)iter.next();

			String name = a.getName();
			
			if (name == null) {
				newAttributes.add(a);
			}
			else {
				name = name.toUpperCase();
				
				if (a.getValue() == null) {
					newAttributes.add(a);
					continue;
				}
				
				String value = a.getValue().toLowerCase();
				
				if (checkIfAttributeIsWelcome && !this.isAttributeWelcome(name)) {
					continue;
				}
				
				if (!this.isAttributeSafe(name, value)) {
					continue;
				}
					
				if (a.getValue().indexOf("&#") > -1) {
					a.setValue(a.getValue().replaceAll("&#", "&#"));
				}
				
				newAttributes.add(a);
			}
		}
		
		tag.setAttributesEx(newAttributes);
	}
	
	/**
	 * Check if the given attribute name is in the list of allowed attributes
	 * @param name the attribute name
	 * @return true if it is an allowed attribute name
	 */
	private boolean isAttributeWelcome(String name)
	{
		return welcomeAttributes.contains(name);
	}

	/**
	 * Check if the attribute is safe, checking either its name and value. 
	 * @param name the attribute name
	 * @param value the attribute value
	 * @return true if it is a safe attribute
	 */
	private boolean isAttributeSafe(String name, String value)
	{
		if (name.length() >= 2 && name.charAt(0) == 'O' && name.charAt(1) == 'N') {
			return false;
		}
		
		if (value.indexOf('\n') > -1 || value.indexOf('\r') > -1 || value.indexOf('\0') > -1) {
			return false;
		}
			
		if (("HREF".equals(name) || "SRC".equals(name))) {
			if (!this.isHrefValid(value)) {
				return false;
			}
		}
		else if ("STYLE".equals(name)) {
			// It is much more a try to not allow constructions
			// like style="background-color: url(javascript:xxxx)" than anything else
			if (value.indexOf('(') > -1) {
				return false;
			}
		}
		
		return true;
	}
	
	/**
	 * Checks if a given address is valid
	 * @param href The address to check
	 * @return true if it is valid
	 */
	private boolean isHrefValid(String href) 
	{
		if (SystemGlobals.getBoolValue(ConfigKeys.HTML_LINKS_ALLOW_RELATIVE)
			&& href.length() > 0 
			&& href.charAt(0) == '/') {
			return true;
		}
		
		for (Iterator iter = allowedProtocols.iterator(); iter.hasNext(); ) {
			String protocol = iter.next().toString().toLowerCase();
			
			if (href.startsWith(protocol)) {
				return true;
			}
		}
		
		return false;
	}
}