/* This code is part of Freenet. It is distributed under the GNU General * Public License, version 2 (or at your option any later version). See * http://www.gnu.org/ for further details of the GPL. */ package freenet.client.filter; import java.io.FileOutputStream; import java.io.IOException; import java.io.UnsupportedEncodingException; import java.io.InputStream; import java.io.OutputStream; import java.net.URI; import java.util.LinkedHashMap; import junit.framework.TestCase; import freenet.client.filter.ContentFilter; import freenet.client.filter.DataFilterException; import freenet.client.filter.GenericReadFilterCallback; import freenet.client.filter.HTMLFilter; import freenet.client.filter.ContentFilter.FilterStatus; import freenet.client.filter.HTMLFilter.*; import freenet.clients.http.ExternalLinkToadlet; import freenet.l10n.NodeL10n; import freenet.support.Logger; import freenet.support.Logger.LogLevel; import freenet.support.io.ArrayBucket; import freenet.support.TestProperty; /** * A simple meta-test to track regressions of the content-filter * * @author Florent Daignière <nextgens@freenetproject.org> */ public class ContentFilterTest extends TestCase { private static final String BASE_URI_PROTOCOL = "http"; private static final String BASE_URI_CONTENT = "localhost:8888"; private static final String BASE_KEY = "USK@0I8gctpUE32CM0iQhXaYpCMvtPPGfT4pjXm01oid5Zc,3dAcn4fX2LyxO6uCnWFTx-2HKZ89uruurcKwLSCxbZ4,AQACAAE/Ultimate-Freenet-Index/55/"; private static final String BASE_URI = BASE_URI_PROTOCOL+"://"+BASE_URI_CONTENT+'/'; private static final String ALT_BASE_URI = BASE_URI_PROTOCOL+"://"+BASE_URI_CONTENT+'/'+BASE_KEY; private static final String EXTERNAL_LINK = "www.evilwebsite.gov"; private static final String EXTERNAL_LINK_OK = "<a />"; // check that external links are not allowed private static final String EXTERNAL_LINK_CHECK1 = "<a href=\""+EXTERNAL_LINK+"\"/>"; private static final String EXTERNAL_LINK_CHECK2 = "<a href=\""+BASE_URI_PROTOCOL+"://"+EXTERNAL_LINK+"\"/>"; private static final String EXTERNAL_LINK_CHECK3 = "<a href=\""+BASE_URI_CONTENT+"@http://"+EXTERNAL_LINK+"\"/>"; private static final String INTERNAL_RELATIVE_LINK = "<a href=\"/KSK@gpl.txt\" />"; private static final String INTERNAL_ABSOLUTE_LINK = "<a href=\""+BASE_URI+"KSK@gpl.txt\" />"; private static final String INTERNAL_RELATIVE_LINK1 = "<a href=\"test.html\" />"; // @see bug #710 private static final String ANCHOR_TEST = "<a href=\"#test\" />"; private static final String ANCHOR_TEST_EMPTY = "<a href=\"#\" />"; private static final String ANCHOR_TEST_SPECIAL = "<a href=\"#!$()*+,;=:@ABC0123-._~xyz%3f\" />"; // RFC3986 / RFC 2396 private static final String ANCHOR_TEST_SPECIAL2 = "<a href=\"#!$&'()*+,;=:@ABC0123-._~xyz%3f\" />"; private static final String ANCHOR_TEST_SPECIAL2_RESULT = "<a href=\"#!$&'()*+,;=:@ABC0123-._~xyz%3f\" />"; // @see bug #2496 private static final String ANCHOR_RELATIVE1 = "<a href=\"/KSK@test/test.html#C2\">"; private static final String ANCHOR_RELATIVE2 = "<a href=\"/KSK@test/path/test.html#C2\">"; private static final String ANCHOR_FALSE_POS1 = "<a href=\"/KSK@test/path/test.html#%23\">"; // yes, this is valid private static final String ANCHOR_FALSE_POS2 = "<a href=\"/KSK@test/path/%23.html#2\">"; // yes, this is valid too // evil hack for #2496 + #2451, <SPACE><#> give <SPACE><%23> private static final String ANCHOR_MIXED = "<a href=\"/KSK@test/path/music #1.ogg\">"; private static final String ANCHOR_MIXED_RESULT = "<a href=\"/KSK@test/path/music%20%231.ogg\">"; // @see bug #2451 private static final String POUNT_CHARACTER_ENCODING_TEST = "<a href=\"/CHK@DUiGC5D1ZsnFpH07WGkNVDujNlxhtgGxXBKrMT-9Rkw,~GrAWp02o9YylpxL1Fr4fPDozWmebhGv4qUoFlrxnY4,AAIC--8/Testing - [blah] Apostrophe' - gratuitous 1 AND CAPITAL LETTERS!!!!.ogg\" />"; private static final String POUNT_CHARACTER_ENCODING_TEST_RESULT = "<a href=\"/CHK@DUiGC5D1ZsnFpH07WGkNVDujNlxhtgGxXBKrMT-9Rkw,~GrAWp02o9YylpxL1Fr4fPDozWmebhGv4qUoFlrxnY4,AAIC--8/Testing%20-%20%5bblah%5d%20Apostrophe%27%20-%20gratuitous%201%20AND%20CAPITAL%20LETTERS%21%21%21%21.ogg\" />"; // @see bug #2297 private static final String PREVENT_FPROXY_ACCESS = "<a href=\""+BASE_URI+"\"/>"; // @see bug #2921 private static final String PREVENT_EXTERNAL_ACCESS_CSS_SIMPLE = "<style>div { background: url("+BASE_URI+") }</style>"; private static final String PREVENT_EXTERNAL_ACCESS_CSS_CASE = "<style>div { background: uRl("+BASE_URI+") }</style>"; private static final String PREVENT_EXTERNAL_ACCESS_CSS_ESCAPE = "<style>div { background: \\u\\r\\l("+BASE_URI+") }</style>"; private static final String WHITELIST_STATIC_CONTENT = "<a href=\"/static/themes/clean/theme.css\" />"; private static final String XHTML_VOIDELEMENT="<html xmlns=\"http://www.w3.org/1999/xhtml\"><br><hr></html>"; private static final String XHTML_VOIDELEMENTC="<html xmlns=\"http://www.w3.org/1999/xhtml\"><br /><hr /></html>"; private static final String XHTML_INCOMPLETEDOCUMENT="<html xmlns=\"http://www.w3.org/1999/xhtml\"><body> <h1> helloworld <h2> helloworld"; private static final String XHTML_INCOMPLETEDOCUMENTC="<html xmlns=\"http://www.w3.org/1999/xhtml\"><body> <h1> helloworld <h2> helloworld</h2></h1></body></html>"; private static final String XHTML_IMPROPERNESTING="<html xmlns=\"http://www.w3.org/1999/xhtml\"><b><i>helloworld</b></i></html>"; private static final String XHTML_IMPROPERNESTINGC="<html xmlns=\"http://www.w3.org/1999/xhtml\"><b><i>helloworld</i></b></html>"; private static final String CSS_STRING_NEWLINES = "<style>* { content: \"this string does not terminate\n}\nbody {\nbackground: url(http://www.google.co.uk/intl/en_uk/images/logo.gif); }\n\" }</style>"; private static final String CSS_STRING_NEWLINESC = "<style>* {}\nbody { }\n</style>"; private static final String HTML_STYLESHEET_MAYBECHARSET = "<link rel=\"stylesheet\" href=\"test.css\">"; private static final String HTML_STYLESHEET_MAYBECHARSETC = "<link rel=\"stylesheet\" href=\"test.css?type=text/css&maybecharset=iso-8859-1\" type=\"text/css\">"; private static final String HTML_STYLESHEET_CHARSET = "<link rel=\"stylesheet\" charset=\"utf-8\" href=\"test.css\">"; private static final String HTML_STYLESHEET_CHARSETC = "<link rel=\"stylesheet\" charset=\"utf-8\" href=\"test.css?type=text/css%3b%20charset=utf-8\" type=\"text/css\">"; private static final String HTML_STYLESHEET_CHARSET_BAD = "<link rel=\"stylesheet\" charset=\"utf-8&max-size=4194304\" href=\"test.css\">"; private static final String HTML_STYLESHEET_CHARSET_BADC = "<link rel=\"stylesheet\" href=\"test.css?type=text/css&maybecharset=iso-8859-1\" type=\"text/css\">"; private static final String HTML_STYLESHEET_CHARSET_BAD1 = "<link rel=\"stylesheet\" type=\"text/css; charset=utf-8&max-size=4194304\" href=\"test.css\">"; private static final String HTML_STYLESHEET_CHARSET_BAD1C = "<link rel=\"stylesheet\" type=\"text/css\" href=\"test.css?type=text/css&maybecharset=iso-8859-1\">"; private static final String HTML_STYLESHEET_WITH_MEDIA = "<LINK REL=\"stylesheet\" TYPE=\"text/css\"\nMEDIA=\"print, handheld\" HREF=\"foo.css\">"; private static final String HTML_STYLESHEET_WITH_MEDIAC = "<LINK rel=\"stylesheet\" type=\"text/css\" media=\"print, handheld\" href=\"foo.css?type=text/css&maybecharset=iso-8859-1\">"; private static final String FRAME_SRC_CHARSET = "<frame src=\"test.html?type=text/html; charset=UTF-8\">"; private static final String FRAME_SRC_CHARSETC = "<frame src=\"test.html?type=text/html%3b%20charset=UTF-8\">"; private static final String FRAME_SRC_CHARSET_BAD = "<frame src=\"test.html?type=text/html; charset=UTF-8&max-size=4194304\">"; private static final String FRAME_SRC_CHARSET_BADC = "<frame src=\"test.html?type=text/html%3b%20charset=UTF-8\">"; private static final String FRAME_SRC_CHARSET_BAD1 = "<frame src=\"test.html?type=text/html; charset=UTF-8%26max-size=4194304\">"; private static final String FRAME_SRC_CHARSET_BAD1C = "<frame src=\"test.html?type=text/html\">"; private static final String SPAN_WITH_STYLE = "<span style=\"font-family: verdana, sans-serif; color: red;\">"; private static final String BASE_HREF = "<base href=\"/"+BASE_KEY+"\">"; private static final String BAD_BASE_HREF = "<base href=\"/\">"; private static final String BAD_BASE_HREF2 = "<base href=\"//www.google.com\">"; private static final String BAD_BASE_HREF3 = "<base>"; private static final String BAD_BASE_HREF4 = "<base id=\"blah\">"; private static final String BAD_BASE_HREF5 = "<base href=\"http://www.google.com/\">"; private static final String DELETED_BASE_HREF = "<!-- deleted invalid base href -->"; // From CSS spec private static final String CSS_SPEC_EXAMPLE1 = "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN\">\n<HTML>\n <HEAD>\n <TITLE>Bach's home page</TITLE>\n <STYLE type=\"text/css\">\n body {\n font-family: \"Gill Sans\", sans-serif;\n font-size: 12pt;\n margin: 3em;\n\n }\n </STYLE>\n </HEAD>\n <BODY>\n <H1>Bach's home page</H1>\n <P>Johann Sebastian Bach was a prolific composer.\n </BODY>\n</HTML>"; public void testHTMLFilter() throws Exception { new NodeL10n(); if (TestProperty.VERBOSE) { Logger.setupStdoutLogging(LogLevel.MINOR, "freenet.client.filter.Generic:DEBUG"); } // General sanity checks // is "relativization" working? assertEquals(INTERNAL_RELATIVE_LINK, HTMLFilter(INTERNAL_RELATIVE_LINK)); assertEquals(INTERNAL_RELATIVE_LINK, HTMLFilter(INTERNAL_RELATIVE_LINK, true)); assertEquals(INTERNAL_RELATIVE_LINK1, HTMLFilter(INTERNAL_RELATIVE_LINK1, true)); assertEquals(INTERNAL_RELATIVE_LINK, HTMLFilter(INTERNAL_ABSOLUTE_LINK)); // are external links stripped out ? assertTrue(HTMLFilter(EXTERNAL_LINK_CHECK1).startsWith(EXTERNAL_LINK_OK)); assertTrue(HTMLFilter(EXTERNAL_LINK_CHECK2).contains(ExternalLinkToadlet.PATH)); assertTrue(HTMLFilter(EXTERNAL_LINK_CHECK3).startsWith(EXTERNAL_LINK_OK)); // regression testing // bug #710 assertEquals(ANCHOR_TEST, HTMLFilter(ANCHOR_TEST)); assertEquals(ANCHOR_TEST_EMPTY, HTMLFilter(ANCHOR_TEST_EMPTY)); assertEquals(ANCHOR_TEST_SPECIAL, HTMLFilter(ANCHOR_TEST_SPECIAL)); assertEquals(ANCHOR_TEST_SPECIAL2_RESULT, HTMLFilter(ANCHOR_TEST_SPECIAL2)); // bug #2496 assertEquals(ANCHOR_RELATIVE1, HTMLFilter(ANCHOR_RELATIVE1)); assertEquals(ANCHOR_RELATIVE2, HTMLFilter(ANCHOR_RELATIVE2)); assertEquals(ANCHOR_FALSE_POS1, HTMLFilter(ANCHOR_FALSE_POS1)); assertEquals(ANCHOR_FALSE_POS2, HTMLFilter(ANCHOR_FALSE_POS2)); // EVIL HACK TEST for #2496 + #2451 assertEquals(ANCHOR_MIXED_RESULT, HTMLFilter(ANCHOR_MIXED)); // bug #2451 assertEquals(POUNT_CHARACTER_ENCODING_TEST_RESULT, HTMLFilter(POUNT_CHARACTER_ENCODING_TEST)); // bug #2297 assertTrue(HTMLFilter(PREVENT_FPROXY_ACCESS).contains(ExternalLinkToadlet.PATH)); // bug #2921 assertTrue(HTMLFilter(PREVENT_EXTERNAL_ACCESS_CSS_SIMPLE).contains("div { }")); assertTrue(HTMLFilter(PREVENT_EXTERNAL_ACCESS_CSS_ESCAPE).contains("div { }")); assertTrue(HTMLFilter(PREVENT_EXTERNAL_ACCESS_CSS_CASE).contains("div { }")); assertEquals(WHITELIST_STATIC_CONTENT, HTMLFilter(WHITELIST_STATIC_CONTENT)); assertEquals(XHTML_VOIDELEMENTC,HTMLFilter(XHTML_VOIDELEMENT)); assertEquals(XHTML_INCOMPLETEDOCUMENTC,HTMLFilter(XHTML_INCOMPLETEDOCUMENT)); assertEquals(XHTML_IMPROPERNESTINGC,HTMLFilter(XHTML_IMPROPERNESTING)); assertEquals(CSS_STRING_NEWLINESC,HTMLFilter(CSS_STRING_NEWLINES)); assertEquals(HTML_STYLESHEET_MAYBECHARSETC, HTMLFilter(HTML_STYLESHEET_MAYBECHARSET, true)); assertEquals(HTML_STYLESHEET_CHARSETC, HTMLFilter(HTML_STYLESHEET_CHARSET, true)); assertEquals(HTML_STYLESHEET_CHARSET_BADC, HTMLFilter(HTML_STYLESHEET_CHARSET_BAD, true)); assertEquals(HTML_STYLESHEET_CHARSET_BAD1C, HTMLFilter(HTML_STYLESHEET_CHARSET_BAD1, true)); assertEquals(HTML_STYLESHEET_WITH_MEDIAC, HTMLFilter(HTML_STYLESHEET_WITH_MEDIA, true)); assertEquals(FRAME_SRC_CHARSETC, HTMLFilter(FRAME_SRC_CHARSET, true)); assertEquals(FRAME_SRC_CHARSET_BADC, HTMLFilter(FRAME_SRC_CHARSET_BAD, true)); assertEquals(FRAME_SRC_CHARSET_BAD1C, HTMLFilter(FRAME_SRC_CHARSET_BAD1, true)); assertEquals(CSS_SPEC_EXAMPLE1, HTMLFilter(CSS_SPEC_EXAMPLE1)); assertEquals(SPAN_WITH_STYLE, HTMLFilter(SPAN_WITH_STYLE)); assertEquals(BASE_HREF, HTMLFilter(BASE_HREF)); assertEquals(DELETED_BASE_HREF, HTMLFilter(BAD_BASE_HREF)); assertEquals(DELETED_BASE_HREF, HTMLFilter(BAD_BASE_HREF2)); assertEquals(DELETED_BASE_HREF, HTMLFilter(BAD_BASE_HREF3)); assertEquals(DELETED_BASE_HREF, HTMLFilter(BAD_BASE_HREF4)); assertEquals(DELETED_BASE_HREF, HTMLFilter(BAD_BASE_HREF5)); } private static final String META_TIME_ONLY = "<meta http-equiv=\"refresh\" content=\"5\">"; private static final String META_TIME_ONLY_WRONG_CASE = "<meta http-equiv=\"RefResH\" content=\"5\">"; private static final String META_TIME_ONLY_TOO_SHORT = "<meta http-equiv=\"refresh\" content=\"0\">"; private static final String META_TIME_ONLY_NEGATIVE = "<meta http-equiv=\"refresh\" content=\"-5\">"; private static final String META_TIME_ONLY_BADNUM1 = "<meta http-equiv=\"refresh\" content=\"5.5\">"; private static final String META_TIME_ONLY_BADNUM2 = "<meta http-equiv=\"refresh\" content=\"\">"; private static final String META_TIME_ONLY_BADNUM_OUT = "<!-- doesn't parse as number in meta refresh -->"; private static final String META_VALID_REDIRECT = "<meta http-equiv=\"refresh\" content=\"30; url=/KSK@gpl.txt\">"; private static final String META_VALID_REDIRECT_NOSPACE = "<meta http-equiv=\"refresh\" content=\"30;url=/KSK@gpl.txt\">"; private static final String META_BOGUS_REDIRECT1 = "<meta http-equiv=\"refresh\" content=\"30; url=/\">"; private static final String META_BOGUS_REDIRECT2 = "<meta http-equiv=\"refresh\" content=\"30; url=/plugins\">"; private static final String META_BOGUS_REDIRECT3 = "<meta http-equiv=\"refresh\" content=\"30; url=http://www.google.com\">"; private static final String META_BOGUS_REDIRECT4 = "<meta http-equiv=\"refresh\" content=\"30; url=//www.google.com\">"; private static final String META_BOGUS_REDIRECT5 = "<meta http-equiv=\"refresh\" content=\"30; url=\"/KSK@gpl.txt\"\">"; private static final String META_BOGUS_REDIRECT6 = "<meta http-equiv=\"refresh\" content=\"30; /KSK@gpl.txt\">"; private static final String META_BOGUS_REDIRECT1_OUT = "<!-- Malformed URL (relative): There is no @ in that URI! ()-->"; private static final String META_BOGUS_REDIRECT2_OUT = "<!-- Malformed URL (relative): There is no @ in that URI! (plugins)-->"; private static final String META_BOGUS_REDIRECT3_OUT = "<meta http-equiv=\"refresh\" content=\"30; url=/external-link/?_CHECKED_HTTP_=http://www.google.com\">"; private static final String META_BOGUS_REDIRECT4_OUT = "<!-- Deleted invalid or dangerous URI-->"; private static final String META_BOGUS_REDIRECT5_OUT = "<!-- Malformed URL (relative): Invalid key type: \"/KSK-->"; private static final String META_BOGUS_REDIRECT_NO_URL = "<!-- no url but doesn't parse as number in meta refresh -->"; public void testMetaRefresh() throws Exception { HTMLFilter.metaRefreshSamePageMinInterval = 5; HTMLFilter.metaRefreshRedirectMinInterval = 30; assertEquals(META_TIME_ONLY, headFilter(META_TIME_ONLY)); assertEquals(META_TIME_ONLY, headFilter(META_TIME_ONLY_WRONG_CASE)); assertEquals(META_TIME_ONLY, headFilter(META_TIME_ONLY_TOO_SHORT)); assertEquals("", headFilter(META_TIME_ONLY_NEGATIVE)); assertEquals(META_TIME_ONLY_BADNUM_OUT, headFilter(META_TIME_ONLY_BADNUM1)); assertEquals(META_TIME_ONLY_BADNUM_OUT, headFilter(META_TIME_ONLY_BADNUM2)); assertEquals(META_VALID_REDIRECT, headFilter(META_VALID_REDIRECT)); assertEquals(META_VALID_REDIRECT, headFilter(META_VALID_REDIRECT_NOSPACE)); assertEquals(META_BOGUS_REDIRECT1_OUT, headFilter(META_BOGUS_REDIRECT1)); assertEquals(META_BOGUS_REDIRECT2_OUT, headFilter(META_BOGUS_REDIRECT2)); assertEquals(META_BOGUS_REDIRECT3_OUT, headFilter(META_BOGUS_REDIRECT3)); assertEquals(META_BOGUS_REDIRECT4_OUT, headFilter(META_BOGUS_REDIRECT4)); assertEquals(META_BOGUS_REDIRECT5_OUT, headFilter(META_BOGUS_REDIRECT5)); assertEquals(META_BOGUS_REDIRECT_NO_URL, headFilter(META_BOGUS_REDIRECT6)); HTMLFilter.metaRefreshSamePageMinInterval = -1; HTMLFilter.metaRefreshRedirectMinInterval = -1; assertEquals("", headFilter(META_TIME_ONLY)); assertEquals("", headFilter(META_TIME_ONLY_WRONG_CASE)); assertEquals("", headFilter(META_TIME_ONLY_TOO_SHORT)); assertEquals("", headFilter(META_TIME_ONLY_NEGATIVE)); assertEquals("", headFilter(META_TIME_ONLY_BADNUM1)); assertEquals("", headFilter(META_TIME_ONLY_BADNUM2)); assertEquals("", headFilter(META_VALID_REDIRECT)); assertEquals("", headFilter(META_VALID_REDIRECT_NOSPACE)); assertEquals("", headFilter(META_BOGUS_REDIRECT1)); assertEquals("", headFilter(META_BOGUS_REDIRECT2)); assertEquals("", headFilter(META_BOGUS_REDIRECT3)); assertEquals("", headFilter(META_BOGUS_REDIRECT4)); assertEquals("", headFilter(META_BOGUS_REDIRECT5)); assertEquals("", headFilter(META_BOGUS_REDIRECT6)); } private String headFilter(String data) throws Exception { String s = HTMLFilter("<head>"+data+"</head>"); if(s == null) return s; if(!s.startsWith("<head>")) assertTrue("Head deleted???: "+s, false); s = s.substring("<head>".length()); if(!s.endsWith("</head>")) assertTrue("Head close deleted???: "+s, false); s = s.substring(0, s.length() - "</head>".length()); return s; } public void testEvilCharset() throws IOException { // This is why we need to disallow characters before <html> !! String s = "<html><body><a href=\"http://www.google.com/\">Blah</a>"; String end = "</body></html>"; String alt = "<html><head><meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-16\"></head><body><a href=\"http://www.freenetproject.org/\">Blah</a></body></html>"; if((s.length()+end.length()) % 2 == 1) s += " "; s = s+end; byte[] buf; try { buf = s.getBytes("UTF-8"); } catch (UnsupportedEncodingException e) { throw new Error(e); } byte[] utf16bom = new byte[] { (byte)0xFE, (byte)0xFF }; byte[] bufUTF16 = alt.getBytes("UTF-16"); byte[] total = new byte[buf.length+utf16bom.length+bufUTF16.length]; System.arraycopy(utf16bom, 0, total, 0, utf16bom.length); System.arraycopy(buf, 0, total, utf16bom.length, buf.length); System.arraycopy(bufUTF16, 0, total, utf16bom.length+buf.length, bufUTF16.length); HTMLFilter filter = new HTMLFilter(); boolean failed = false; FileOutputStream fos; try { ArrayBucket out = new ArrayBucket(); filter.readFilter(new ArrayBucket(total).getInputStream(), out.getOutputStream(), "UTF-16", null, null); fos = new FileOutputStream("output.utf16"); fos.write(out.toByteArray()); fos.close(); failed = true; assertFalse("Filter accepted dangerous UTF8 text with BOM as UTF16! (HTMLFilter)", true); } catch (DataFilterException e) { System.out.println("Failure: "+e); e.printStackTrace(); if(e.getCause() != null) { e.getCause().printStackTrace(); } // Ok. } try { ArrayBucket out = new ArrayBucket(); FilterStatus fo = ContentFilter.filter(new ArrayBucket(total).getInputStream(), out.getOutputStream(), "text/html", null, null); fos = new FileOutputStream("output.filtered"); fos.write(out.toByteArray()); fos.close(); failed = true; assertFalse("Filter accepted dangerous UTF8 text with BOM as UTF16! (ContentFilter) - Detected charset: "+fo.charset, true); } catch (DataFilterException e) { System.out.println("Failure: "+e); e.printStackTrace(); if(e.getCause() != null) { e.getCause().printStackTrace(); } // Ok. } if(failed) { fos = new FileOutputStream("unfiltered"); fos.write(total); fos.close(); } } public static String HTMLFilter(String data) throws Exception { if(data.startsWith("<html")) return HTMLFilter(data, false); if(data.startsWith("<?")) return HTMLFilter(data, false); String s = HTMLFilter("<html>"+data+"</html>", false); assertTrue(s.startsWith("<html>")); s = s.substring("<html>".length()); assertTrue("s = \""+s+"\"", s.endsWith("</html>")); s = s.substring(0, s.length() - "</html>".length()); return s; } public static String HTMLFilter(String data, boolean alt) throws Exception { String returnValue; String typeName = "text/html"; URI baseURI = new URI(alt ? ALT_BASE_URI : BASE_URI); byte[] dataToFilter = data.getBytes("UTF-8"); ArrayBucket input = new ArrayBucket(dataToFilter); ArrayBucket output = new ArrayBucket(); InputStream inputStream = input.getInputStream(); OutputStream outputStream = output.getOutputStream(); ContentFilter.filter(inputStream, outputStream, typeName, baseURI, null, null, null); inputStream.close(); outputStream.close(); returnValue = output.toString(); output.free(); input.free(); return returnValue; } static public class TagVerifierTest extends TestCase { static String tagname; LinkedHashMap<String, String> attributes; ParsedTag HTMLTag; TagVerifier verifier; HTMLFilter filter; HTMLFilter.HTMLParseContext pc; @Override public void setUp() throws Exception { filter = new HTMLFilter(); attributes = new LinkedHashMap<String, String>(); pc = filter.new HTMLParseContext(null, null, "utf-8", new GenericReadFilterCallback(new URI(ALT_BASE_URI), null, null, null), false); } @Override public void tearDown() { filter = null; attributes = null; pc = null; tagname = null; verifier = null; HTMLTag = null; } public void testHTMLTagWithInvalidNS() throws DataFilterException{ tagname = "html"; verifier = HTMLFilter.allowedTagsVerifiers.get(tagname); //Place an invalid namespace into the tag attributes.put("xmlns", "http://www.w3.org/1909/xhtml"); //Place a unparsed attribute into the tag attributes.put("version", "-//W3C//DTD HTML 4.01 Transitional//EN"); HTMLTag = new ParsedTag(tagname, attributes); final String HTML_INVALID_XMLNS = "<html version=\"-//W3C//DTD HTML 4.01 Transitional//EN\" />"; assertEquals("HTML tag containing an invalid xmlns", HTML_INVALID_XMLNS, verifier.sanitize(HTMLTag, pc).toString()); } public void testLinkTag() throws DataFilterException { tagname = "link"; verifier = HTMLFilter.allowedTagsVerifiers.get(tagname); attributes.put("rel", "stylesheet"); attributes.put("type", "text/css"); attributes.put("target", "_blank"); attributes.put("media", "print, handheld"); attributes.put("href", "foo.css"); HTMLTag = new ParsedTag(tagname, attributes); final String LINK_STYLESHEET = "<link rel=\"stylesheet\" type=\"text/css\" target=\"_blank\" media=\"print, handheld\" href=\"foo.css?type=text/css&maybecharset=utf-8\" />"; assertEquals("Link tag importing CSS", LINK_STYLESHEET, verifier.sanitize(HTMLTag, pc).toString()); } public void testMetaTagHTMLContentType() throws DataFilterException { tagname = "meta"; verifier = HTMLFilter.allowedTagsVerifiers.get(tagname); attributes.put("http-equiv","Content-type"); attributes.put("content","text/html; charset=UTF-8"); HTMLTag = new ParsedTag(tagname, attributes); assertEquals("Meta tag describing HTML content-type", HTMLTag.toString(), verifier.sanitize(HTMLTag, pc).toString()); } public void testMetaTagXHTMLContentType() throws DataFilterException { tagname = "meta"; verifier = HTMLFilter.allowedTagsVerifiers.get(tagname); attributes.put("http-equiv","Content-type"); attributes.put("content","application/xhtml+xml; charset=UTF-8"); HTMLTag = new ParsedTag(tagname, attributes); assertEquals("Meta tag describing XHTML content-type", HTMLTag.toString(), verifier.sanitize(HTMLTag, pc).toString()); } public void testMetaTagUnknownContentType() throws DataFilterException { tagname = "meta"; verifier = HTMLFilter.allowedTagsVerifiers.get(tagname); attributes.put("http-equiv","Content-type"); attributes.put("content","want/fishsticks; charset=UTF-8"); HTMLTag = new ParsedTag(tagname, attributes); try { verifier.sanitize(HTMLTag, pc); assertTrue("Meta tag describing an unknown content-type: should throw an error", false); } catch (DataFilterException e) { // Ok. } } public void testBodyTag() throws DataFilterException { tagname = "body"; verifier = HTMLFilter.allowedTagsVerifiers.get(tagname); attributes.put("bgcolor", "pink"); //Let's pretend the following is malicious JavaScript attributes.put("onload", "evil_scripting_magic"); HTMLTag = new ParsedTag(tagname, attributes); final String BODY_TAG = "<body bgcolor=\"pink\" />"; assertEquals("Body tag", BODY_TAG, verifier.sanitize(HTMLTag, pc).toString()); } public void testFormTag() throws DataFilterException { tagname = "form"; verifier = HTMLFilter.allowedTagsVerifiers.get(tagname); attributes.put("method", "POST"); //Place a bad charset into the tag. This will get replaced with utf-8 attributes.put("accept-charset", "iso-8859-1"); attributes.put("action", "/library/"); HTMLTag = new ParsedTag(tagname, attributes); final String FORM_TAG = "<form method=\"POST\" accept-charset=\"UTF-8\" action=\"/library/\" enctype=\"multipart/form-data\" />"; assertEquals("Form tag", FORM_TAG, verifier.sanitize(HTMLTag, pc).toString()); } public void testInvalidFormMethod() throws DataFilterException { tagname = "form"; verifier = HTMLFilter.allowedTagsVerifiers.get(tagname); attributes.put("method", "INVALID_METHOD"); attributes.put("action", "/library/"); HTMLTag = new ParsedTag(tagname, attributes); assertNull("Form tag with an invalid method", verifier.sanitize(HTMLTag, pc)); } public void testValidInputTag() throws DataFilterException { tagname = "input"; verifier = HTMLFilter.allowedTagsVerifiers.get(tagname); attributes.put("type", "text"); HTMLTag = new ParsedTag(tagname, attributes); assertEquals("Input tag with a valid type", HTMLTag.toString(), verifier.sanitize(HTMLTag, pc).toString()); } public void testInvalidInputTag() throws DataFilterException { tagname = "input"; verifier = HTMLFilter.allowedTagsVerifiers.get(tagname); attributes.put("type", "INVALID_TYPE"); HTMLTag = new ParsedTag(tagname, attributes); assertNull("Input tag with an invalid type", verifier.sanitize(HTMLTag, pc)); } } public void testLowerCaseExtensions() { for(FilterMIMEType type : ContentFilter.mimeTypesByName.values()) { String ext = type.primaryExtension; if(ext != null) assertEquals(ext, ext.toLowerCase()); String[] exts = type.alternateExtensions; if(ext != null) for(String s : exts) assertEquals(s, s.toLowerCase()); } } }