/******************************************************************************* * Copyright (c) 2012, 2014 Tasktop Technologies. * All rights reserved. This program and the accompanying materials * are made available under the terms of the Eclipse Public License v1.0 * which accompanies this distribution, and is available at * http://www.eclipse.org/legal/epl-v10.html * * Contributors: * David Green - initial API and implementation * Billy Huang - Bug 396332 *******************************************************************************/ package org.eclipse.mylyn.wikitext.internal.parser.html; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; import org.eclipse.mylyn.wikitext.internal.parser.html.HtmlCleaner; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.TextNode; import org.junit.Test; /** * @author David Green */ public class HtmlCleanerTest { @Test public void testFirstNode_MoveWhitespaceOutside() { String result = clean("<p>foo <span style=\"color:blue;\"> bar</span></p>"); assertTrue(result, result.contains("<p>foo <span style=\"color: blue;\">bar</span></p>")); } @Test public void testFirstNode_MoveWhitespaceOutside2() { String result = clean("<p>foo <span style=\"color:blue;\"> <br/>bar</span></p>"); assertTrue(result.contains("<p>foo <br /><span style=\"color: blue;\">bar</span></p>")); } @Test public void testLastNode_MoveWhitespaceOutside() { String result = clean("<p>foo <span style=\"color:blue;\"><br/>bar<br/> </span></p>"); assertTrue(result.contains("<p>foo <br /><span style=\"color: blue;\">bar</span><br /></p>")); } @Test public void testLastNode_MoveWhitespaceOutside2() { String result = clean("<p>foo <span style=\"color:blue;\"><br/>bar<br/>ab </span></p>"); assertTrue(result.contains("<p>foo <br /><span style=\"color: blue;\">bar<br />ab</span></p>")); } @Test public void testEmptyFontTag() { String result = clean("<p>foo <font color=\"red\"> </font>bar</p>"); assertTrue(result.contains("<p>foo bar</p>")); } @Test public void testFontTag_Black() { String result = clean("<p>foo <font color=\"black\"> bar</font></p>"); assertTrue(result.contains("<p>foo <span style=\"color: black;\">bar</span></p>")); } @Test public void testFontTag_Nothing() { String result = clean("<p>foo <font color=\" \"> bar</font></p>"); assertTrue(result.contains("<p>foo bar</p>")); } @Test public void testRemoveExcessiveStyles() { String result = clean("<p>foo <span > bar</span></p>"); assertTrue(result.contains("<p>foo bar</p>")); } @Test public void preserveSpanWithClass() { String result = clean("<p>foo <span class=\"test\">bar</span></p>"); assertTrue(result.contains("<p>foo <span class=\"test\">bar</span></p>")); } @Test public void preserveSpanWithUnderlineTextDecoration() { String result = clean("<p>foo <span style=\"text-decoration: underline;\">bar</span></p>"); assertTrue(result.contains("<p>foo <span style=\"text-decoration: underline;\">bar</span></p>")); } @Test public void preserveSpanWithUnderlineTextDecorationRemoveOthers() { String result = clean( "<p>foo <span style=\"ignoreThis: too; text-decoration: underline; bogus: bad\">bar</span></p>"); assertTrue(result.contains("<p>foo <span style=\"text-decoration: underline;\">bar</span></p>")); } @Test public void preserveSpanWithLinethroughTextDecoration() { String result = clean("<p>foo <span style=\"text-decoration: line-through;\">bar</span></p>"); assertTrue(result.contains("<p>foo <span style=\"text-decoration: line-through;\">bar</span></p>")); } @Test public void preserveSpanWithLinethroughTextDecorationRemoveOthers() { String result = clean( "<p>foo <span style=\"text-decoration: line-through; bogus: bad; ignoreThis: too\">bar</span></p>"); assertTrue(result.contains("<p>foo <span style=\"text-decoration: line-through;\">bar</span></p>")); } @Test public void testRemoveExcessiveStyles_lots_of_styles() { String result = clean( "<p>foo <span style=\"font-style: italic;font-weight: bold; color: blue; bogus: bad; ignoreThis: too\"> bar</span></p>"); assertTrue(result .contains("<p>foo <span style=\"font-style: italic;font-weight: bold;color: blue;\">bar</span></p>")); } @Test public void testRemoveExcessiveStyles_lots_of_styles2() { String result = clean("<p>foo <span style=\"bogus: bad; ignoreThis: too\"> bar</span></p>"); assertTrue(result.contains("<p>foo bar</p>")); } @Test public void testRemoveSpanContainingOnlyWhitespace() { String result = clean("<p>foo<span> </span>bar</p>"); assertTrue(result.contains("<p>foo bar</p>")); } @Test public void testRepairSpanContainingMalformedCssColorStyle3CharactersNonHex() { String result = clean("<p><span style=\"color: 123\">foo bar</span></p>"); assertTrue(result.contains("<p><span style=\"color: #123;\">foo bar</span></p>")); } @Test public void testRepairSpanContainingMalformedCssColorStyle6CharactersNonHex() { String result = clean("<p><span style=\"color: 123456\">foo bar</span></p>"); assertTrue(result.contains("<p><span style=\"color: #123456;\">foo bar</span></p>")); } @Test public void testRepairSpanContainingMalformedCssColorStyle3CharactersHex() { String result = clean("<p><span style=\"color: adc\">foo bar</span></p>"); assertTrue(result.contains("<p><span style=\"color: #adc;\">foo bar</span></p>")); } @Test public void testRepairSpanContainingMalformedCssColorStyle6CharactersHex() { String result = clean("<p><span style=\"color: afcebd\">foo bar</span></p>"); assertTrue(result.contains("<p><span style=\"color: #afcebd;\">foo bar</span></p>")); } @Test public void testRepairSpanContainingMalformedCssColorStyle6CharactersMixed() { String result = clean("<p><span style=\"color: A1B2C3\">foo bar</span></p>"); assertTrue(result.contains("<p><span style=\"color: #A1B2C3;\">foo bar</span></p>")); } @Test public void testRepairSpanContainingValidCssColorStyleHexNotChanged() { String result = clean("<p><span style=\"color: #ABCDEF\">foo bar</span></p>"); assertTrue(result.contains("<p><span style=\"color: #ABCDEF;\">foo bar</span></p>")); } @Test public void testRepairSpanContainingValidCssColorStyleNonHexNotChanged() { String result = clean("<p><span style=\"color: red\">foo bar</span></p>"); assertTrue(result.contains("<p><span style=\"color: red;\">foo bar</span></p>")); } @Test public void testRepairSpanContainingValidCssColorStyleNonHexLotsOfStylesNotChanged() { String result = clean("<p><span style=\"font-style: italic;font-weight: bold;color: red\">foo bar</span></p>"); assertTrue(result .contains("<p><span style=\"font-style: italic;font-weight: bold;color: red;\">foo bar</span></p>")); } @Test public void testRepairSpanContainingMalformedCssColorStyle6CharactersMixedWithImportantDeclaration() { String result = clean("<p><span style=\"color: A1B2C3 !important\">foo bar</span></p>"); assertTrue(result.contains("<p><span style=\"color: #A1B2C3 !important;\">foo bar</span></p>")); } @Test public void testRepairSpanContainingMalformedCssColorStyle6CharactersMixedWithImportantDeclarationLotsOfWhitespace() { String result = clean("<p><span style=\" color: A1B2C3 ! important \">foo bar</span></p>"); assertTrue(result.contains("<p><span style=\"color: #A1B2C3 ! important;\">foo bar</span></p>")); } @Test public void testTrailingWhitespaceBodyNoBlock() { // bug 406943 String result = cleanToBody("<html>\n<body>\ntext\n</body>\n</html>"); assertEquals("<body>text</body>", result); } @Test public void testTrailingWhitespaceBodyNoBlock_WhitespaceOutsideBody() { // bug 406943 String result = cleanToBody("<html>\n<body>\ntext\n</body>\n</html>"); assertEquals("<body>text</body>", result); } @Test public void testWhitespacesBetweenListItems() { String result = cleanToBody("<body><ol><li>item 0</li> <li>item 1</li>\t\n <li>item 2</li></ol></body>"); assertEquals("<body><ol><li>item 0</li><li>item 1</li><li>item 2</li></ol></body>", result); } @Test public void testWhitespacesBeforeBrTag() { // bug 433872 String result = cleanToBody("<body>\n <br/> foo</body>"); assertEquals("<body><br /> foo</body>", result); } @Test public void testWhitespacesBeforeBrTag2() { // bug 433872 String result = cleanToBody("<body><hr>\n <br/> foo</body>"); assertEquals("<body><hr /><br /> foo</body>", result); } @Test public void testTextAndWhitespacesBeforeBrTag() { // bug 433872 String result = cleanToBody("<body>foo \n <br/> bar</body>"); assertEquals("<body>foo<br /> bar</body>", result); } @Test public void testTextBeforeBrTag() { // bug 433872 String result = cleanToBody("<body>foo<br/> bar</body>"); assertEquals("<body>foo<br /> bar</body>", result); } @Test public void testTagBeforeBrTag() { // bug 433872 String result = cleanToBody("<body><hr/><br/> bar</body>"); assertEquals("<body><hr /><br /> bar</body>", result); } @Test public void testWhitespacesPreservedAroundHrTag() { String result = cleanToBody("<body>foo\n <hr/> foo</body>"); assertEquals("<body>foo <hr /> foo</body>", result); } @Test public void testWhitespacesPreservedAroundDivTag() { String result = cleanToBody("<body>foo\n <div>bar</div> foo</body>"); assertEquals("<body>foo <div>bar</div> foo</body>", result); } @Test public void testWhitespacesBetweenOrderedListAndItems() { String result = cleanToBody("<body><ol> <li>item 0</li> \n</ol></body>"); assertEquals("<body><ol><li>item 0</li></ol></body>", result); } @Test public void testWhitespacesBetweenUnorderedListAndItems() { String result = cleanToBody("<body><ul> <li>item 0</li> \n</ul></body>"); assertEquals("<body><ul><li>item 0</li></ul></body>", result); } @Test public void testWhitespacesBetweenTableCells() { String result = cleanToBody( "<body><table><tbody><tr><th>cell 0.0</th> <th>cell 0.1</th>\t\n <th>cell 0.2</th></tr> <tr><td>cell 1.0</td> <td>cell 1.1</td> \t\r\n<td>cell 1.2</td></tr> <tr><td>cell 2.0</td> <td>cell 2.1</td> <td>cell 2.2</td></th></tbody></table></body>"); assertEquals( "<body><table><tbody><tr><th>cell 0.0</th><th>cell 0.1</th><th>cell 0.2</th></tr><tr><td>cell 1.0</td><td>cell 1.1</td><td>cell 1.2</td></tr><tr><td>cell 2.0</td><td>cell 2.1</td><td>cell 2.2</td></tr></tbody></table></body>", result); } @Test public void testTrailingWhitespaceBodyNoBlock_WhitespaceOutsideBody2() { // bug 406943 Document document = Document.createShell(""); document.body().appendChild(new TextNode("\n", "")); document.body().appendChild(new TextNode("text", "")); document.body().appendChild(new TextNode("\n", "")); document.body().appendChild(new TextNode("\n", "")); String result = cleanToBody(document); assertEquals("<body>text</body>", result); } @Test public void preformattedWhitespacePreserve() { assertEquals("<body><pre> \none\r\ntwo\n</pre></body>", cleanToBody("<pre> \none\r\ntwo\n</pre>")); } @Test public void preformattedCodeWhitespacePreserve() { assertEquals("<body><pre><code> \none\r\ntwo\n</code></pre></body>", cleanToBody("<pre><code> \none\r\ntwo\n</code></pre>")); } private String cleanToBody(String originalHtml) { Document document = Jsoup.parse(originalHtml); return cleanToBody(document); } private String cleanToBody(Document document) { new HtmlCleaner().apply(document); document.outputSettings().prettyPrint(false); String result = document.body().outerHtml(); return result; } private String clean(String originalHtml) { Document document = Jsoup.parse(originalHtml); new HtmlCleaner().apply(document); document.outputSettings().prettyPrint(false); String result = document.outerHtml(); return result; } }