package net.htmlparser.jericho;
import org.junit.Test;
import static org.junit.Assert.*;
import java.io.*;
import java.net.*;
import java.util.*;
public class RendererTest {
/**
 * Checks plain text pass-through, wrapping of a long sentence, and that the
 * content of an applet element is left out of the rendered text.
 */
@Test public void testBasics() throws Exception {
    final String plain = "text";
    assertEquals(plain, render(plain));

    // a long sentence is expected to be broken at word boundaries:
    final String longSentence = "This is a long sentence that should be broken into multiple lines at word boundaries, with each line no longer than the default 76 character maximum line length.";
    final String wrappedSentence = "This is a long sentence that should be broken into multiple lines at word \nboundaries, with each line no longer than the default 76 character maximum \nline length.";
    assertEquals(wrappedSentence, render(longSentence));

    // non-renderable elements are removed together with their content:
    assertEquals(plain, render("te<applet>xyz</applet>xt"));
}
/**
 * Exercises white space collapsing and trimming, non-breaking space handling,
 * and the new line sequence emitted for line breaks.
 */
@Test public void testWhiteSpace() throws Exception {
    String actual;

    // white space is collapsed and trailing white space is trimmed:
    actual = render(" basic test ");
    assertEquals(" basic test", actual);

    // nbsp is converted to a normal space:
    actual = render("basic test");
    assertEquals("basic test", actual);

    // nbsp is kept when conversion is switched off:
    actual = renderer("basic test").setConvertNonBreakingSpaces(false).toString();
    assertEquals("basic\u00A0 test", actual);

    // white space is trimmed at block boundaries:
    actual = render("<div> basic test </div> x");
    assertEquals("basic test\nx", actual);

    // ... even when inline elements sit between the text and the block boundary:
    actual = render("<div><span> basic test </span></div>");
    assertEquals("basic test", actual);

    // white space is not trimmed at inline element boundaries, and none is introduced:
    actual = render("<div>x<span> basic test</span>y</div>");
    assertEquals("x basic testy", actual);

    // the render helper used in these unit tests sets the new line to "\n":
    actual = render("basic<br>test");
    assertEquals("basic\ntest", actual);

    // a Renderer whose new line has not been set is expected to emit "\r\n":
    actual = new Renderer(new Source("basic<br>test")).toString();
    assertEquals("basic\r\ntest", actual);
}
/**
 * Checks the line breaks and blank lines expected around div, p, h1 and br
 * elements, including the option to keep the top margin of the first element.
 */
@Test public void testVerticalMargins() throws Exception {
    // each row is {expected output, HTML input}
    final String[][] cases = {
        // no vertical margins around <div>, only line break:
        {"a\nb\nc", "a<div>b</div>c"},
        // only one line break around multiple successive block elements:
        {"a\nb\nc", "a<div> <div> b </div> </div>c"},
        // one line vertical margin above and below paragraphs:
        {"a\n\nb\n\nc", "a <p> b </p> c"},
        // vertical margins at start and end of text are not rendered:
        {"text", "<p> text </p>"},
        // <br> at start of text is rendered, unlike vertical margins:
        {"\nx", "<br>x"},
        // still one line vertical margin between paragraphs:
        {"a\n\nb\n\nc", "<p> a </p><p> b </p> c"},
        // divs do not affect vertical margins but ensure a line break:
        {"x\na\n\nb\n\nc", "x <div> a <p> b </p></div> c"},
        // two line vertical margin above heading blocks, and one line below:
        {"a\n\n\nb\n\nc", "<p> a </p><h1> b </h1> c"},
        // vertical margins at start and end of text are not rendered:
        {"Heading\n\nparagraph", "<h1>Heading</h1><p>paragraph</p>"},
    };
    for (String[] row : cases) {
        assertEquals(row[0], render(row[1]));
    }

    // include top margins at start (two lines for heading blocks):
    final String withTopMargin = renderer("<h1>Heading</h1><p>paragraph</p>").setIncludeFirstElementTopMargin(true).toString();
    assertEquals("\n\nHeading\n\nparagraph", withTopMargin);
}
/**
 * Checks how inline style attributes (margin, margin-*, padding-top) change
 * the rendered vertical spacing and indentation of block elements.
 */
@Test public void testCSS() throws Exception {
    // each row is {expected output, HTML input}
    final String[][] cases = {
        // no recognised styles
        {"a\nb\nc", "a<div style='font-size: 10pt; font-family: arial'>b</div>c"},
        // explicit top margin mixed with other styles:
        {"a\n\nb\nc", "a<div style='font-size: 10pt; margin-top: 1em ; font-family: arial'>b</div>c"},
        // shorthand margins with four, three, two and one value:
        {"a\n\n b\n\n\nc", "a<div style=' margin: 1em 0 2em 3em'>b</div>c"},
        {"a\n\n b\n\n\nc", "a<div style=' margin: 1em 3em 2em'>b</div>c"},
        {"a\n\n b\n\nc", "a<div style=' margin: 1em 3em'>b</div>c"},
        {"a\n\n b\n\nc", "a<div style=' margin: 1em'>b</div>c"},
        // explicit margins override shorthand margins:
        {"a\n b\n\nc", "a<div style='margin-top: 0; margin: 1em'>b</div>c"},
        // set top margin to 0:
        {"a\nb\n\nc", "a<p style='margin-top: 0'>b</p>c"},
        // a plain blockquote gets its default vertical margins and left indent:
        {"a\n\n b\n\nc", "a<blockquote>b</blockquote>c"},
        // override margins with css:
        {"a\nb\nc", "a<blockquote style='margin: 0'>b</blockquote>c"},
        // bottom margin:
        {"a\nb\n\nc", "a<div style=' margin-bottom : 1em '>b</div>c"},
        // left margin:
        {"x\n a\n b\ny", "x<div style='margin-left: 1cm'>a<br>b</div>y"},
        // top padding:
        {"a\n\nb\nc", "a<div style='padding-top: 1em'>b</div>c"},
        // top padding and top margin added together:
        {"a\n\n\nb\nc", "a<div style='padding-top: 1em; margin-top: 1em'>b</div>c"},
        // percentage margins and other non-explicit lengths ignored
        {"a\nb\nc", "a<div style='padding-top: auto; margin-top: 100%'>b</div>c"},
    };
    for (String[] row : cases) {
        assertEquals(row[0], render(row[1]));
    }
}
/**
 * Checks that incomplete or unrecognised margin declarations in a style
 * attribute leave the div rendered with plain line breaks only.
 */
@Test public void testInvalidCSS() throws Exception {
    final String[] invalidStyles = {
        "margin",
        "margin-",
        "margin-bla",
        "margin-top",
        "margin-top:",
        "margin-top:bla",
        "margin-top:;",
    };
    for (String style : invalidStyles) {
        final String html = "a<div style='" + style + "'>b</div>c";
        assertEquals("a\nb\nc", render(html));
    }
}
/**
 * Checks the number of blank lines produced by a margin-top value given in
 * each supported CSS length unit.
 */
@Test public void testCSSUnits() throws Exception {
    // each row is {expected output, margin-top value}
    final String[][] cases = {
        // em (= 1 line/character)
        {"a\n\nb\nc", "1em"},
        {"a\n\n\nb\nc", "2em"},
        // ex (= 1 line/character)
        {"a\n\n\nb\nc", "2ex"},
        // px (= 0.125 line/character)
        {"a\n\nb\nc", "10px"},
        // 20px = 2.5 lines, round up to 3.
        {"a\n\n\n\nb\nc", "20px"},
        // in (= 8 lines/characters)
        {"a\n\n\n\n\nb\nc", "0.5in"},
        // cm (= 3 lines/characters)
        {"a\n\n\n\nb\nc", "1cm"},
        // mm (= 0.3 line/character)
        {"a\n\n\n\nb\nc", "10mm"},
        // pt (= 0.1 line/character)
        {"a\n\n\nb\nc", "20pt"},
        // pc (= 1.2 line/character)
        {"a\n\n\nb\nc", "2pc"},
    };
    for (String[] row : cases) {
        final String html = "a<div style='margin-top: " + row[1] + "'>b</div>c";
        assertEquals(row[0], render(html));
    }
}
/**
 * Checks the new lines produced by br elements on their own and in
 * combination with surrounding block elements.
 */
@Test public void testBR() throws Exception {
    // each row is {expected output, HTML input}
    final String[][] cases = {
        // <br> between text adds a single new line:
        {"x\ny", "x<br>y"},
        // two <br> elements add two new lines:
        {"x\n\ny", "x<br><br>y"},
        // <br> at start and end of text is rendered, unlike vertical margins:
        {"\nx\n", "<br>x<br>"},
        // the remaining cases are placed inside an indented block:
        {" x\n y", "<blockquote>x<br>y</blockquote>"},
        // ignore white space around <br>:
        {" x\n y", "<blockquote>x <br> y</blockquote>"},
        // <br> after block element adds an extra new line:
        {" x\n\n y", "<blockquote><div>x</div><br>y</blockquote>"},
        // <br> before block element is ignored:
        {" x\n y", "<blockquote>x<br><div>y</div></blockquote>"},
        // <br> at start of block element adds an extra new line:
        {" x\n\n y", "<blockquote>x<div><br>y</div></blockquote>"},
        // <br> before end of block element is ignored:
        {" x\n y\n z", "<blockquote>x<div>y<br></div>z</blockquote>"},
        {" x\n y\n\n z", "<blockquote>x<div>y<br></div><br>z</blockquote>"},
        // <br> between two block elements adds an extra new line:
        {" x\n\n y", "<blockquote><div>x</div><br><div>y</div></blockquote>"},
        {" x\n\n\n y", "<blockquote><p>x</p><br><p>y</p></blockquote>"},
        {" x\n\n\n\n y", "<blockquote><p>x</p><br><h1>y</h1></blockquote>"},
        // <br> at start of indenting block adds an extra new line:
        {" x\n\n\n y", "<blockquote>x<blockquote><br>y</blockquote></blockquote>"},
        {" x\n * \n y", "<blockquote>x<ul><li><br>y</ul></blockquote>"},
    };
    for (String[] row : cases) {
        assertEquals(row[0], render(row[1]));
    }
}
/**
 * Checks that changing the static default top margin, bottom margin and
 * indent for the P element changes the rendering, and that restoring the
 * saved values brings the original rendering back.
 *
 * The defaults are static state on Renderer, so they are restored in a
 * finally block: a failing assertion in the middle of this test must not
 * leave modified defaults behind for the other tests in the same JVM.
 */
@Test public void testCustomisedBlockProperties() throws Exception {
    final String html = "a<p>b</p>c";
    final String defaultRendering = "a\n\nb\n\nc";
    assertEquals(defaultRendering, render(html));

    // remember the current defaults so they can be put back afterwards
    final int originalDefaultTopMargin = Renderer.getDefaultTopMargin(HTMLElementName.P);
    final int originalDefaultBottomMargin = Renderer.getDefaultBottomMargin(HTMLElementName.P);
    final boolean originalDefaultIndent = Renderer.isDefaultIndent(HTMLElementName.P);
    try {
        Renderer.setDefaultTopMargin(HTMLElementName.P, 0);
        Renderer.setDefaultBottomMargin(HTMLElementName.P, 2);
        Renderer.setDefaultIndent(HTMLElementName.P, true);
        assertEquals("a\n b\n\n\nc", render(html));
    } finally {
        // always restore, even if the assertion above fails
        Renderer.setDefaultTopMargin(HTMLElementName.P, originalDefaultTopMargin);
        Renderer.setDefaultBottomMargin(HTMLElementName.P, originalDefaultBottomMargin);
        Renderer.setDefaultIndent(HTMLElementName.P, originalDefaultIndent);
    }

    // with the defaults restored the original rendering is expected again
    assertEquals(defaultRendering, render(html));
}
/**
 * Checks the indentation applied to dd and blockquote content, including
 * nested indents and a custom block indent size.
 */
@Test public void testBlockIndents() throws Exception {
    final String definitionList = "<dl><dt>b</dt><dd>c</dd></dl>";
    final String quoted = "<blockquote>b</blockquote>";

    // definition list indents <dd>:
    assertEquals("a\nb\n c\nd", render("a" + definitionList + "d"));

    // <blockquote> has an indent as well as a one line vertical margin top and bottom:
    assertEquals("a\n\n b\n\nc", render("a" + quoted + "c"));

    // <blockquote> as first element:
    assertEquals(" b", render(quoted));

    // customise the number of spaces to use in the indent:
    final String customIndent = renderer("a" + quoted + "c").setBlockIndentSize(8).toString();
    assertEquals("a\n\n b\n\nc", customIndent);

    // nested indents:
    final String nested = render("x<blockquote>a" + quoted + "c</blockquote>y");
    assertEquals("x\n\n a\n\n b\n\n c\n\ny", nested);

    // nested indents mixing elements:
    final String mixed = render("x<blockquote>a" + definitionList + "d</blockquote>y");
    assertEquals("x\n\n a\n b\n c\n d\n\ny", mixed);
}
/**
 * Checks rendering of pre elements: vertical margins, preserved white space,
 * inline elements inside the block, and placement inside an indented block.
 */
@Test public void testPRE() throws Exception {
    String actual;

    // one line vertical margin:
    actual = render("x<pre>a</pre>y");
    assertEquals("x\n\na\n\ny", actual);

    // keep white space:
    actual = render("x<pre>a b\nc </pre>y");
    assertEquals("x\n\na b\nc \n\ny", actual);
    actual = render("x<pre>a b \n c </pre>y");
    assertEquals("x\n\na b \n c \n\ny", actual);

    // still evaluate inline elements inside <pre>:
    actual = renderer("x<pre>a<b>b</b>c</pre>y").setDecorateFontStyles(true).toString();
    assertEquals("x\n\na*b*c\n\ny", actual);

    // inside indented block:
    actual = render("<blockquote>x<pre>a b\nc </pre>y</blockquote>");
    assertEquals(" x\n\n a b\n c \n\n y", actual);
}
/**
 * Checks rendering of unordered and ordered lists: bullets and numbering,
 * custom bullets and indent size, wrapping of long items, nesting, and list
 * items that contain block elements.
 */
@Test public void testLists() throws Exception {
    final String twelveItems = "<li>a<li>b<li>c<li>d<li>e<li>f<li>g<li>h<li>i<li>j<li>k<li>l";
    final String threeItemList = "x<ul><li>a<li>b<li>c</ul>y";
    String actual;

    // basic unordered list:
    actual = render("x<ul>" + twelveItems + "</ul>y");
    assertEquals("x\n * a\n * b\n * c\n * d\n * e\n * f\n * g\n * h\n * i\n * j\n * k\n * l\ny", actual);

    // list as first element:
    actual = render("<ul><li>a<li>b<li>c</ul>");
    assertEquals(" * a\n * b\n * c", actual);

    // ignore white space around list elements:
    actual = render("<ul><li>a<li> b <li>c</ul>");
    assertEquals(" * a\n * b\n * c", actual);

    // basic ordered list:
    actual = render("x<ol>" + twelveItems + "</ol>y");
    assertEquals("x\n 1. a\n 2. b\n 3. c\n 4. d\n 5. e\n 6. f\n 7. g\n 8. h\n 9. i\n 10. j\n 11. k\n 12. l\ny", actual);

    // customise list bullets:
    actual = renderer(threeItemList).setListBullets(new char[]{'-'}).toString();
    assertEquals("x\n - a\n - b\n - c\ny", actual);

    // customise list indent size:
    actual = renderer(threeItemList).setListIndentSize(4).toString();
    assertEquals("x\n * a\n * b\n * c\ny", actual);

    // wrap long lines:
    actual = render("x<ul><li>a<li>This is a long sentence that should be broken into multiple lines at word boundaries, with each line no longer than the default 76 character maximum line length.<li>c</ul>y");
    assertEquals("x\n * a\n * This is a long sentence that should be broken into multiple lines at\n word boundaries, with each line no longer than the default 76\n character maximum line length.\n * c\ny", actual);

    // mixed nested lists:
    actual = render("x<ul><li>a<ul><li>b<li>c<ul><li>d<li>e</ul></ul><li>f<li>g<ol><li>h<li>i<ol><li>j</ol><li>k</ol><li>l</ul>y");
    assertEquals("x\n * a\n o b\n o c\n + d\n + e\n * f\n * g\n 1. h\n 2. i\n 1. j\n 3. k\n * l\ny", actual);

    // list item containing only a sublist:
    actual = render("x<ul><li>a<li><ul><li>b</ul><li>c</ul>y");
    assertEquals("x\n * a\n * \n o b\n * c\ny", actual);

    // list item containing <div> element:
    actual = render("x<ul><li>a<li><div>b</div><li>c</ul>y");
    assertEquals("x\n * a\n * b\n * c\ny", actual);

    // list item containing <p> element (renders the same as IE, with top <p> vertical margin ignored but bottom vertical margin retained):
    actual = render("x<ul><li>a<li><p>b</p><li>c</ul>y");
    assertEquals("x\n * a\n * b\n\n * c\ny", actual);

    // list item containing an element that adds to the indent still ignores top vertical margin (renders the same as IE):
    actual = render("x<ul><li>a<li><blockquote><div> b</div><div>c</div></blockquote><li>d</ul>y");
    assertEquals("x\n * a\n * b\n c\n\n * d\ny", actual);
}
/**
 * Checks table rendering with the default cell separator and with a custom
 * separator set through setTableCellSeparator.
 */
@Test public void testTable() throws Exception {
    final String tableHtml = "x<table><tr><th>A<th>B<th>C<tr><td>a<td>b<td>c</table>y";

    // basic table with default table cell separator (space followed by tab):
    final String defaultSeparated = render(tableHtml);
    assertEquals("x\nA \tB \tC\na \tb \tc\ny", defaultSeparated);

    // custom table cell separator:
    final String tabSeparated = renderer(tableHtml).setTableCellSeparator("\t").toString();
    assertEquals("x\nA\tB\tC\na\tb\tc\ny", tabSeparated);
}
/**
 * Checks how hyperlinks are rendered: the URL appended in angle brackets,
 * the URL moved to its own line after a long label, URLs suppressed via
 * setIncludeHyperlinkURLs(false), and a subclass that supplies its own
 * renderHyperlinkURL implementation.
 */
@Test public void testA() throws Exception {
    assertEquals("My Link <http://mysite.com/>",
        render("<a href=\"http://mysite.com/\">My Link</a>"));
    // line break before URL if it would otherwise be longer than line length:
    assertEquals("My link with a long label that goes close to the end of the line\n<http://mysite.com/>",
        render("<a href=\"http://mysite.com/\">My link with a long label that goes close to the end of the line</a>"));
    // URLs can be left out altogether:
    assertEquals("My Link",
        renderer("<a href=\"http://mysite.com/\">My Link</a>").setIncludeHyperlinkURLs(false).toString());
    // a subclass decides how the URL is rendered; here it is the bare href without angle brackets.
    Renderer customRenderer=new Renderer(new Source("<a href=\"http://mysite.com/\">My Link</a>")) {
        // @Override makes the compiler reject this test if the method ever stops
        // matching the Renderer signature, instead of silently not overriding.
        @Override
        public String renderHyperlinkURL(StartTag startTag) {
            String href=startTag.getAttributeValue("href");
            if (href==null || href.equals("#") || href.startsWith("javascript:")) {
                return null;
            }
            return href;
        }
    };
    assertEquals("My Link http://mysite.com/",customRenderer.toString());
}
/**
 * Checks how the alt text of img elements is rendered: enclosed in square
 * brackets, with surrounding white space left as is, wrapped across lines,
 * suppressed via setIncludeAlternateText(false), and produced by a subclass
 * that supplies its own renderAlternateText implementation.
 *
 * NOTE(review): the src attribute in these fixtures has no '=' sign
 * (src"picture.png"); presumably a typo. The fixtures are left unchanged
 * because the expectations only depend on the alt attribute - confirm
 * before correcting them.
 */
@Test public void testAlternateText() throws Exception {
    assertEquals("x [a picture] y",
        render("x <img src\"picture.png\" alt=\"a picture\" /> y"));
    assertEquals("x[a picture]y",
        render("x<img src\"picture.png\" alt=\"a picture\" />y"));
    // white space inside the alt value is expected to be kept:
    assertEquals("x[ a picture\n]y",
        render("x<img src\"picture.png\" alt=\" a picture\n\" />y"));
    assertEquals("x\n[a picture]\ny",
        render("<div>x</div><img src\"picture.png\" alt=\"a picture\" /><div>y</div>"));
    assertEquals("An example of a line where the alternate text wraps: [this is alternate \ntext that wraps]",
        render("An example of a line where the alternate text wraps: <img src\"picture.png\" alt=\"this is alternate text that wraps\" />"));
    // alternate text can be left out altogether:
    assertEquals("x y",
        renderer("x <img src\"picture.png\" alt=\"a picture\" /> y").setIncludeAlternateText(false).toString());
    // a subclass decides how the alternate text is rendered; here it is wrapped in guillemets.
    Renderer customRenderer=new Renderer(new Source("x <img src\"picture.png\" alt=\"a picture\" /> y")) {
        // @Override makes the compiler reject this test if the method ever stops
        // matching the Renderer signature, instead of silently not overriding.
        @Override
        public String renderAlternateText(StartTag startTag) {
            // equals() rather than == so the check does not rely on String identity
            if (HTMLElementName.AREA.equals(startTag.getName())) {
                return null;
            }
            String alt=startTag.getAttributeValue("alt");
            if (alt==null || alt.length()==0) {
                return null;
            }
            return '\u00ab'+alt+'\u00bb';
        }
    };
    assertEquals("x \u00aba picture\u00bb y",customRenderer.toString());
}
/**
 * Checks that hr is rendered as a line of dashes, both at the top level and
 * inside indenting elements.
 */
@Test public void testHR() throws Exception {
    // dashes extend to 4 characters before the end of the line:
    final String topLevelRule = render("x<hr>y");
    assertEquals("x\n------------------------------------------------------------------------\ny", topLevelRule);

    // take indent into account:
    final String indentedRules = render("x<ul><li><hr><blockquote><hr></blockquote></ul>y");
    assertEquals("x\n * ------------------------------------------------------------------\n\n --------------------------------------------------------------\n\ny", indentedRules);
}
/**
 * Checks that font style elements are rendered undecorated by default and
 * are surrounded by marker characters once setDecorateFontStyles(true) is set.
 */
@Test public void testFontStyles() throws Exception {
    // default behaviour is not to decorate words:
    assertEquals("a bold word", render("a <b>bold</b> word"));

    // each row is {expected output, HTML input}, rendered with decoration switched on
    final String[][] decoratedCases = {
        // decorate <b> and <strong> text with *:
        {"a *bold* and *strong* word", "a <b>bold</b> and <strong>strong</strong> word"},
        // decorate <i> and <em> with /:
        {"/italic/ and /emphasised/ text", "<i>italic</i> and <em>emphasised</em> text"},
        // decorate <u> with _:
        {"_underlined_ text", "<u>underlined</u> text"},
        // decorate <code> with |:
        {"|code| text", "<code>code</code> text"},
    };
    for (String[] row : decoratedCases) {
        final String decorated = renderer(row[1]).setDecorateFontStyles(true).toString();
        assertEquals(row[0], decorated);
    }
}
/**
 * Renders the given HTML text using the renderer configured by
 * {@link #renderer(String)} and returns the resulting text.
 */
private String render(String sourceText) {
    final Renderer configured = renderer(sourceText);
    return configured.toString();
}
/**
 * Creates a Renderer over the given HTML text with its new line sequence
 * set to "\n", so that expected strings in the tests can use plain line feeds.
 */
private Renderer renderer(String sourceText) {
    final Source source = new Source(sourceText);
    final Renderer result = new Renderer(source);
    return result.setNewLine("\n");
}
}