// Copyright (c) 2003-present, Jodd Team (http://jodd.org) // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are met: // // 1. Redistributions of source code must retain the above copyright notice, // this list of conditions and the following disclaimer. // // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE // ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE // POSSIBILITY OF SUCH DAMAGE. package jodd.lagarto.dom; import jodd.io.FileUtil; import jodd.util.StringUtil; import org.junit.Before; import org.junit.Test; import java.io.File; import java.io.IOException; import java.net.URL; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; public class MalformedTest { protected String testDataRoot; @Before public void setUp() throws Exception { if (testDataRoot != null) { return; } URL data = NodeSelectorTest.class.getResource("test"); testDataRoot = data.getFile(); } @Test public void testOneNode() { String content = "<body><div>test<span>sss</span></body>"; LagartoDOMBuilder lagartoDOMBuilder = new LagartoDOMBuilder(); Document doc = lagartoDOMBuilder.parse(content); assertEquals("<body><div>test<span>sss</span></div></body>", doc.getHtml()); assertTrue(doc.check()); } @Test public void testOneNodeWithBlanks() { String content = "<body><div> <span>sss</span></body>"; LagartoDOMBuilder lagartoDOMBuilder = new LagartoDOMBuilder(); Document doc = lagartoDOMBuilder.parse(content); assertEquals("<body><div> <span>sss</span></div></body>", doc.getHtml()); assertTrue(doc.check()); } @Test public void testTwoNodes() { String content = "<body><div>test<span><form>xxx</form></body>"; LagartoDOMBuilder lagartoDOMBuilder = new LagartoDOMBuilder(); Document doc = lagartoDOMBuilder.parse(content); assertEquals("<body><div>test<span><form>xxx</form></span></div></body>", doc.getHtml()); assertTrue(doc.check()); } @Test public void testTwoNodes2() { String content = "<body><div>test<span><form>xxx</body>"; LagartoDOMBuilder lagartoDOMBuilder = new LagartoDOMBuilder(); Document doc = lagartoDOMBuilder.parse(content); assertEquals("<body><div>test<span><form>xxx</form></span></div></body>", doc.getHtml()); assertTrue(doc.check()); } @Test public void testPeterSimple1() { String content = "<div><h1>FORELE</h1><p>dicuss<div>xxx</div></div>"; LagartoDOMBuilder lagartoDOMBuilder = new LagartoDOMBuilder(); Document doc = lagartoDOMBuilder.parse(content); assertEquals("<div><h1>FORELE</h1><p>dicuss</p><div>xxx</div></div>", doc.getHtml()); assertTrue(doc.check()); } @Test public void testPeterSimple2() { String content = "<div><h1>FORELE</h1><p>dicuss<div><h2>HAB</h2><p>AMONG</div></div>"; LagartoDOMBuilder lagartoDOMBuilder = new LagartoDOMBuilder(); Document doc = lagartoDOMBuilder.parse(content); assertEquals("<div><h1>FORELE</h1><p>dicuss</p><div><h2>HAB</h2><p>AMONG</p></div></div>", doc.getHtml()); assertTrue(doc.check()); } @Test public void testPeterSimple3WithSpaces() { String content = "<div> <h1>FORELE</h1> <p>dicuss <div> <h2>HAB</h2> <p>AMONG </div> </div>".toUpperCase(); LagartoDOMBuilder lagartoDOMBuilder = new LagartoDOMBuilder(); Document doc = lagartoDOMBuilder.parse(content); assertEquals("<div> <h1>FORELE</h1> <p>DICUSS </p><div> <h2>HAB</h2> <p>AMONG </p></div> </div>", doc.getHtml()); assertTrue(doc.check()); } @Test public void testPeterFull() { String content = "<DIV class=\"section\" id=\"forest-elephants\" >\n" + "<H1>Forest elephants</H1>\n" + "<P>In this section, we discuss the lesser known forest elephants.\n" + "...this section continues...\n" + "<DIV class=\"subsection\" id=\"forest-habitat\" >\n" + "<H2>Habitat</H2>\n" + "<P>Forest elephants do not live in trees but among them.\n" + "...this subsection continues...\n" + "</DIV>\n" + "</DIV>"; LagartoDOMBuilder lagartoDOMBuilder = new LagartoDOMBuilder(); Document doc = lagartoDOMBuilder.parse(content); String expected = "<div class=\"section\" id=\"forest-elephants\">\n" + "<h1>Forest elephants</h1>\n" + "<p>In this section, we discuss the lesser known forest elephants.\n" + "...this section continues...\n</p>" + "<div class=\"subsection\" id=\"forest-habitat\">\n" + "<h2>Habitat</h2>\n" + "<p>Forest elephants do not live in trees but among them.\n" + "...this subsection continues...\n</p>" + "</div>\n" + "</div>"; assertEquals(expected, doc.getHtml()); assertTrue(doc.check()); } @Test public void testEof() { String content = "<body><div>test"; LagartoDOMBuilder lagartoDOMBuilder = new LagartoDOMBuilder(); Document doc = lagartoDOMBuilder.parse(content); assertEquals("<body><div>test</div></body>", doc.getHtml()); assertTrue(doc.check()); } @Test public void testEof2() { String content = "<body><div>"; LagartoDOMBuilder lagartoDOMBuilder = new LagartoDOMBuilder(); Document doc = lagartoDOMBuilder.parse(content); assertEquals("<body><div></div></body>", doc.getHtml()); assertTrue(doc.check()); } @Test public void testSpanDivOverTable() { String content = "<span><div><table><tr><td>text</span>"; LagartoDOMBuilder lagartoDOMBuilder = new LagartoDOMBuilder(); Document doc = lagartoDOMBuilder.parse(content); assertEquals("<span><div><table><tr><td>text</td></tr></table></div></span>", doc.getHtml()); assertTrue(doc.check()); } @Test public void testDivSpanOverTable() { String content = "<div><span><table><tr><td>text</div>"; LagartoDOMBuilder lagartoDOMBuilder = new LagartoDOMBuilder(); Document doc = lagartoDOMBuilder.parse(content); assertEquals("<div><span><table><tr><td>text</td></tr></table></span></div>", doc.getHtml()); assertTrue(doc.check()); } @Test public void testTableInTableInTable() throws IOException { String html = read("tableInTable.html", false); LagartoDOMBuilder lagartoDOMBuilder = new LagartoDOMBuilder(); Document doc = lagartoDOMBuilder.parse(html); String out = read("tableInTable-out.html", true); assertEquals(out, html(doc)); assertTrue(doc.check()); } @Test public void testFormClosesAll() throws IOException { String html = read("formClosesAll.html", false); LagartoDOMBuilder lagartoDOMBuilder = new LagartoDOMBuilder(); lagartoDOMBuilder.enableDebug(); Document doc = lagartoDOMBuilder.parse(html); html = html(doc); String out = read("formClosesAll-out1.html", true); assertEquals(out, html); assertTrue(doc.check()); lagartoDOMBuilder.getConfig().setUseFosterRules(true); doc = lagartoDOMBuilder.parse(html); html = html(doc); out = read("formClosesAll-out2.html", true); assertEquals(out, html); } @Test public void testFoster1() { String html = "A<table>B<tr>C</tr>D</table>"; LagartoDOMBuilder lagartoDOMBuilder = new LagartoDOMBuilder(); lagartoDOMBuilder.getConfig().setUseFosterRules(true); Document doc = lagartoDOMBuilder.parse(html); html = html1(doc); assertEquals("ABCD<table><tr></tr></table>", html); } @Test public void testFoster2() { String html = "A<table><tr> B</tr> C</table>"; LagartoDOMBuilder lagartoDOMBuilder = new LagartoDOMBuilder(); lagartoDOMBuilder.getConfig().setUseFosterRules(true); Document doc = lagartoDOMBuilder.parse(html); html = html1(doc); assertEquals("ABC<table><tr></tr></table>", html); } @Test public void testBodyEnd() { String html = "<body><p>111</body>"; LagartoDOMBuilder lagartoDOMBuilder = new LagartoDOMBuilder(); lagartoDOMBuilder.enableDebug(); Document doc = lagartoDOMBuilder.parse(html); html = html1(doc); assertEquals("<body><p>111</p></body>", html); assertNull(doc.getErrors()); } @Test public void testBodyEndWithError() { String html = "<body><p>111<h1>222</body>"; LagartoDOMBuilder lagartoDOMBuilder = new LagartoDOMBuilder(); lagartoDOMBuilder.enableDebug(); Document doc = lagartoDOMBuilder.parse(html); html = html1(doc); assertEquals("<body><p>111</p><h1>222</h1></body>", html); assertNotNull(doc.getErrors()); assertEquals(1, doc.getErrors().size()); } @Test public void testEOF() { String html = "<body><p>111"; LagartoDOMBuilder lagartoDOMBuilder = new LagartoDOMBuilder(); lagartoDOMBuilder.enableDebug(); Document doc = lagartoDOMBuilder.parse(html); html = html1(doc); assertEquals("<body><p>111</p></body>", html); assertNull(doc.getErrors()); } @Test public void testEOFWithError() { String html = "<body><p>111<h1>222"; LagartoDOMBuilder lagartoDOMBuilder = new LagartoDOMBuilder(); lagartoDOMBuilder.enableDebug(); Document doc = lagartoDOMBuilder.parse(html); html = html1(doc); assertEquals("<body><p>111</p><h1>222</h1></body>", html); assertNotNull(doc.getErrors()); assertEquals(1, doc.getErrors().size()); } @Test public void testCrazySpan() throws IOException { String html = read("spancrazy.html", false); LagartoDOMBuilder lagartoDOMBuilder = new LagartoDOMBuilder(); lagartoDOMBuilder.enableHtmlPlusMode(); lagartoDOMBuilder.enableDebug(); Document doc = lagartoDOMBuilder.parse(html); html = html(doc); String out = read("spancrazy-out.html", true); assertEquals(out, html); assertEquals(3, doc.getErrors().size()); } @Test public void testFosterForm() throws IOException { String html = read("fosterForm.html", false); LagartoDOMBuilder lagartoDOMBuilder = new LagartoDOMBuilder(); lagartoDOMBuilder.enableHtmlPlusMode(); lagartoDOMBuilder.enableDebug(); Document doc = lagartoDOMBuilder.parse(html); html = html(doc); String out = read("fosterForm-out.html", true); assertEquals(out, html); assertNull(doc.getErrors()); } @Test public void testListCrazy() throws IOException { String html = read("listcrazy.html", false); LagartoDOMBuilder lagartoDOMBuilder = new LagartoDOMBuilder(); lagartoDOMBuilder.enableHtmlPlusMode(); lagartoDOMBuilder.enableDebug(); Document doc = lagartoDOMBuilder.parse(html); html = html(doc); String out = read("listcrazy-out.html", true); assertEquals(out, html); assertEquals(1, doc.getErrors().size()); } @Test public void testTable1() throws IOException { String html = read("table1.html", false); LagartoDOMBuilder lagartoDOMBuilder = new LagartoDOMBuilder(); lagartoDOMBuilder.enableHtmlPlusMode(); lagartoDOMBuilder.enableDebug(); Document doc = lagartoDOMBuilder.parse(html); html = html(doc); String out = read("table1-out.html", true); assertEquals(out, html); } @Test public void testTable2() throws IOException { String html = read("table2.html", false); LagartoDOMBuilder lagartoDOMBuilder = new LagartoDOMBuilder(); lagartoDOMBuilder.enableHtmlPlusMode(); lagartoDOMBuilder.enableDebug(); Document doc = lagartoDOMBuilder.parse(html); html = html(doc); String out = read("table2-out.html", true); assertEquals(out, html); } @Test public void smtest() throws IOException { String html = read("smtest.html", false); LagartoDOMBuilder lagartoDOMBuilder = new LagartoDOMBuilder(); lagartoDOMBuilder.enableHtmlPlusMode(); lagartoDOMBuilder.enableDebug(); Document doc = lagartoDOMBuilder.parse(html); html = html(doc); String out = read("smtest-out.html", true); // still not working out = StringUtil.remove(out, "<tbody>\n"); out = StringUtil.remove(out, "</tbody>\n"); html = StringUtil.replace(html, "<td>\nnotworking</td>", "<tr>\n<td>\nnotworking</td>\n</tr>"); assertEquals(out, html); } @Test public void testDecodingQuotes() throws IOException { String html = read("decode.html", false); LagartoDOMBuilder lagartoDOMBuilder = new LagartoDOMBuilder(); Document doc = lagartoDOMBuilder.parse(html); Element td1 = (Element) doc.getChild(0, 1, 1, 1, 1); String td1attr = td1.getAttribute("onclick"); Element td2 = (Element) doc.getChild(0, 1, 1, 3, 1); String td2attr = td2.getAttribute("onclick"); html = html(doc); String out = read("decode-out.html", true); assertEquals(out, html); // now re-parse the generated html String newHtml = doc.getHtml(); lagartoDOMBuilder = new LagartoDOMBuilder(); doc = lagartoDOMBuilder.parse(newHtml); td1 = (Element) doc.getChild(0, 1, 1, 1, 1); assertEquals(td1attr, td1.getAttribute("onclick")); td2 = (Element) doc.getChild(0, 1, 1, 3, 1); assertEquals(td2attr, td2.getAttribute("onclick")); } @Test public void testQuotes() throws IOException { String html = read("quotes.html", false); LagartoDOMBuilder lagartoDOMBuilder = new LagartoDOMBuilder(); Document doc = lagartoDOMBuilder.parse(html); html = html(doc); String out = read("quotes-out.html", true); assertEquals(out, html); } // ---------------------------------------------------------------- util /** * Reads test file and returns its content optionally stripped. */ protected String read(String filename, boolean strip) throws IOException { String data = FileUtil.readString(new File(testDataRoot, filename)); if (strip) { data = strip(data); } return data; } protected String strip(String string) { string = StringUtil.removeChars(string, " \r\n\t"); string = StringUtil.replace(string, ">", ">\n"); return string; } /** * Parses HTML and returns the stripped html. */ protected String html(Document document) { String html = document.getHtml(); html = strip(html); return html; } protected String html1(Document document) { String html = document.getHtml(); html = StringUtil.removeChars(html, " \r\n\t"); return html; } }