package org.jsoup.parser;
import java.util.List;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Attribute;
import org.jsoup.nodes.Attributes;
import org.jsoup.nodes.BooleanAttribute;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.junit.Test;
import static org.junit.Assert.*;
/**
 * Tests covering how tag attributes are read out of markup by the parser.
 *
 * @author Jonathan Hedley, jonathan@hedley.net
 */
public class AttributeParseTest {

    /** Parses {@code html} and returns the first {@code <a>} element found by tag name. */
    private static Element firstAnchor(String html) {
        Document parsed = Jsoup.parse(html);
        Elements anchors = parsed.getElementsByTag("a");
        return anchors.get(0);
    }

    /**
     * A messy attribute string (mixed quoting, stray spaces around {@code =}, a missing space
     * after a quoted value, valueless names) is expected to yield seven attributes.
     */
    @Test
    public void parsesRoughAttributeString() {
        String html = "<a id=\"123\" class=\"baz = 'bar'\" style = 'border: 2px'qux zim foo = 12 mux=18 />";
        // expected: id=123, class=baz = 'bar', style=border: 2px, qux (empty), zim (empty), foo=12, mux=18
        Attributes parsedAttrs = firstAnchor(html).attributes();

        assertEquals(7, parsedAttrs.size());
        assertEquals("123", parsedAttrs.get("id"));
        assertEquals("baz = 'bar'", parsedAttrs.get("class"));
        assertEquals("border: 2px", parsedAttrs.get("style"));
        assertEquals("", parsedAttrs.get("qux"));
        assertEquals("", parsedAttrs.get("zim"));
        assertEquals("12", parsedAttrs.get("foo"));
        assertEquals("18", parsedAttrs.get("mux"));
    }

    /**
     * CR/LF sequences between the tag name, attribute names, {@code =} and values are expected
     * to act as separators, giving two attributes.
     */
    @Test
    public void handlesNewLinesAndReturns() {
        String html = "<a\r\nfoo='bar\r\nqux'\r\nbar\r\n=\r\ntwo>One</a>";
        Document parsed = Jsoup.parse(html);
        Element anchor = parsed.select("a").first();

        assertEquals(2, anchor.attributes().size());
        // Newlines inside a quoted value are currently kept as-is. todo: confirm whether they should be.
        assertEquals("bar\r\nqux", anchor.attr("foo"));
        assertEquals("two", anchor.attr("bar"));
    }

    /** A self-closed tag with nothing after its name is expected to have no attributes. */
    @Test
    public void parsesEmptyString() {
        String html = "<a />";
        Attributes parsedAttrs = firstAnchor(html).attributes();

        assertEquals(0, parsedAttrs.size());
    }

    /**
     * An attribute token that begins with {@code =} is expected to be kept whole as the
     * attribute name, with an empty value.
     */
    @Test
    public void canStartWithEq() {
        String html = "<a =empty />";
        Attributes parsedAttrs = firstAnchor(html).attributes();

        assertEquals(1, parsedAttrs.size());
        assertTrue(parsedAttrs.hasKey("=empty"));
        assertEquals("", parsedAttrs.get("=empty"));
    }

    /**
     * Bare {@code &} and {@code <} characters inside attribute values are expected to come
     * back unchanged.
     */
    @Test
    public void strictAttributeUnescapes() {
        String html = "<a id=1 href='?foo=bar&mid<=true'>One</a> <a id=2 href='?foo=bar<qux&lg=1'>Two</a>";
        Elements anchors = Jsoup.parse(html).select("a");
        Element firstLink = anchors.first();
        Element lastLink = anchors.last();

        assertEquals("?foo=bar&mid<=true", firstLink.attr("href"));
        assertEquals("?foo=bar<qux&lg=1", lastLink.attr("href"));
    }

    /**
     * Ampersand-prefixed query parameters in an {@code href} are expected to come back
     * exactly as written.
     */
    @Test
    public void moreAttributeUnescapes() {
        String html = "<a href='&wr_id=123&mid-size=true&ok=&wr'>Check</a>";
        Elements anchors = Jsoup.parse(html).select("a");
        Element link = anchors.first();

        assertEquals("&wr_id=123&mid-size=true&ok=&wr", link.attr("href"));
    }

    /**
     * Only the valueless attribute is expected to be a {@link BooleanAttribute}; an explicitly
     * empty value ({@code empty=""}) is not. The element is also expected to serialise back to
     * the input markup.
     */
    @Test
    public void parsesBooleanAttributes() {
        String html = "<a normal=\"123\" boolean empty=\"\"></a>";
        Document parsed = Jsoup.parse(html);
        Element anchor = parsed.select("a").first();

        assertEquals("123", anchor.attr("normal"));
        assertEquals("", anchor.attr("boolean"));
        assertEquals("", anchor.attr("empty"));

        List<Attribute> ordered = anchor.attributes().asList();
        assertEquals("There should be 3 attribute present", 3, ordered.size());

        // The positional checks below assume the list keeps the order in which attributes were parsed.
        Attribute normalAttr = ordered.get(0);
        Attribute booleanAttr = ordered.get(1);
        Attribute emptyAttr = ordered.get(2);
        assertFalse("'normal' attribute should not be boolean", normalAttr instanceof BooleanAttribute);
        assertTrue("'boolean' attribute should be boolean", booleanAttr instanceof BooleanAttribute);
        assertFalse("'empty' attribute should not be boolean", emptyAttr instanceof BooleanAttribute);

        assertEquals(html, anchor.outerHtml());
    }

    /**
     * A {@code /} directly before an attribute name is expected to be dropped from the name.
     * The same markup is checked through the default parser and the XML parser, which are
     * expected to serialise the tag differently.
     */
    @Test
    public void dropsSlashFromAttributeName() {
        String html = "<img /onerror='doMyJob'/>";

        // Default (HTML) parse.
        Document doc = Jsoup.parse(html);
        int matches = doc.select("img[onerror]").size();
        assertTrue("SelfClosingStartTag ignores last character", matches > 0);
        assertEquals("<img onerror=\"doMyJob\">", doc.body().html());

        // Same input through the XML parser.
        doc = Jsoup.parse(html, "", Parser.xmlParser());
        assertEquals("<img onerror=\"doMyJob\" />", doc.html());
    }
}