package com.github.mefi.jkuuza.parser;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.junit.After;
import org.junit.AfterClass;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;
import static org.junit.Assert.*;
import static org.junit.matchers.JUnitMatchers.*;
/**
*
* @author mefi
*/
public class LinksExtractorTest {
public LinksExtractorTest() {
}
Document doc = null;
LinksExtractor extractor = null;
@BeforeClass
public static void setUpClass() throws Exception {
}
@AfterClass
public static void tearDownClass() throws Exception {
}
@Before
public void setUp() {
doc = new Document("http://example.com");
extractor = new LinksExtractor(doc);
}
@After
public void tearDown() {
}
/**
* Test of getInternalLinks method, of class LinksExtractor.
*/
@Test
public void testGetInternalLinks() {
System.out.println("getInternalLinks");
String html = "";
String host = "";
String expectedUrl = "";
html = "<a href=\"http://example.com\">link</a>";
host = "example.com";
expectedUrl = "http://example.com";
setDoc(html, host);
assertThat(extractor.getInternalLinks(host), hasItem(expectedUrl));
html = "<a href=\"http://example.com\"></a>";
host = "example.com";
expectedUrl = "http://example.com";
setDoc(html, host);
assertThat(extractor.getInternalLinks(host), hasItem(expectedUrl));
html = "<a href=\"/foo\">link</a>";
host = "example.com";
expectedUrl = "http://example.com/foo";
setDoc(html, host);
assertThat(extractor.getInternalLinks(host), hasItem(expectedUrl));
html = "<a href=\"\">link</a>";
host = "example.com";
expectedUrl = "http://example.com";
setDoc(html, host);
assertThat(extractor.getInternalLinks(host), hasItem(expectedUrl));
html = "<a href=\"foo/\">link</a>";
host = "example.com";
expectedUrl = "http://example.com/foo/";
setDoc(html, host);
assertThat(extractor.getInternalLinks(host), hasItem(expectedUrl));
html = "<a href=\"foo.php?bar=baz\">link</a>";
host = "example.com";
expectedUrl = "http://example.com/foo.php?bar=baz";
setDoc(html, host);
assertThat(extractor.getInternalLinks(host), hasItem(expectedUrl));
html = "<a href=\"foo.htm#bar\">link</a>";
host = "example.com";
expectedUrl = "http://example.com/foo.htm";
setDoc(html, host);
assertThat(extractor.getInternalLinks(host), hasItem(expectedUrl));
html = "<a href=\"foo\">link";
host = "example.com";
expectedUrl = "http://example.com/foo";
setDoc(html, host);
assertThat(extractor.getInternalLinks(host), hasItem(expectedUrl));
html = "<a href=\"../baz\">link</a>";
host = "example.com/";
expectedUrl = "http://example.com/baz";
setDoc(html, host);
assertThat(extractor.getInternalLinks(host), hasItem(expectedUrl));
}
/**
* Test of canonizeHost method, of class LinksExtractor.
*/
@Test
public void testCanonizeHost() {
System.out.println("canonizeHost");
assertEquals("example.com", extractor.canonizeHost("example.com"));
assertEquals("example.com", extractor.canonizeHost("www.example.com"));
assertEquals("example.com", extractor.canonizeHost("http://example.com"));
assertEquals("example.com", extractor.canonizeHost("https://example.com"));
assertEquals("example.com", extractor.canonizeHost("http://www.example.com"));
assertEquals("foo.example.com", extractor.canonizeHost("foo.example.com"));
assertEquals("foo.example.com", extractor.canonizeHost("http://foo.example.com"));
assertEquals("example.com/foo/bar", extractor.canonizeHost("www.example.com/foo/bar"));
}
/**
* Test of createLinkUrl method, of class LinksExtractor.
*/
@Test
public void testCreateLinkUrl() {
System.out.println("createLinkUrl");
assertEquals("http://example.com", extractor.createLinkUrl(doc.createElement("a").attr("href", "")));
assertEquals("http://example.com/foo.htm", extractor.createLinkUrl(doc.createElement("a").attr("href", "http://example.com/foo.htm")));
assertEquals("http://example.com/foo.htm", extractor.createLinkUrl(doc.createElement("a").attr("href", "foo.htm")));
assertEquals("http://example.com/foo.htm", extractor.createLinkUrl(doc.createElement("a").attr("href", "./foo.htm")));
assertEquals("http://example.com/foo.htm", extractor.createLinkUrl(doc.createElement("a").attr("href", "/foo.htm")));
assertEquals("http://example.com/foo.htm", extractor.createLinkUrl(doc.createElement("a").attr("href", "/foo.htm#anchor")));
assertEquals("http://example.com/foo/bar/", extractor.createLinkUrl(doc.createElement("a").attr("href", "/foo/bar/")));
doc.setBaseUri("http://example.com/foo/");
assertEquals("http://example.com/bar/baz/", extractor.createLinkUrl(doc.createElement("a").attr("href", "../bar/baz/")));
}
/**
* Test of isInternal method, of class LinksExtractor.
*/
@Test
public void testIsInternal() {
System.out.println("isInternal");
String link = "";
String host = "";
host = "example.com";
link = "http://example.com";
assertTrue(link + " - " + host, extractor.isInternal(link, host));
host = "example.com";
link = "http://example.com/bar";
assertTrue(link + " - " + host, extractor.isInternal(link, host));
host = "example.com";
link = "http://foo.example.com";
assertTrue(link + " - " + host, extractor.isInternal(link, host));
host = "example.com";
link = "http://foo.example.com/bar";
assertTrue(link + " - " + host, extractor.isInternal(link, host));
host = "foo.example.com";
link = "http://example.com";
assertFalse(link + " - " + host, extractor.isInternal(link, host));
host = "foo.example.com";
link = "http://bar.example.com";
assertFalse(link + " - " + host, extractor.isInternal(link, host));
host = "foo.example.com";
link = "http://example.com/bar";
assertFalse(link + " - " + host, extractor.isInternal(link, host));
host = "foo.example.com/bar/baz";
link = "http://example.com/bar";
assertFalse(link + " - " + host, extractor.isInternal(link, host));
}
private void setDoc(String html, String host) {
String htmlSkeleton = "<html><head></head><body></body></html>";
this.doc = Jsoup.parse(htmlSkeleton, "http://" + host);
this.doc.body().append(html);
this.doc.normalise();
this.extractor = new LinksExtractor(doc);
}
/**
* Test of removePhpsessid method, of class LinksExtractor.
*/ @Test
public void testRemovePhpsessid() {
System.out.println("removePhpsessid");
assertEquals("http://example.com", extractor.removePhpsessid("http://example.com?PHPSESSID=37f8b870e53af9a55119f29b9d889783"));
assertEquals("http://example.com?foo=bar", extractor.removePhpsessid("http://example.com?PHPSESSID=37f8b870e53af9a55119f29b9d889783&foo=bar"));
assertEquals("http://example.com?foo=bar", extractor.removePhpsessid("http://example.com?foo=bar&PHPSESSID=37f8b870e53af9a55119f29b9d889783"));
assertEquals("http://example.com", extractor.removePhpsessid("http://example.com"));
}
}