package com.github.mefi.jkuuza.parser;
import java.util.ArrayList;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.junit.AfterClass;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;
import static org.junit.Assert.*;
/**
 * Unit tests for {@link ContentExtractor}.
 *
 * <p>Each test builds a small jsoup {@link Document}, hands it to a
 * {@code ContentExtractor} and checks what the extractor reports for meta
 * tags, the page title and elements addressed by a CSS selector.
 *
 * @author mefi
 */
public class ContentExtractorTest {

    /** Empty page every head-based fixture document is parsed from. */
    private static final String HTML_SKELETON = "<html><head></head><body></body></html>";

    /** Base URI passed to jsoup when parsing the skeleton. */
    private static final String BASE_URI = "http://example.com";

    /** Document most recently built by {@link #setDocHeader(String)}. */
    private Document doc;

    /** Extractor wrapping {@link #doc}; recreated whenever the document is rebuilt. */
    private ContentExtractor extractor;

    public ContentExtractorTest() {
    }

    /** Class-level set-up hook; intentionally empty. */
    @BeforeClass
    public static void setUpClass() throws Exception {
    }

    /** Class-level tear-down hook; intentionally empty. */
    @AfterClass
    public static void tearDownClass() throws Exception {
    }

    /**
     * Starts every test with an extractor over the bare skeleton page
     * (nothing added to the head).
     */
    @Before
    public void setUp() {
        setDocHeader("");
    }

    /**
     * Rebuilds the fixture: parses the skeleton page, appends the given markup
     * to its head, normalises the document and creates a fresh extractor for it.
     *
     * @param html markup to append to the document head; may be empty
     */
    private void setDocHeader(String html) {
        doc = Jsoup.parse(HTML_SKELETON, BASE_URI);
        doc.head().append(html);
        doc.normalise();
        extractor = new ContentExtractor(doc);
    }

    /**
     * Test of getMetaDescription method, of class ContentExtractor.
     * Covers quoted and unquoted attribute values.
     */
    @Test
    public void testGetMetaDescription() {
        setDocHeader("<meta name=\"description\" content=\"foo\" />");
        assertEquals("foo", extractor.getMetaDescription());

        setDocHeader("<meta name=description content=foo />");
        assertEquals("foo", extractor.getMetaDescription());
    }

    /**
     * Test of getMetaKeywords method, of class ContentExtractor.
     * Covers a single keyword, a comma-separated list and unquoted attributes.
     */
    @Test
    public void testGetMetaKeywords() {
        setDocHeader("<meta name=\"keywords\" content=\"foo\" />");
        assertEquals("foo", extractor.getMetaKeywords());

        setDocHeader("<meta name=\"keywords\" content=\"foo, bar, baz\" />");
        assertEquals("foo, bar, baz", extractor.getMetaKeywords());

        setDocHeader("<meta name=keywords content=foo />");
        assertEquals("foo", extractor.getMetaKeywords());
    }

    /**
     * Test of getMetaCharset method, of class ContentExtractor.
     * Covers lower-case and upper-case tag/attribute names and checks that the
     * charset value is returned with its original letter case.
     */
    @Test
    public void testGetMetaCharset() {
        setDocHeader("<meta http-equiv=\"Content-type\" content=\"text/html; charset=UTF-8\">");
        assertEquals("UTF-8", extractor.getMetaCharset());

        setDocHeader("<META HTTP-EQUIV=\"Content-type\" CONTENT=\"text/html; charset=UTF-8\">");
        assertEquals("UTF-8", extractor.getMetaCharset());

        setDocHeader("<meta http-equiv=\"Content-type\" content=\"text/html; charset=utf-8\">");
        assertEquals("utf-8", extractor.getMetaCharset());
    }

    /**
     * Test of hasMetaDescription method, of class ContentExtractor.
     * A description counts as present only when the tag carries a content
     * attribute; an empty head and a content-less tag must both yield false.
     */
    @Test
    public void testHasMetaDescription() {
        String html = "<meta name=\"description\" content=\"foo\" />";
        setDocHeader(html);
        assertTrue("expected: true - " + html, extractor.hasMetaDescription());

        html = "";
        setDocHeader(html);
        assertFalse("expected: false - " + html, extractor.hasMetaDescription());

        html = "<meta name=\"description\" />";
        setDocHeader(html);
        assertFalse("expected: false - " + html, extractor.hasMetaDescription());
    }

    /**
     * Test of hasMetaKeywords method, of class ContentExtractor.
     * Same three cases as the description test: populated tag, empty head,
     * tag without a content attribute.
     */
    @Test
    public void testHasMetaKeywords() {
        String html = "<meta name=\"keywords\" content=\"foo\" />";
        setDocHeader(html);
        assertTrue("expected: true - " + html, extractor.hasMetaKeywords());

        html = "";
        setDocHeader(html);
        assertFalse("expected: false - " + html, extractor.hasMetaKeywords());

        html = "<meta name=\"keywords\" />";
        setDocHeader(html);
        assertFalse("expected: false - " + html, extractor.hasMetaKeywords());
    }

    /**
     * Test of hasMetaCharset method, of class ContentExtractor.
     * Covers upper-case and lower-case tag names, an empty head and a
     * Content-Type tag without a content attribute.
     */
    @Test
    public void testHasMetaCharset() {
        String html = "<META http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\" />";
        setDocHeader(html);
        assertTrue("expected: true - " + html, extractor.hasMetaCharset());

        html = "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\" />";
        setDocHeader(html);
        assertTrue("expected: true - " + html, extractor.hasMetaCharset());

        html = "";
        setDocHeader(html);
        assertFalse("expected: false - " + html, extractor.hasMetaCharset());

        html = "<META http-equiv=\"Content-Type\" />";
        setDocHeader(html);
        assertFalse("expected: false - " + html, extractor.hasMetaCharset());
    }

    /**
     * Test of getTitle method, of class ContentExtractor.
     * An empty title element and a missing title element must both give "".
     */
    @Test
    public void testGetTitle() {
        setDocHeader("<title>foo</title>");
        assertEquals("foo", extractor.getTitle());

        setDocHeader("<title></title>");
        assertEquals("", extractor.getTitle());

        setDocHeader("");
        assertEquals("", extractor.getTitle());
    }

    /**
     * Test of getValue method, of class ContentExtractor.
     * These cases parse body fragments directly instead of going through the
     * head-based fixture.
     */
    @Test
    public void testGetValue() {
        String html = "<div class=\"foo\">bar</div>";
        ContentExtractor contentExtractor = new ContentExtractor(Jsoup.parse(html));
        assertEquals("bar", contentExtractor.getValue(".foo"));

        html = "<div class=\"foo\">bar<span>baz</span></div>";
        contentExtractor = new ContentExtractor(Jsoup.parse(html));
        assertEquals("baz", contentExtractor.getValue(".foo span"));

        // TODO: a third case was left disabled in this test. For the
        // mixed-content markup above, getValue(".foo") was expected to return
        // "bar" (the element's own text without the nested span). Re-enable
        // once the intended behaviour of getValue for mixed content is
        // confirmed:
        //
        // contentExtractor = new ContentExtractor(Jsoup.parse(html));
        // assertEquals("bar", contentExtractor.getValue(".foo"));
    }

    /**
     * Test of getValuesOf method, of class ContentExtractor.
     * The values of all matching elements must come back in document order.
     */
    @Test
    public void testGetValuesOf() {
        String html = "<div class=\"foo\">bar</div><div class=\"foo\">baz</div>";

        ArrayList<String> expected = new ArrayList<String>();
        expected.add("bar");
        expected.add("baz");

        ContentExtractor contentExtractor = new ContentExtractor(Jsoup.parse(html));
        ArrayList<?> result = contentExtractor.getValuesOf(".foo");

        // List equality checks both the size and every element in order.
        assertEquals(expected, result);
    }
}