package com.cedricblondeau.webpage2html.transformers;
import com.cedricblondeau.webpage2html.Configuration;
import com.cedricblondeau.webpage2html.transformers.assets.CssTransformer;
import com.cedricblondeau.webpage2html.transformers.assets.Transformer;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.util.logging.Logger;
import java.net.URL;
/**
 * Rewrites a parsed HTML document so that the assets it references
 * (stylesheets, icons, scripts, images and inline {@code style} attributes)
 * are replaced by whatever the matching {@link Transformer} produces.
 *
 * <p>The document is parsed once in the constructor; {@link #transform()}
 * mutates it in place and the result is read back through
 * {@link #getDocument()} or {@link #getHtml()}.
 */
public final class HtmlTransformer {

    private static final Logger logger = Logger.getLogger(HtmlTransformer.class.getName());

    private static final String DATA_SCHEME = "data:";

    private final Configuration configuration;
    private final Document document;
    private final URL url;
    private final String charset;

    /**
     * Parses {@code content} using {@code url} as the document base URI.
     *
     * @param content       raw HTML markup
     * @param url           location the markup was loaded from; must not be {@code null}
     * @param charset       charset to declare in the document when it declares none;
     *                      may be {@code null}, in which case nothing is injected
     * @param configuration settings handed over to the asset transformers
     */
    public HtmlTransformer(String content, URL url, String charset, Configuration configuration) {
        this.document = Jsoup.parse(content, url.toExternalForm());
        this.url = url;
        this.charset = charset;
        this.configuration = configuration;
    }

    /**
     * Runs every transformation step on the document, in a fixed order:
     * charset injection, inline styles, {@code <link>}, {@code <script>}, {@code <img>}.
     */
    public void transform() {
        injectEncoding();
        transformStyle();
        transformLink();
        transformScript();
        transformImg();
    }

    /**
     * Adds a {@code <meta charset>} declaration when a charset is known and no
     * {@code <meta>} in the head already carries a {@code charset} attribute.
     */
    private void injectEncoding() {
        if (charset == null || charset.isEmpty()) {
            return;
        }
        boolean charsetDefinitionFound = document.head().getElementsByTag("meta").hasAttr("charset");
        if (charsetDefinitionFound) {
            return;
        }
        logger.info(String.format("Injecting charset %s", charset));
        // Prepend rather than append: the declaration must sit near the top of the
        // document, and stylesheets inlined into <head> later would otherwise end up
        // in front of it. Setting the value through the DOM API (instead of building
        // markup with String.format) keeps a malformed charset from injecting HTML.
        document.head().prependElement("meta").attr("charset", charset);
    }

    /**
     * Passes the value of every {@code style} attribute through a
     * {@link CssTransformer} and stores the result back on the element.
     */
    private void transformStyle() {
        Elements styleElements = document.getElementsByAttribute("style");
        for (Element element : styleElements) {
            logger.info("Transforming inline style");
            CssTransformer cssTransformer = new CssTransformer(element.attr("style"), url, configuration);
            element.attr("style", cssTransformer.getContent());
        }
    }

    /**
     * Processes {@code <link>} elements that reference a stylesheet or an icon.
     * A link resolved to a {@link CssTransformer} is replaced by a {@code <style>}
     * element holding the transformer's content; any other link gets its
     * {@code href} replaced by {@link Transformer#getBase64()}.
     */
    private void transformLink() {
        Elements linkElements = document.getElementsByTag("link");
        for (Element element : linkElements) {
            if (!isInlinableRel(element.attr("rel"))) {
                continue;
            }
            String href = element.attr("href");
            if (!isExternalReference(href)) {
                continue;
            }
            logger.info(String.format("Transforming link %s", href));
            Transformer transformer = createTransformer(href);
            if (transformer == null) {
                // No transformer available for this reference: leave the link untouched.
                continue;
            }
            if (transformer instanceof CssTransformer) {
                replaceWithStyle(element, ((CssTransformer) transformer).getContent());
            } else {
                element.attr("href", transformer.getBase64());
            }
        }
    }

    /**
     * Swaps a stylesheet {@code <link>} for a {@code <style>} element containing
     * {@code css}, carrying over the link's {@code media} attribute so that
     * media-specific stylesheets (e.g. {@code media="print"}) stay restricted.
     */
    private void replaceWithStyle(Element link, String css) {
        link.after(String.format("<style>%s</style>", css));
        Element style = link.nextElementSibling();
        if (style != null && link.hasAttr("media")) {
            style.attr("media", link.attr("media"));
        }
        link.remove();
    }

    /** Replaces the {@code src} of every external {@code <script>}. */
    private void transformScript() {
        transformSrc("script", "script");
    }

    /** Replaces the {@code src} of every external {@code <img>}. */
    private void transformImg() {
        transformSrc("img", "image");
    }

    /**
     * Replaces the {@code src} attribute of every {@code tagName} element that
     * points to an external resource with {@link Transformer#getBase64()}.
     *
     * @param tagName element name to look up
     * @param label   human readable name used in the log message
     */
    private void transformSrc(String tagName, String label) {
        for (Element element : document.getElementsByTag(tagName)) {
            // attr() yields "" for a missing attribute, so this also covers "no src".
            String src = element.attr("src");
            if (!isExternalReference(src)) {
                continue;
            }
            logger.info(String.format("Transforming %s %s", label, src));
            Transformer transformer = createTransformer(src);
            if (transformer != null) {
                element.attr("src", transformer.getBase64());
            }
        }
    }

    /**
     * Asks a {@link TransformerFactory} for the transformer handling {@code reference}.
     *
     * @return the transformer, or {@code null} when the factory provides none
     */
    private Transformer createTransformer(String reference) {
        return new TransformerFactory(configuration).get(reference, url);
    }

    /**
     * @return {@code true} when {@code reference} is non-empty and is not already
     *         a {@code data:} URI (scheme compared case-insensitively)
     */
    private static boolean isExternalReference(String reference) {
        return !reference.isEmpty()
                && !reference.regionMatches(true, 0, DATA_SCHEME, 0, DATA_SCHEME.length());
    }

    /**
     * Decides whether a {@code rel} value designates a resource to inline.
     * {@code rel} is a whitespace-separated, case-insensitive token list, so
     * values such as {@code "shortcut icon"} or {@code "Stylesheet"} qualify.
     * Alternate stylesheets are skipped because inlining them would activate them.
     */
    private static boolean isInlinableRel(String rel) {
        boolean stylesheet = false;
        boolean alternate = false;
        boolean icon = false;
        for (String token : rel.trim().split("\\s+")) {
            if (token.equalsIgnoreCase("stylesheet")) {
                stylesheet = true;
            } else if (token.equalsIgnoreCase("alternate")) {
                alternate = true;
            } else if (token.equalsIgnoreCase("icon")) {
                icon = true;
            }
        }
        return icon || (stylesheet && !alternate);
    }

    /**
     * @return JSoup Document
     */
    public Document getDocument() {
        return document;
    }

    /**
     * @return JSoup Document inner HTML
     */
    public String getHtml() {
        return document.html();
    }

    /**
     * @return JSoup Document title
     */
    public String getTitle() {
        return document.title();
    }

    /**
     * @return URL
     */
    public URL getUrl() {
        return url;
    }
}