package com.github.jaystgelais.easymail;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.StringWriter;
import java.net.MalformedURLException;
import java.net.URL;
import org.fit.cssbox.css.CSSNorm;
import org.fit.cssbox.css.DOMAnalyzer;
import org.fit.cssbox.io.DOMSource;
import org.fit.cssbox.io.DefaultDOMSource;
import org.fit.cssbox.io.DocumentSource;
import org.fit.cssbox.io.StreamDocumentSource;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.stream.StreamResult;
/**
 * Utility class that provides style inlining transformation for HTML documents.
 *
 * @author jaystgelais
 */
public final class HtmlProcessor {
    /** Message used for every {@link HtmlTransformationException} raised by {@link #process}. */
    private static final String TRANSFORMATION_ERROR_MESSAGE =
            "Error occurred transforming HTML to use inline styles.";

    /** Matches a leading URI scheme such as {@code http:}, {@code https:}, {@code data:} or {@code cid:}. */
    private static final java.util.regex.Pattern URL_SCHEME_PREFIX =
            java.util.regex.Pattern.compile("^[A-Za-z][A-Za-z0-9+.\\-]*:");

    private HtmlProcessor() { }

    /**
     * Produces EmailMessage content based on a number of transformations on the supplied HtmlContentProvider to
     * increase its cross client rendering compatibility.
     *
     * Transformations include:
     * <ol>
     *     <li>Calculate effective styles of all elements and write them to their style attribute.</li>
     *     <li>Remove style declarations from head of document.</li>
     *     <li>Remove class attributes from all elements.</li>
     *     <li>Configure Embedded image references for all images pointing to relative URLs.</li>
     * </ol>
     *
     * @param contentProvider Input to HTML processing transformations.
     * @return MessageContent containing processed HTML and embedded images.
     * @throws HtmlTransformationException If any errors occur preventing the transformation of the inputted HTML
     * document.
     */
    public static EmailMessageContent process(final HtmlContentProvider contentProvider)
            throws HtmlTransformationException {
        DocumentSource docSource = null;
        boolean transformed = false;
        EmailMessageContent emailMessageContent = new EmailMessageContent(contentProvider);
        try {
            docSource = newDocumentSource(contentProvider);
            Document doc = parseHtml(docSource);
            applyEffectiveStylesToStyleAttributes(doc, contentProvider.getBaseURL());
            removeStyleElements(doc);
            removeClassAttributes(doc);
            configureEmbeddedImages(doc, emailMessageContent);
            emailMessageContent.setHtmlMessage(getHtmlBodyAsString(doc));
            transformed = true;
            return emailMessageContent;
        } catch (Exception e) {
            throw new HtmlTransformationException(TRANSFORMATION_ERROR_MESSAGE, e);
        } finally {
            if (docSource != null) {
                try {
                    docSource.close();
                } catch (Exception closeFailure) {
                    // Only report the close failure when the transformation itself succeeded; otherwise the
                    // exception thrown from the catch block above is the root cause and must not be replaced.
                    if (transformed) {
                        throw new HtmlTransformationException(TRANSFORMATION_ERROR_MESSAGE, closeFailure);
                    }
                }
            }
        }
    }

    /**
     * Rewrites the {@code src} of every {@code img} element that points to a relative URL so that it references
     * an embedded image registered on the supplied message content ({@code cid:} + content id).
     *
     * @param doc Document whose images are inspected.
     * @param emailMessageContent Message content that collects the embedded images.
     * @throws MalformedURLException Declared for failures raised while registering an embedded image.
     */
    private static void configureEmbeddedImages(final Document doc, final EmailMessageContent emailMessageContent)
            throws MalformedURLException {
        NodeList imgNodes = doc.getElementsByTagName("img");
        for (int x = 0; x < imgNodes.getLength(); x++) {
            Node imgNode = imgNodes.item(x);
            String url = getAttributeValue(imgNode, "src");
            // isRelativeUrl is null-safe, so images without a src attribute are simply skipped.
            if (isRelativeUrl(url)) {
                setAttributeValue(imgNode, "src", "cid:" + emailMessageContent.addEmbeddedImage(url).getContentId());
            }
        }
    }

    /**
     * Looks up an attribute node by name.
     *
     * @param node Node to inspect.
     * @param attributeName Name of the attribute.
     * @return The attribute node, or {@code null} if the node has no attributes or no attribute of that name.
     */
    private static Node findAttribute(final Node node, final String attributeName) {
        if (node.getAttributes() == null) {
            return null;
        }
        return node.getAttributes().getNamedItem(attributeName);
    }

    /**
     * Overwrites the value of an existing attribute; does nothing if the attribute is not present.
     */
    private static void setAttributeValue(final Node node, final String attributeName, final String value) {
        Node attribute = findAttribute(node, attributeName);
        if (attribute != null) {
            attribute.setTextContent(value);
        }
    }

    /**
     * Reads the value of an attribute.
     *
     * @return The attribute value, or {@code null} if the attribute is not present.
     */
    private static String getAttributeValue(final Node node, final String attributeName) {
        Node attribute = findAttribute(node, attributeName);
        if (attribute == null) {
            return null;
        }
        return attribute.getTextContent();
    }

    /**
     * Decides whether an image URL is relative and therefore has to be embedded.
     *
     * @param url Value of the {@code src} attribute; may be {@code null} when the attribute is missing.
     * @return {@code false} for {@code null} or blank values and for any URL that starts with a URI scheme
     * ({@code http:}, {@code https:}, {@code data:}, {@code cid:}, ...); {@code true} otherwise.
     */
    private static boolean isRelativeUrl(final String url) {
        if (url == null) {
            return false;
        }
        String trimmed = url.trim();
        if (trimmed.length() == 0) {
            return false;
        }
        return !URL_SCHEME_PREFIX.matcher(trimmed).find();
    }

    /**
     * Removes the {@code class} attribute from the given node and, recursively, from all of its descendants.
     */
    private static void removeClassAttributes(final Node node) {
        if (findAttribute(node, "class") != null) {
            node.getAttributes().removeNamedItem("class");
        }
        NodeList children = node.getChildNodes();
        for (int x = 0; x < children.getLength(); x++) {
            removeClassAttributes(children.item(x));
        }
    }

    /**
     * Serializes the first {@code body} element of the document as a {@code div} element.
     *
     * @param doc Document to serialize; its body element is renamed to {@code div} as a side effect.
     * @return Serialized markup starting at the opening {@code div} tag.
     * @throws TransformerException If the document has no body element or serialization fails.
     */
    private static String getHtmlBodyAsString(final Document doc) throws TransformerException {
        Node body = doc.getElementsByTagName("body").item(0);
        if (body == null) {
            throw new TransformerException("HTML document does not contain a body element.");
        }
        // renameNode may create a replacement node instead of renaming in place, so always use its return value.
        Node renamedBody = doc.renameNode(body, "", "div");

        StringWriter writer = new StringWriter();
        StreamResult result = new StreamResult(writer);
        javax.xml.transform.dom.DOMSource domSource = new javax.xml.transform.dom.DOMSource(renamedBody);
        TransformerFactory tf = TransformerFactory.newInstance();
        Transformer transformer = tf.newTransformer();
        transformer.transform(domSource, result);

        // Drop anything the serializer emitted ahead of the element itself (e.g. an XML declaration).
        final String content = writer.toString();
        final int divStart = content.indexOf("<div");
        if (divStart < 0) {
            return content;
        }
        return content.substring(divStart);
    }

    /**
     * Wraps the provider's HTML content in an in-memory document source with content type {@code text/html}.
     */
    private static StreamDocumentSource newDocumentSource(final HtmlContentProvider contentProvider)
            throws IOException {
        // NOTE(review): getBytes() encodes with the platform default charset and the content type carries no
        // charset parameter - confirm the parser decodes with the same charset for non-ASCII content.
        return new StreamDocumentSource(new ByteArrayInputStream(contentProvider.getHtmlMessageContent().getBytes()),
                null, "text/html");
    }

    /**
     * Parses the supplied document source into a DOM document.
     */
    private static Document parseHtml(final DocumentSource docSource) throws SAXException, IOException {
        DOMSource parser = new DefaultDOMSource(docSource);
        return parser.parse();
    }

    /**
     * Runs the CSSBox DOM analyzer over the document so that styles end up on the elements themselves.
     *
     * @param doc Document to analyze and modify.
     * @param relativeUrl Base URL handed to the analyzer.
     */
    private static void applyEffectiveStylesToStyleAttributes(final Document doc, final URL relativeUrl) {
        DOMAnalyzer da = new DOMAnalyzer(doc, relativeUrl);
        da.attributesToStyles(); //convert the HTML presentation attributes to inline styles
        da.addStyleSheet(null, CSSNorm.stdStyleSheet(), DOMAnalyzer.Origin.AGENT); //use the standard style sheet
        da.addStyleSheet(null, CSSNorm.userStyleSheet(), DOMAnalyzer.Origin.AGENT); //use the additional style sheet
        da.getStyleSheets(); //load the author style sheets
        da.stylesToDomInherited();
    }

    /**
     * Removes every {@code style} element from the document.
     */
    private static void removeStyleElements(final Document doc) {
        NodeList styleElements = doc.getElementsByTagName("style");
        // The list is live: it shrinks as nodes are removed. Walking it backwards keeps the remaining
        // indexes valid, whereas a forward walk would skip every second element.
        for (int x = styleElements.getLength() - 1; x >= 0; x--) {
            Node styleElement = styleElements.item(x);
            Node parent = styleElement.getParentNode();
            if (parent != null) {
                parent.removeChild(styleElement);
            }
        }
    }
}