package spimedb.util;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.safety.Cleaner;
import org.jsoup.safety.Whitelist;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Static helpers for sanitizing HTML fragments with jsoup.
 *
 * <p>All state is held in shared static instances: a whitelist built from
 * {@code Whitelist.basicWithImages()}, a {@link Cleaner} constructed from that
 * whitelist, and output settings with pretty-printing switched off.
 *
 * <p>Created by me on 12/26/16.
 */
public final class HTML {

    /** Whitelist the {@link #cleaner} is constructed from. */
    static final Whitelist whitelist = Whitelist.basicWithImages();

    /** Shared cleaner built from {@link #whitelist}. */
    static final Cleaner cleaner = new Cleaner(whitelist);

    /** Output settings applied to every cleaned document; pretty-printing is disabled. */
    static final Document.OutputSettings outputSettings = new Document.OutputSettings().prettyPrint(false);

    static final Logger logger = LoggerFactory.getLogger(HTML.class);

    /**
     * Parses {@code html} as a body fragment, runs it through the shared
     * {@link Cleaner}, and returns the resulting body markup serialized with
     * {@link #outputSettings}.
     *
     * @param html the HTML fragment to filter; {@code null} is returned unchanged
     *             without invoking the parser
     * @return the cleaned body HTML, or the original {@code html} if parsing or
     *         cleaning threw an exception (the failure is logged at error level)
     */
    public static String filterHTML(String html) {
        if (html == null) {
            // Nothing to parse. Hand null straight back instead of letting the parser
            // throw and recording a spurious error; the catch block below would have
            // returned the same null anyway.
            return null;
        }

        try {
            Document dirty = Jsoup.parseBodyFragment(html);
            Document clean = cleaner.clean(dirty);
            clean.outputSettings(outputSettings);
            return clean.body().html();
        } catch (Exception e) {
            // The throwable is passed as the trailing argument, beyond the two
            // placeholders, so SLF4J records its stack trace; the formatted message
            // text is the same as before.
            logger.error("filterHTML {}: \"{}\"", e.toString(), html, e);

            // NOTE(review): this fallback returns the raw, unsanitized input. If callers
            // treat the result as safe to render, a failure here bypasses the whitelist.
            // Kept as-is for backward compatibility; consider returning an escaped or
            // empty string instead.
            return html;
        }
    }
}