/*
 *    Calendula - An assistant for personal medication management.
 *    Copyright (C) 2016 CITIUS - USC
 *
 *    Calendula is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 3 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this software.  If not, see <http://www.gnu.org/licenses/>.
 */

package es.usc.citius.servando.calendula.util.prospects;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.Entities;
import org.jsoup.safety.Cleaner;
import org.jsoup.safety.Whitelist;

import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

import es.usc.citius.servando.calendula.activities.WebViewActivity;

/**
 * Give AEMPS leaflets a better look for WebView.
 *
 * <p>The incoming HTML is parsed as a body fragment, the {@code nav} and
 * {@code div#pdfurl} elements are dropped, the markup is sanitized against a
 * relaxed whitelist, and elements that end up without any visible content are
 * removed before the result is serialized again.
 */
public class LeafletHtmlPostProcessor implements WebViewActivity.HtmlPostprocessor {

    /** Tags that are removed from the output when they have no children and no visible text. */
    private static final Set<String> REMOVABLE_TAGS = new HashSet<>(Arrays.asList(
            "div", "span", "strong", "p", "h1", "h2", "h3", "ul", "ol", "li", "a"));

    /** Non-breaking space; not stripped by {@link String#trim()}, so it is handled explicitly. */
    private static final String NBSP = "\u00a0";

    /**
     * Cleans a leaflet HTML fragment for display.
     *
     * @param html the raw leaflet HTML
     * @return the sanitized HTML, serialized with the XHTML entity escape mode
     */
    @Override
    public String process(String html) {
        // Parse str into a Document and drop the parts that are not part of the leaflet text
        Document doc = Jsoup.parseBodyFragment(html);
        doc.select("nav").remove();
        doc.select("div#pdfurl").remove();

        // perform cleaning
        Document cleaned = new Cleaner(buildWhitelist()).clean(doc);
        cleaned.outputSettings().escapeMode(Entities.EscapeMode.xhtml);

        // Remove paragraphs whose own text contains a space followed by a non-breaking space.
        // NOTE(review): the original literal held a raw, non-printable character after the
        // space; it is written here as an explicit escape on the assumption that it was a
        // non-breaking space (U+00A0) - confirm against the original file.
        cleaned.select("p:matchesOwn((?is) \u00a0)").remove();

        removeEmptyElements(cleaned);

        // return html for display
        return cleaned.html();
    }

    /** Builds the white list used to clean the html. */
    private static Whitelist buildWhitelist() {
        Whitelist wl = Whitelist.relaxed();
        wl.addTags("div", "span", "p", "h1", "h2", "h3", "ul", "ol", "li", "a", "img");
        // Without this, links and image sources that are not absolute would be dropped
        wl.preserveRelativeLinks(true);
        wl.addAttributes("img", "src");
        wl.addAttributes("a", "href");
        return wl;
    }

    /**
     * Removes every removable element that has neither children nor visible text.
     *
     * <p>The snapshot returned by {@code getAllElements()} is in document order (parents
     * before their descendants). Walking it backwards handles descendants first, so a
     * wrapper whose children were all removed is itself seen as empty and removed too.
     */
    private static void removeEmptyElements(Document document) {
        List<Element> all = document.getAllElements();
        for (int i = all.size() - 1; i >= 0; i--) {
            Element el = all.get(i);
            if (REMOVABLE_TAGS.contains(el.tagName()) && isEmpty(el)) {
                el.remove();
            }
        }
    }

    /** Returns true when the element has no child elements and only blank / nbsp text. */
    private static boolean isEmpty(Element el) {
        if (!el.children().isEmpty()) {
            return false;
        }
        return !el.hasText() || el.text().replace(NBSP, "").trim().isEmpty();
    }
}