/**
 * Static helpers for assembling small HTML documents (stylesheet / script tags around an
 * HTML fragment), for checking URLs, and for stripping markup from HTML text.
 *
 * <p>This class is not instantiable.
 */
public class HtmlUtil {

    /** Inline CSS that hides the {@code div.headline} element. */
    private static final String HIDE_HEADER_STYLE = "<style>div.headline{display:none;}</style>";

    /** Format template for a stylesheet {@code <link>} tag; {@code %s} receives the href. */
    private static final String NEEDED_FORMAT_CSS_TAG = "<link rel=\"stylesheet\" type=\"text/css\" href=\"%s\"/>";

    /** Format template for a {@code <script>} tag; {@code %s} receives the src. */
    private static final String NEEDED_FORMAT_JS_TAG = "<script src=\"%s\"></script>";

    /** MIME type string for UTF-8 encoded HTML. */
    public static final String MIME_TYPE = "text/html; charset=utf-8";

    /** Name of the UTF-8 character encoding. */
    public static final String ENCODING = "utf-8";

    /**
     * Pattern used by {@link #isUrl(String)}: an http:// or https:// prefix (any letter case)
     * followed by groups of letters, digits, '-' or '~'.
     *
     * <p>NOTE(review): the '.' between groups is unescaped, so it matches any single character,
     * not only a literal dot. It is kept exactly as written because tightening it would reject
     * inputs (for example URLs with a query string) that are accepted today.
     */
    private static final Pattern URL_PATTERN = Pattern.compile(
            "^([hH][tT]{2}[pP]://|[hH][tT]{2}[pP][sS]://)(([A-Za-z0-9-~]+).)+([A-Za-z0-9-~\\/])+$");

    /** Matches a complete {@code <script>...</script>} element, case-insensitively. */
    private static final Pattern SCRIPT_PATTERN =
            Pattern.compile("<script[^>]*?>[\\s\\S]*?<\\/script>", Pattern.CASE_INSENSITIVE);

    /** Matches a complete {@code <style>...</style>} element, case-insensitively. */
    private static final Pattern STYLE_PATTERN =
            Pattern.compile("<style[^>]*?>[\\s\\S]*?<\\/style>", Pattern.CASE_INSENSITIVE);

    /** Matches any single HTML tag. */
    private static final Pattern TAG_PATTERN =
            Pattern.compile("<[^>]+>", Pattern.CASE_INSENSITIVE);

    private HtmlUtil() {
    }

    /**
     * Builds a stylesheet {@code <link>} tag for one CSS URL.
     *
     * @param url href of the stylesheet; inserted verbatim (no escaping is applied)
     * @return the link tag
     */
    public static String createCssTag(String url) {
        return String.format(NEEDED_FORMAT_CSS_TAG, url);
    }

    /**
     * Builds one stylesheet {@code <link>} tag per CSS URL and concatenates them in list order.
     *
     * @param urls stylesheet hrefs; {@code null} is treated as an empty list
     * @return the concatenated link tags, or an empty string when there are no URLs
     */
    public static String createCssTag(List<String> urls) {
        final StringBuilder sb = new StringBuilder();
        if (urls != null) {
            for (String url : urls) {
                sb.append(createCssTag(url));
            }
        }
        return sb.toString();
    }

    /**
     * Builds a {@code <script>} tag for one JavaScript URL.
     *
     * @param url src of the script; inserted verbatim (no escaping is applied)
     * @return the script tag
     */
    public static String createJsTag(String url) {
        return String.format(NEEDED_FORMAT_JS_TAG, url);
    }

    /**
     * Builds one {@code <script>} tag per JavaScript URL and concatenates them in list order.
     *
     * @param urls script srcs; {@code null} is treated as an empty list
     * @return the concatenated script tags, or an empty string when there are no URLs
     */
    public static String createJsTag(List<String> urls) {
        final StringBuilder sb = new StringBuilder();
        if (urls != null) {
            for (String url : urls) {
                sb.append(createJsTag(url));
            }
        }
        return sb.toString();
    }

    /**
     * Assembles HTML data from stylesheet links, an HTML fragment and script tags.
     *
     * <p>The result is, in order: the CSS link tags, the style that hides
     * {@code div.headline}, the given HTML, and the script tags.
     *
     * @param html    the HTML fragment; must not be {@code null}
     * @param cssList stylesheet URLs; {@code null} is treated as an empty list
     * @param jsList  script URLs; {@code null} is treated as an empty list
     * @return the assembled HTML string
     * @throws NullPointerException if {@code html} is {@code null}
     */
    public static String createHtmlData(String html, List<String> cssList, List<String> jsList) {
        final String css = createCssTag(cssList);
        final String js = createJsTag(jsList);
        return css.concat(HIDE_HEADER_STYLE).concat(html).concat(js);
    }

    /**
     * Checks whether a URL is reachable by sending an HTTP {@code HEAD} request.
     *
     * <p>This performs network I/O on the calling thread; no connect or read timeout is
     * configured here.
     *
     * @param url the URL to probe
     * @return {@code true} only if the request completes with HTTP 200; {@code false} for a
     *         {@code null} or blank URL, a malformed URL, a non-HTTP(S) URL, any other response
     *         code, or any failure while connecting
     */
    public static boolean isUrlUsable(String url) {
        if (url == null || url.trim().isEmpty()) {
            return false;
        }
        HttpURLConnection connection = null;
        try {
            connection = (HttpURLConnection) new URL(url).openConnection();
            connection.setRequestMethod("HEAD");
            return connection.getResponseCode() == HttpURLConnection.HTTP_OK;
        } catch (Exception ignored) {
            // Deliberate best-effort probe: a malformed URL, a non-HTTP protocol (failed cast)
            // or an I/O failure all simply mean "not usable".
            return false;
        } finally {
            // The connection is still null when URL parsing or the cast failed; disconnecting
            // unconditionally would throw NullPointerException and mask the false result.
            if (connection != null) {
                connection.disconnect();
            }
        }
    }

    /**
     * Checks whether a string looks like an http or https URL.
     *
     * <p>The whole string must match the URL pattern: an {@code http://} or {@code https://}
     * prefix in any letter case, followed by characters accepted by the pattern.
     *
     * @param url the string to test; may be {@code null}
     * @return {@code true} if the string matches the URL pattern, {@code false} otherwise
     *         (including for {@code null})
     */
    public static boolean isUrl(String url) {
        return url != null && URL_PATTERN.matcher(url).matches();
    }

    /**
     * Removes HTML markup from a string.
     *
     * <p>{@code <script>} and {@code <style>} elements are removed together with their content,
     * then every remaining tag is removed, and the result is trimmed.
     *
     * @param htmlStr the HTML text; may be {@code null}
     * @return the text with markup removed and surrounding whitespace trimmed, or an empty string
     *         when {@code htmlStr} is {@code null}
     */
    public static String delHTMLTag(String htmlStr) {
        if (htmlStr == null) {
            return "";
        }
        // Order matters: script/style blocks must go first so their bodies are not left behind
        // once the generic tag pattern has stripped the surrounding tags.
        String text = SCRIPT_PATTERN.matcher(htmlStr).replaceAll("");
        text = STYLE_PATTERN.matcher(text).replaceAll("");
        text = TAG_PATTERN.matcher(text).replaceAll("");
        return text.trim();
    }
}