/**
* Copyright (c) 2000-present Liferay, Inc. All rights reserved.
*
* This library is free software; you can redistribute it and/or modify it under
* the terms of the GNU Lesser General Public License as published by the Free
* Software Foundation; either version 2.1 of the License, or (at your option)
* any later version.
*
* This library is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
* details.
*/
package com.liferay.mail.reader.internal.util;
import com.liferay.portal.kernel.util.StringPool;
import java.util.regex.Pattern;
/**
* @author Scott Lee
* @author Minhchau Dang
* @author Michael C. Han
*/
public class HtmlContentUtil {

	/**
	 * Removes document-level markup from an HTML string so that the remainder
	 * can be embedded inside another page.
	 *
	 * <p>
	 * The following are replaced with {@link StringPool#BLANK}: opening and
	 * closing <code>body</code> and <code>html</code> tags, the
	 * <code>doctype</code> declaration, <code>link</code> tags, and the
	 * complete <code>head</code>, <code>script</code>, and <code>style</code>
	 * elements including their content. Matching is case insensitive.
	 * </p>
	 *
	 * @param  html the HTML to process, may be <code>null</code>
	 * @return the HTML with the markup listed above removed, or
	 *         <code>null</code> if <code>html</code> is <code>null</code>
	 */
	public static String getInlineHtml(String html) {
		if (html == null) {
			return null;
		}

		// Lines: single tags whose content (if any) must be kept

		html = _bodyTagPattern.matcher(html).replaceAll(StringPool.BLANK);
		html = _doctypeTagPattern.matcher(html).replaceAll(StringPool.BLANK);
		html = _htmlTagPattern.matcher(html).replaceAll(StringPool.BLANK);
		html = _linkTagPattern.matcher(html).replaceAll(StringPool.BLANK);

		// Blocks: elements that are dropped together with their content

		html = _headTagPattern.matcher(html).replaceAll(StringPool.BLANK);
		html = _scriptTagPattern.matcher(html).replaceAll(StringPool.BLANK);
		html = _styleTagPattern.matcher(html).replaceAll(StringPool.BLANK);

		return html;
	}

	/**
	 * Reduces an HTML string to its text by replacing every run of carriage
	 * returns and line feeds, and then every <code>&lt;...&gt;</code> tag, with
	 * {@link StringPool#BLANK}.
	 *
	 * <p>
	 * Text that was separated only by a line break or only by a tag is joined
	 * without any separator. Character entities are not decoded.
	 * </p>
	 *
	 * @param  html the HTML to process, may be <code>null</code>
	 * @return the text with line breaks and tags removed, or <code>null</code>
	 *         if <code>html</code> is <code>null</code>
	 */
	public static String getPlainText(String html) {
		if (html == null) {
			return null;
		}

		html = _lineBreakPattern.matcher(html).replaceAll(StringPool.BLANK);
		html = _tagPattern.matcher(html).replaceAll(StringPool.BLANK);

		return html;
	}

	// The "\\b" after each tag name stops a pattern from matching a longer tag
	// name that merely starts with it (for example "<header>" for "<head"),
	// and "[^>]*" (instead of "[^>]+") lets attribute-less tags such as
	// "<body>" and "</html>" match as well.

	private static final Pattern _bodyTagPattern = Pattern.compile(
		"</?body\\b[^>]*>", Pattern.CASE_INSENSITIVE);
	private static final Pattern _doctypeTagPattern = Pattern.compile(
		"<!doctype\\b[^>]*>", Pattern.CASE_INSENSITIVE);

	// Block patterns use DOTALL so that "." also spans line terminators, and
	// allow optional whitespace before the ">" of the closing tag.

	private static final Pattern _headTagPattern = Pattern.compile(
		"<head\\b[^>]*>.*?</head\\s*>",
		Pattern.CASE_INSENSITIVE | Pattern.DOTALL);
	private static final Pattern _htmlTagPattern = Pattern.compile(
		"</?html\\b[^>]*>", Pattern.CASE_INSENSITIVE);
	private static final Pattern _lineBreakPattern = Pattern.compile("[\r\n]+");
	private static final Pattern _linkTagPattern = Pattern.compile(
		"</?link\\b[^>]*>", Pattern.CASE_INSENSITIVE);
	private static final Pattern _scriptTagPattern = Pattern.compile(
		"<script\\b[^>]*>.*?</script\\s*>",
		Pattern.CASE_INSENSITIVE | Pattern.DOTALL);
	private static final Pattern _styleTagPattern = Pattern.compile(
		"<style\\b[^>]*>.*?</style\\s*>",
		Pattern.CASE_INSENSITIVE | Pattern.DOTALL);
	private static final Pattern _tagPattern = Pattern.compile("<[^>]+>");

}