/* * (c) Copyright 2010-2011 AgileBirds * * This file is part of OpenFlexo. * * OpenFlexo is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * OpenFlexo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with OpenFlexo. If not, see <http://www.gnu.org/licenses/>. * */ package org.openflexo.wysiwyg; import java.awt.Color; import java.io.IOException; import java.io.Reader; import java.io.StringReader; import java.util.Enumeration; import java.util.List; import java.util.Stack; import java.util.logging.Logger; import javax.swing.text.MutableAttributeSet; import javax.swing.text.html.CSS; import javax.swing.text.html.HTML; import javax.swing.text.html.HTML.Attribute; import javax.swing.text.html.HTML.Tag; import javax.swing.text.html.HTMLEditorKit.ParserCallback; import javax.swing.text.html.parser.ParserDelegator; import org.openflexo.toolbox.HTMLUtils; public class FlexoWysiwygHtmlCleaner extends ParserCallback { @SuppressWarnings("unused") private static final Logger logger = Logger.getLogger(FlexoWysiwygHtmlCleaner.class.getPackage().getName()); public static String cleanHtml(String originalHtml, List<?> availableStyleClasses) throws IOException { if (originalHtml != null) { FlexoWysiwygHtmlCleaner cleaner = new FlexoWysiwygHtmlCleaner(availableStyleClasses); Reader reader = new StringReader(originalHtml); new ParserDelegator().parse(reader, cleaner, false); return cleaner.getCleanedHtml(); } return null; } private StringBuilder newHtml = new StringBuilder(); private List<?> availableStyleClasses; private Stack<String> closeTagsStack = new Stack<String>(); public FlexoWysiwygHtmlCleaner(List<?> availableStyleClasses) { this.availableStyleClasses = availableStyleClasses; } private String getCleanedHtml() { return newHtml.toString(); } @Override public void handleComment(char[] data, int pos) { // remove comments } @Override public void handleEndOfLineString(String eol) { newHtml.append(eol); } @Override public void handleText(char[] data, int pos) { newHtml.append(HTMLUtils.escapeStringForHTML(new String(data), false)); } @Override public void handleSimpleTag(Tag t, MutableAttributeSet a, int pos) { newHtml.append(getCleanedTag(t, a, true)); } @Override public void handleStartTag(Tag t, MutableAttributeSet a, int pos) { newHtml.append(getCleanedTag(t, a, false)); } @Override public void handleEndTag(Tag t, int pos) { closeNextTag(); } private void closeNextTag() { newHtml.append(closeTagsStack.pop()); } private String getCleanedTag(Tag tag, MutableAttributeSet attributeSet, boolean closeTag) { if (!isTagKept(tag)) { if (!closeTag) { closeTagsStack.push(""); // All tags must be in closeTagsStack } return ""; } StringBuilder sbMainTag = new StringBuilder(); StringBuilder sbAdditionalTag = new StringBuilder(); sbMainTag.append("<" + tag.toString()); for (Enumeration<?> en = attributeSet.getAttributeNames(); en.hasMoreElements();) { Object attributeName = en.nextElement(); String cleanedAttribute = getCleanedAttribute(tag, attributeName, attributeSet.getAttribute(attributeName)); if (attributeName == Attribute.STYLE && tag != Tag.FONT && sbAdditionalTag.length() == 0) { String fontAttributes = extractFontAttributesFromStyles(attributeSet.getAttribute(attributeName).toString()); if (fontAttributes != null && fontAttributes.length() > 0) { sbAdditionalTag.append("<" + Tag.FONT + fontAttributes + ">"); } } if (cleanedAttribute.length() > 0) { sbMainTag.append(" " + cleanedAttribute); } } if (closeTag) { sbMainTag.append(" /"); } sbMainTag.append(">"); if (closeTag) { // The eventual additional tag must be outside the main tag if (sbAdditionalTag.length() > 0) { return sbAdditionalTag.toString() + sbMainTag.toString() + "</" + Tag.FONT + ">"; } return sbMainTag.toString(); } // The eventual additional tag must be inside the main tag if (sbAdditionalTag.length() > 0) { closeTagsStack.push("</" + Tag.FONT + "></" + tag + ">"); return sbMainTag.toString() + sbAdditionalTag.toString(); } closeTagsStack.push("</" + tag + ">"); return sbMainTag.toString(); } private boolean isTagKept(Tag tag) { return tag == HTML.Tag.A || tag == HTML.Tag.B || tag == HTML.Tag.BR || tag == HTML.Tag.DIV || tag == HTML.Tag.FONT || tag == HTML.Tag.H1 || tag == HTML.Tag.H2 || tag == HTML.Tag.H3 || tag == HTML.Tag.H4 || tag == HTML.Tag.H5 || tag == HTML.Tag.H6 || tag == HTML.Tag.I || tag == HTML.Tag.IMG || tag == HTML.Tag.LI || tag == HTML.Tag.OL || tag == HTML.Tag.P || tag == HTML.Tag.SPAN || tag == HTML.Tag.U || tag == HTML.Tag.UL; } private String getCleanedAttribute(Tag tag, Object attributeName, Object attributeValue) { if (attributeName == null) { return ""; } if (attributeName == Attribute.STYLE) { if (attributeValue != null) { String cleanedStyle = getCleanedStyleValue(attributeValue.toString()); if (cleanedStyle != null && cleanedStyle.length() > 0) { return "style=\"" + cleanedStyle + "\""; } } return ""; } if (attributeName == Attribute.CLASS) { if (attributeValue != null) { String cleanedClass = getCleanedClassValue(attributeValue.toString()); if (cleanedClass != null && cleanedClass.length() > 0) { return "class=\"" + cleanedClass + "\""; } } return ""; } boolean includeAttribute = false; if (tag == Tag.A) { includeAttribute = attributeName == Attribute.HREF || attributeName == Attribute.TARGET || attributeName == Attribute.TITLE; } else if (tag == Tag.FONT) { includeAttribute = attributeName == Attribute.SIZE || attributeName == Attribute.COLOR; } else if (tag == Tag.IMG) { includeAttribute = attributeName == Attribute.WIDTH || attributeName == Attribute.HEIGHT || attributeName == Attribute.SRC; } else if (tag == Tag.P) { includeAttribute = attributeName == Attribute.ALIGN; } if (includeAttribute) { return attributeName + "=\"" + attributeValue + "\""; } return ""; } private String getCleanedStyleValue(String styleValue) { StringBuilder sb = new StringBuilder(); for (String styleEffect : styleValue.split(";")) { String cleanedStyleEffect = getCleanedStyleEffect(styleEffect); if (cleanedStyleEffect.length() > 0) { sb.append(cleanedStyleEffect + ";"); } } return sb.toString(); } private String extractFontAttributesFromStyles(String styleValue) { StringBuilder sb = new StringBuilder(); for (String styleEffect : styleValue.split(";")) { String[] keyAndValue = getStyleEffectKeyAndValue(styleEffect); if (keyAndValue != null) { String effectKey = keyAndValue[0]; String effectValue = keyAndValue[1]; CSS.Attribute attribute = CSS.getAttribute(effectKey); if (attribute == CSS.Attribute.COLOR) { sb.append(" " + Attribute.COLOR + "=\"" + effectValue + "\""); } else if (attribute == CSS.Attribute.FONT_SIZE) { Integer fontSizeInPoints = HTMLUtils.getFontSizeInPoints(effectValue); if (fontSizeInPoints != null) { sb.append(" " + Attribute.STYLE + "=\"" + CSS.Attribute.FONT_SIZE + ":" + fontSizeInPoints + "pt;\""); } } } } return sb.toString(); } /** * Effect must contains [effect name]:[effect value] ie. "xxxx: yyyy" * * @param effect * @return */ private String getCleanedStyleEffect(String effect) { String[] keyAndValue = getStyleEffectKeyAndValue(effect); if (keyAndValue == null) { return ""; } String effectKey = keyAndValue[0]; String effectValue = keyAndValue[1]; CSS.Attribute attribute = CSS.getAttribute(effectKey); if (attribute == null) { return ""; } else if (attribute == CSS.Attribute.BACKGROUND_COLOR) { return attribute.toString() + ": " + effectValue; } else if (attribute == CSS.Attribute.FONT_WEIGHT) { if (effectValue.equals("bold") || effectValue.equals("bolder")) { return attribute.toString() + ": bold"; } } else if (attribute == CSS.Attribute.TEXT_DECORATION) { if (effectValue.equals("underline")) { return attribute.toString() + ": underline"; } } else if (attribute == CSS.Attribute.FONT_STYLE) { if (effectValue.equals("italic")) { return attribute.toString() + ": italic"; } } else if (attribute == CSS.Attribute.TEXT_ALIGN) { if (effectValue.equals("left") || effectValue.equals("right") || effectValue.equals("center") || effectValue.equals("justify")) { return attribute.toString() + ": " + effectValue; } } else if (attribute == CSS.Attribute.BACKGROUND) { // Get only the color if any for (String backgroundItem : effectValue.split(" ")) { Color color = HTMLUtils.extractColorFromString(backgroundItem.trim()); if (color != null) { return CSS.Attribute.BACKGROUND_COLOR + ": " + backgroundItem.trim(); } } } return ""; } private String[] getStyleEffectKeyAndValue(String effect) { effect = effect.trim(); int indexOf = effect.indexOf(':'); if (indexOf == -1 || effect.length() <= indexOf + 1) { return null; } String[] keyAndValue = new String[2]; keyAndValue[0] = effect.substring(0, indexOf).trim(); keyAndValue[1] = effect.substring(indexOf + 1).trim(); return keyAndValue; } private String getCleanedClassValue(String classValue) { for (String classItem : classValue.split(" ")) { if (availableStyleClasses.contains(classItem)) { return classItem; // Don't handle multiple class because we cannot set multiple styles in docx } } return null; } }