/**********************************************************************************
* $URL: https://source.sakaiproject.org/svn/kernel/trunk/kernel-util/src/main/java/org/sakaiproject/util/FormattedText.java $
* $Id: FormattedText.java 97738 2011-08-31 17:30:03Z ottenhoff@longsight.com $
***********************************************************************************
*
* Copyright (c) 2003, 2004, 2005, 2006, 2007, 2008 Sakai Foundation
*
* Licensed under the Educational Community License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.opensource.org/licenses/ECL-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
**********************************************************************************/
package org.sakaiproject.util.api;
import org.w3c.dom.Element;
/**
 * Utilities supporting user entry of formatted text (typically HTML). This
 * includes text formatting in user input such as bold, underline, and fonts.
 * There are also utilities which support other kinds of text processing (e.g. javascript)
 * and escaping (e.g. SQL). Generally anything related to text which is not simply
 * plaintext and has some kind of formatting.
 *
 * @author Aaron Zeckoski (azeckoski @ vt.edu)
 */
public interface FormattedText {

    /**
     * Level of security to use while doing the scan of html content.
     */
    enum Level {
        /**
         * Use the configured system default (typically HIGH but may have been configured to LOW);
         * this should be used in most cases and will be used if the level was set to null.
         */
        DEFAULT,
        /**
         * Use for untrusted users (e.g. students).
         */
        HIGH,
        /**
         * Use for trusted users (e.g. teachers).
         */
        LOW,
        /**
         * Use for admins and special cases only (e.g. super admin).
         */
        NONE
    }

    /**
     * Maintained for backwards compatibility only.
     *
     * @param strFromBrowser
     *        The formatted text as sent from the web browser (from the WYSIWYG editor)
     * @param errorMessages
     *        User-readable error messages will be returned here.
     * @return The validated processed formatted text, ready for use by the system.
     * @see #processFormattedText(String, StringBuilder)
     * @deprecated since Nov 2007, use {@link #processFormattedText(String, StringBuilder)} instead
     */
    @Deprecated
    String processFormattedText(String strFromBrowser, StringBuffer errorMessages);

    /**
     * Processes and validates user-entered HTML received from the web browser (from the WYSIWYG editor).
     * Validates that the user input follows the Sakai formatted text specification; disallows dangerous
     * stuff such as {@code <SCRIPT>} JavaScript tags.
     * Encodes the text according to the formatted text specification, for the rest of the system to use.
     * <p>
     * Use {@link #processFormattedText(String, StringBuilder, boolean)} if you need the behavior of the
     * old sakai html cleaner processor.
     *
     * @param strFromBrowser
     *        The formatted text as sent from the web browser (from the WYSIWYG editor)
     * @param errorMessages
     *        User-readable error messages will be returned here.
     * @return The validated processed formatted text, ready for use by the system.
     */
    String processFormattedText(String strFromBrowser, StringBuilder errorMessages);

    /**
     * Processes and validates user-entered HTML received from the web browser (from the WYSIWYG editor).
     * Validates that the user input follows the Sakai formatted text specification; disallows dangerous
     * stuff such as {@code <SCRIPT>} JavaScript tags.
     * Encodes the text according to the formatted text specification, for the rest of the system to use.
     * <p>
     * Use {@link #processFormattedText(String, StringBuilder, boolean)} if you need the behavior of the
     * old sakai html cleaner processor.
     *
     * @param strFromBrowser
     *        The formatted text as sent from the web browser (from the WYSIWYG editor)
     * @param errorMessages
     *        User-readable error messages will be returned here.
     * @param level
     *        The security level used for the scan (HIGH filters the most aggressively while NONE
     *        will allow anything); null or DEFAULT will use whatever security level the system is
     *        configured for
     * @return The validated processed formatted text, ready for use by the system.
     */
    String processFormattedText(String strFromBrowser, StringBuilder errorMessages, Level level);

    /**
     * Processes and validates user-entered HTML received from the web browser (from the WYSIWYG editor).
     * Validates that the user input follows the Sakai formatted text specification; disallows dangerous
     * stuff such as {@code <SCRIPT>} JavaScript tags.
     * Encodes the text according to the formatted text specification, for the rest of the system to use.
     *
     * @param strFromBrowser
     *        The formatted text as sent from the web browser (from the WYSIWYG editor)
     * @param errorMessages
     *        User-readable error messages will be returned here.
     * @param useLegacySakaiCleaner
     *        if true the old html cleaner is used, if false the new OWASP antisamy cleaner is used
     * @return The validated processed formatted text, ready for use by the system.
     */
    String processFormattedText(String strFromBrowser, StringBuilder errorMessages, boolean useLegacySakaiCleaner);

    /**
     * Process an HTML document that has been edited using the formatted text widget.
     * The document can contain any valid HTML; it will NOT be checked to eliminate things like
     * image tags, script tags, etc, because it is its own document.
     *
     * @param strFromBrowser
     *        The HTML document as sent from the web browser
     * @param errorMessages
     *        Receives error messages produced while processing
     * @return The processed HTML document
     */
    String processHtmlDocument(String strFromBrowser, StringBuilder errorMessages);

    /**
     * Processes and validates HTML formatted text received from the web browser (from the WYSIWYG editor).
     * Validates that the user input follows the Sakai formatted text specification; can disallow
     * dangerous stuff such as {@code <SCRIPT>} JavaScript tags.
     * Encodes the text according to the formatted text specification, for the rest of the system to use.
     * <p>
     * Use {@link #processFormattedText(String, StringBuilder, Level, boolean, boolean, boolean)} if you
     * need the behavior of the old sakai html cleaner processor.
     *
     * @param strFromBrowser
     *        The formatted text as sent from the web browser (from the WYSIWYG editor)
     * @param errorMessages
     *        User-readable error messages will be returned here.
     * @param checkForEvilTags
     *        If true, check for tags and attributes that shouldn't be in formatted text
     * @param replaceWhitespaceTags
     *        If true, clean up line breaks to be like {@code "<br />"}.
     * @return The validated processed HTML formatted text, ready for use by the system.
     */
    String processFormattedText(String strFromBrowser, StringBuilder errorMessages, boolean checkForEvilTags,
            boolean replaceWhitespaceTags);

    /**
     * Processes and validates HTML formatted text received from the web browser (from the WYSIWYG editor).
     * Validates that the user input follows the Sakai formatted text specification; can disallow
     * dangerous stuff such as {@code <SCRIPT>} JavaScript tags.
     * Encodes the text according to the formatted text specification, for the rest of the system to use.
     *
     * @param strFromBrowser
     *        The formatted text as sent from the web browser (from the WYSIWYG editor)
     * @param errorMessages
     *        User-readable error messages will be returned here.
     * @param level
     *        The security level used for the scan (HIGH filters the most aggressively while NONE
     *        will allow anything); null or DEFAULT will use whatever security level the system is
     *        configured for
     * @param checkForEvilTags
     *        If true, check for tags and attributes that shouldn't be in formatted text
     * @param replaceWhitespaceTags
     *        If true, clean up line breaks to be like {@code "<br />"}.
     * @param useLegacySakaiCleaner
     *        if true the old html cleaner is used, if false the new OWASP antisamy cleaner is used
     * @return The validated processed HTML formatted text, ready for use by the system.
     */
    String processFormattedText(String strFromBrowser, StringBuilder errorMessages, Level level, boolean checkForEvilTags,
            boolean replaceWhitespaceTags, boolean useLegacySakaiCleaner);

    /**
     * Prepares the given HTML formatted text for output as part of an HTML document.
     *
     * @param value
     *        The formatted text to output in an HTML document.
     * @return The string to include in an HTML document.
     * @see FormattedText#escapeHtml(String, boolean)
     */
    String escapeHtmlFormattedText(String value);

    /**
     * Prepares the given HTML formatted text for output as part of an HTML document,
     * removing newlines ({@code "<br />"}).
     *
     * @param value
     *        The formatted text to output in an HTML document.
     * @return The string to include in an HTML document.
     * @see FormattedText#escapeHtml(String, boolean)
     */
    String escapeHtmlFormattedTextSupressNewlines(String value);

    /**
     * Prepares the given formatted text for editing within the WYSIWYG editor.
     * All HTML meta-characters in the string will be escaped.
     *
     * @param value
     *        The formatted text to escape
     * @return The string to use as the value of the formatted textarea widget
     * @see FormattedText#escapeHtml(String, boolean)
     */
    String escapeHtmlFormattedTextarea(String value);

    /**
     * Converts the given plain text into HTML formatted text. Conversion to formatted text involves
     * escaping characters that are used for formatting (such as the {@code '<'} character).
     * Also converts plaintext line breaks into HTML line breaks ({@code "<br />"}).
     *
     * @param value
     *        The plain text to convert to formatted text
     * @return The converted plain text, now as formatted text
     */
    String convertPlaintextToFormattedText(String value);

    /**
     * Escape a plaintext string so that it can be output as part of an HTML document.
     * Ampersand, greater-than, less-than, newlines, etc, will be escaped so that they display
     * (instead of being interpreted as formatting).
     * Automatically converts newlines.
     *
     * @param value
     *        The string to escape.
     * @return value fully escaped for HTML.
     * @see #escapeHtml(String, boolean)
     */
    String escapeHtml(String value);

    /**
     * Escape the given value so that it appears as-is in HTML -
     * that is, HTML meta-characters like {@code '<'} are escaped to HTML character entity
     * references like {@code '&lt;'}.
     * Markup, amper, quote are escaped. Whitespace is not.
     *
     * @param value
     *        The string containing html to escape (can be null or "")
     * @param escapeNewlines
     *        Whether to convert newlines ({@code \n}) to {@code "<br />\n"} so that they appear
     *        as HTML line breaks.
     * @return value fully escaped for HTML (this will never return a null but will instead
     *         return empty string - "")
     */
    String escapeHtml(String value, boolean escapeNewlines);

    /**
     * Store the given formatted text in the given XML element; stores both a formatted text
     * representation, and a plaintext representation (plaintext means the formatting has been stripped).
     *
     * @param element
     *        The XML element in which to store the formatted text
     * @param baseAttributeName
     *        The base attribute name of the formatted text attribute
     * @param value
     *        The formatted text to store
     */
    void encodeFormattedTextAttribute(Element element, String baseAttributeName, String value);

    /**
     * Returns a String with characters above 128 as entity references.
     *
     * @param value
     *        The text to encode.
     * @return The encoded text.
     */
    String encodeUnicode(String value);

    /**
     * Converts plain-text URLs in a String to HTML {@code <a>} tags.
     * Any URLs in the source text that happen to be already in an {@code <a>} tag will be unaffected.
     *
     * @param text
     *        the plain text to convert
     * @return the full source text with URLs converted to HTML.
     */
    String encodeUrlsAsHtml(String text);

    /**
     * Returns a String with HTML entity references converted to characters suitable for
     * processing as formatted text.
     *
     * @param value
     *        The text containing entity references (e.g., a News item description).
     * @return The HTML, ready for processing.
     */
    String unEscapeHtml(String value);

    /**
     * Returns a String with HTML anchor normalized to include only href and
     * {@code target="_blank"} for safe display by a browser.
     *
     * @param anchor
     *        The anchor tag to be normalized.
     * @return The anchor tag containing only href and {@code target="_blank"}.
     */
    String processAnchor(String anchor);

    /**
     * Processes and validates character data as HTML. Disallows dangerous stuff such as
     * {@code <SCRIPT>} JavaScript tags. Encodes the text according to the formatted text
     * specification, for the rest of the system to use.
     *
     * @param source
     *        The escaped HTML (e.g., from the News service)
     * @return The validated processed formatted text, ready for use by the system.
     */
    String processEscapedHtml(String source);

    /**
     * Retrieves a formatted text attribute from an XML element; converts from older forms of
     * formatted text or plaintext, if found. For example, if the baseAttributeName "foo" is
     * specified, the attribute "foo-html" will be looked for first, and then "foo-formatted",
     * and finally just "foo" (plaintext).
     *
     * @param element
     *        The XML element from which to retrieve the formatted text attribute
     * @param baseAttributeName
     *        The base attribute name of the formatted text attribute
     * @return The formatted text retrieved from the element
     */
    String decodeFormattedTextAttribute(Element element, String baseAttributeName);

    /**
     * Converts the given HTML formatted text to plain text - loses formatting information.
     * For example, the formatted text {@code "Hello <br />\n<b>World!</b>"} becomes the
     * plain text {@code "Hello \nWorld!"}. Strips all formatting information from the formatted text.
     *
     * @param value
     *        The formatted text to convert
     * @return The plain text (all formatting removed)
     */
    String convertFormattedTextToPlaintext(String value);

    /**
     * Converts old-style formatted text to the new style. Previous to Sakai release 1.5,
     * displayed line breaks were stored as {@code "\n"}. Now, displayed line breaks are properly
     * stored in the HTML-standard way as {@code "<br />"}. This method converts from the
     * previous form.
     *
     * @param value
     *        The old-style formatted text to convert
     * @return converted text
     */
    String convertOldFormattedText(String value);

    /**
     * Trims a formatted text string to the given maximum number of displayed characters,
     * preserving formatting. For example, trimming {@code "Hello & <b>World</b>!"} to 9
     * characters yields {@code "Hello & <b>W</b>"}. Ignores HTML comments like
     * {@code "<!-- comment -->"}.
     *
     * @param formattedText
     *        The formatted text to trim
     * @param maxNumOfChars
     *        The maximum number of displayed characters in the returned trimmed formatted text.
     * @param strTrimmed
     *        A StringBuilder to hold the trimmed formatted text
     * @return true If the formatted text was trimmed
     */
    boolean trimFormattedText(String formattedText, int maxNumOfChars, StringBuilder strTrimmed);

    /**
     * Decode any HTML Numeric Character References of the style {@code &#Xhexnumber;} or
     * {@code &#decimalnumber;}, or of our own special style: {@code ^^Xhexnumber^} or
     * {@code ^^decimalnumber^}.
     *
     * @param value
     *        The text containing numeric character references
     * @return The text with the numeric character references decoded
     */
    String decodeNumericCharacterReferences(String value);

    /**
     * WEB Utility -
     * Return a string based on value that is safe to place into a javascript / html identifier:
     * anything not alphanumeric change to the char 'x'.
     * If the first character is not alphabetic, a letter 'i' is prepended.
     * Used for generating javascript variable and field names.
     *
     * @param value
     *        The string to escape.
     * @return value fully escaped using javascript / html identifier rules.
     */
    String escapeJavascript(String value);

    /**
     * WEB Utility -
     * Return a string based on value that is safe to place into a javascript value that is in
     * single quotes.
     * Useful to use with JSON or Javascript variables which are being set dynamically.
     * Can also be accomplished with StringEscapeUtils, see
     * http://commons.apache.org/lang/api/org/apache/commons/lang/StringEscapeUtils.html
     *
     * @param value
     *        The string to escape.
     * @return value String escaped for JSON or JS.
     */
    String escapeJsQuoted(String value);

    /**
     * WEB Utility -
     * Return a string based on id that is fully escaped using URL rules, using a UTF-8
     * underlying encoding.
     * <p>
     * Note: {@code java.net.URLEncoder.encode()} provides a more standard option;
     * {@link #decodeNumericCharacterReferences(String)} undoes this operation.
     *
     * @param value
     *        The string to escape.
     * @return value fully escaped using URL rules.
     */
    String escapeUrl(String value);

    /**
     * General utility to validate a URL.
     * The idea is to encode the rules we have for URLs we are willing
     * to put in {@code src="URL"} or {@code href="URL"} places within our code;
     * relative URLs must start with "/".
     *
     * @param urlToValidate
     *        a URL that might be placed in Sakai content
     * @return true if the URL is valid OR false if it fails the tests
     */
    boolean validateURL(String urlToValidate);
}