/*
* This library is part of OpenCms -
* the Open Source Content Management System
*
* Copyright (c) Alkacon Software GmbH (http://www.alkacon.com)
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* For further information about Alkacon Software GmbH, please see the
* company website: http://www.alkacon.com
*
* For further information about OpenCms, please see the
* project website: http://www.opencms.org
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
package org.opencms.jsp.decorator;
import org.opencms.file.CmsObject;
import org.opencms.main.CmsException;
import org.opencms.main.CmsLog;
import org.opencms.util.CmsHtmlParser;
import org.opencms.util.CmsStringUtil;
import java.util.ArrayList;
import java.util.List;
import org.apache.commons.logging.Log;
import org.htmlparser.Tag;
import org.htmlparser.Text;
import org.htmlparser.util.Translate;
/**
* The CmsHtmlDecorator is the main object for processing the text decorations.<p>
*
* It uses the information of a <code>{@link CmsDecoratorConfiguration}</code> to process the
* text decorations.
*
* @since 6.1.3
*/
public class CmsHtmlDecorator extends CmsHtmlParser {
/** Delimiters for string seperation. */
private static final String[] DELIMITERS = {
" ",
",",
".",
";",
":",
"!",
"(",
")",
"'",
"?",
"/",
"\u00A7",
"\"",
" ",
""",
"\r\n",
"\n"};
/** Delimiters for second level string separation. */
private static final String[] DELIMITERS_SECOND_LEVEL = {
"-",
"@",
"/",
"⁄",
".",
",",
"(",
")",
"{",
"}",
"[",
"]",
"\"",
""",
"!",
"?",
";",
"&",
"&",
"%",
"\u00A7",
"§"};
/** Steps for forward lookup in workd list. */
private static final int FORWARD_LOOKUP = 10;
/** The log object for this class. */
private static final Log LOG = CmsLog.getLog(CmsHtmlDecorator.class);
/** Non translators, strings starting with those values must not be translated. */
private static final String[] NON_TRANSLATORS = {" ", """};
/** The decoration configuration.<p> */
I_CmsDecoratorConfiguration m_config;
/** Decoration bundle to be used by the decorator. */
CmsDecorationBundle m_decorations;
/** the CmsObject. */
private CmsObject m_cms;
/** decorate flag. */
private boolean m_decorate;
/**
* Constructor, creates a new, empty CmsHtmlDecorator.<p>
*
* @param cms the CmsObject
* @throws CmsException if something goes wrong
*/
public CmsHtmlDecorator(CmsObject cms)
throws CmsException {
m_config = new CmsDecoratorConfiguration(cms);
m_decorations = m_config.getDecorations();
m_result = new StringBuffer(512);
m_echo = true;
m_decorate = true;
}
/**
* Constructor, creates a new CmsHtmlDecorator with a given configuration.<p>
*
* @param cms the CmsObject
* @param config the configuration to be used
*
*/
public CmsHtmlDecorator(CmsObject cms, I_CmsDecoratorConfiguration config) {
m_config = config;
m_decorations = config.getDecorations();
m_result = new StringBuffer(512);
m_echo = true;
m_decorate = true;
m_cms = cms;
}
/**
* Splits a String into substrings along the provided delimiter list and returns
* the result as a List of Substrings.<p>
*
* @param source the String to split
* @param delimiters the delimiters to split at
* @param trim flag to indicate if leading and trailing whitespaces should be omitted
* @param includeDelimiters flag to indicate if the delimiters should be included as well
*
* @return the List of splitted Substrings
*/
public static List<String> splitAsList(String source, String[] delimiters, boolean trim, boolean includeDelimiters) {
List<String> result = new ArrayList<String>();
String delimiter = "";
int i = 0;
int l = source.length();
int n = -1;
int max = Integer.MAX_VALUE;
// find the next delimiter
for (int j = 0; j < delimiters.length; j++) {
int delimPos = source.indexOf(delimiters[j]);
if (delimPos > -1) {
if (delimPos < max) {
max = delimPos;
n = delimPos;
delimiter = delimiters[j];
}
}
}
while (n != -1) {
// zero - length items are not seen as tokens at start or end
if ((i < n) || ((i > 0) && (i < l))) {
result.add(trim ? source.substring(i, n).trim() : source.substring(i, n));
// add the delimiter to the list as well
if (includeDelimiters && (n + delimiter.length() <= l)) {
result.add(source.substring(n, n + delimiter.length()));
}
} else {
// add the delimiter to the list as well
if (includeDelimiters && source.startsWith(delimiter)) {
result.add(delimiter);
}
}
i = n + delimiter.length();
// find the next delimiter
max = Integer.MAX_VALUE;
n = -1;
for (int j = 0; j < delimiters.length; j++) {
int delimPos = source.indexOf(delimiters[j], i);
if (delimPos > -1) {
if (delimPos < max) {
max = delimPos;
n = delimPos;
delimiter = delimiters[j];
}
}
}
}
// is there a non - empty String to cut from the tail?
if (n < 0) {
n = source.length();
}
if (i < n) {
result.add(trim ? source.substring(i).trim() : source.substring(i));
}
return result;
}
/**
* Processes a HTML string and adds text decorations according to the decoration configuration.<p>
*
* @param html a string holding the HTML code that should be added with text decorations
* @param encoding the encoding to be used
* @return a HTML string with the decorations added.
* @throws Exception if something goes wrong
*/
public String doDecoration(String html, String encoding) throws Exception {
return process(html, encoding);
}
/**
* Resets the first occurance flags of all decoration objects.<p>
*
* This is nescessary if decoration objects should be used for processing more than once. *
*/
public void resetDecorationDefinitions() {
m_config.resetMarkedDecorations();
}
/**
* @see org.htmlparser.visitors.NodeVisitor#visitStringNode(org.htmlparser.Text)
*/
@Override
public void visitStringNode(Text text) {
appendText(text.toPlainTextString(), DELIMITERS, true);
}
/**
* @see org.htmlparser.visitors.NodeVisitor#visitTag(org.htmlparser.Tag)
*/
@Override
public void visitTag(Tag tag) {
super.visitTag(tag);
// get the tagname
String tagname = tag.getTagName();
// this is one of the tags that should not allow decoation
if (m_config.isExcluded(tagname)) {
m_decorate = false;
} else {
m_decorate = true;
}
}
/**
* Appends a text decoration to the output.<p>
*
* A lookup is made to find a text decoration for each word in the given text.
* If a text decoration is found, the word will be decorated and added to the output.
* If no text decoration is found, the word alone will be added to the output.
*
* @param text the text to add a text decoration for
* @param delimiters delimiters for text seperation
* @param recursive flag for recusrive search
*/
private void appendText(String text, String[] delimiters, boolean recursive) {
if (LOG.isDebugEnabled()) {
LOG.debug(Messages.get().getBundle().key(Messages.LOG_HTML_DECORATOR_APPEND_TEXT_2, m_config, text));
}
if (CmsStringUtil.isNotEmptyOrWhitespaceOnly(text) && m_decorate) {
// split the input into single words
List<String> wordList = splitAsList(text, delimiters, false, true);
int wordCount = wordList.size();
for (int i = 0; i < wordCount; i++) {
String word = wordList.get(i);
boolean alreadyDecorated = false;
if (LOG.isDebugEnabled()) {
LOG.debug(Messages.get().getBundle().key(
Messages.LOG_HTML_DECORATOR_PROCESS_WORD_2,
word,
Boolean.valueOf(mustDecode(word, wordList, i))));
}
// test if the word must be decoded
if (mustDecode(word, wordList, i)) {
word = Translate.decode(word);
if (LOG.isDebugEnabled()) {
LOG.debug(Messages.get().getBundle().key(Messages.LOG_HTML_DECORATOR_DECODED_WORD_1, word));
}
}
// test if the word is no delimiter
// try to get a decoration if it is not
CmsDecorationObject decObj = null;
CmsDecorationObject wordDecObj = null;
if (!hasDelimiter(word, delimiters)) {
wordDecObj = (CmsDecorationObject)m_decorations.get(word);
}
if (LOG.isDebugEnabled()) {
LOG.debug(Messages.get().getBundle().key(
Messages.LOG_HTML_DECORATOR_DECORATION_FOUND_2,
wordDecObj,
word));
}
// if there is a decoration object for this word, we must do the decoration
// if not, we must test if the word itself consists of several parts divided by
// second level delimiters
//if ((decObj == null)) {
if (recursive
&& hasDelimiter(word, DELIMITERS_SECOND_LEVEL)
&& !startsWithDelimiter(word, DELIMITERS_SECOND_LEVEL)) {
// add the following symbol if possible to allow the second level decoration
// test to make a forward lookup as well
String secondLevel = word;
if (i < wordCount - 1) {
String nextWord = wordList.get(i + 1);
if (!nextWord.equals(" ")) {
//don't allow HTML entities to be split in the middle during the recursion!
String afterNextWord = "";
if (i < wordCount - 2) {
afterNextWord = wordList.get(i + 2);
}
if (nextWord.contains("&") && afterNextWord.equals(";")) {
secondLevel = word + nextWord + ";";
i += 2;
} else {
secondLevel = word + nextWord;
i++;
}
}
}
// check if the result is modified by any second level decoration
int sizeBefore = m_result.length();
appendText(secondLevel, DELIMITERS_SECOND_LEVEL, false);
if (sizeBefore != m_result.length()) {
alreadyDecorated = true;
}
} else {
// make a forward lookup to the next elements of the word list to check
// if the combination of word and delimiter can be found as a decoration key
// an example would be "Dr." wich must be decorated with "Doctor"
StringBuffer decKey = new StringBuffer();
decKey.append(word);
// calculate how much forward looking must be made
int forwardLookup = wordList.size() - i - 1;
if (forwardLookup > FORWARD_LOOKUP) {
forwardLookup = FORWARD_LOOKUP;
}
if (i < wordCount - forwardLookup) {
for (int j = 1; j <= forwardLookup; j++) {
decKey.append(wordList.get(i + j));
decObj = (CmsDecorationObject)m_decorations.get(decKey.toString());
if (LOG.isDebugEnabled()) {
LOG.debug(Messages.get().getBundle().key(
Messages.LOG_HTML_DECORATOR_DECORATION_FOUND_FWL_3,
decObj,
word,
new Integer(j)));
}
if (decObj != null) {
if (LOG.isDebugEnabled()) {
LOG.debug(Messages.get().getBundle().key(
Messages.LOG_HTML_DECORATOR_DECORATION_APPEND_DECORATION_1,
decObj.getContentDecoration(
m_config,
decKey.toString(),
m_cms.getRequestContext().getLocale().toString())));
}
// decorate the current word with the following delimiter
m_result.append(decObj.getContentDecoration(
m_config,
decKey.toString(),
m_cms.getRequestContext().getLocale().toString()));
// important, we must skip the next element of the list
i += j;
// reset the decObj
alreadyDecorated = true;
break;
}
}
}
if ((decObj == null) && (wordDecObj == null)) {
if (LOG.isDebugEnabled()) {
LOG.debug(Messages.get().getBundle().key(
Messages.LOG_HTML_DECORATOR_DECORATION_APPEND_WORD_1,
word));
}
// no decoration was found, use the word alone
m_result.append(word);
}
}
//} else {
if ((wordDecObj != null) && !alreadyDecorated) {
if (LOG.isDebugEnabled()) {
LOG.debug(Messages.get().getBundle().key(
Messages.LOG_HTML_DECORATOR_DECORATION_APPEND_DECORATION_1,
wordDecObj.getContentDecoration(
m_config,
word,
m_cms.getRequestContext().getLocale().toString())));
}
// decorate the current word
m_result.append(wordDecObj.getContentDecoration(
m_config,
word,
m_cms.getRequestContext().getLocale().toString()));
}
}
} else {
if (LOG.isDebugEnabled()) {
LOG.debug(Messages.get().getBundle().key(
Messages.LOG_HTML_DECORATOR_DECORATION_APPEND_ORIGINALTEXT_1,
text));
}
m_result.append(text);
}
}
/**
* Checks if a word contains a given delimiter.<p>
*
* @param word the word to test
* @param delimiters array of delimiter strings
* @return true if the word contains the delimiter, false otherwiese
*/
private boolean hasDelimiter(String word, String[] delimiters) {
boolean delim = false;
for (int i = 0; i < delimiters.length; i++) {
if (word.indexOf(delimiters[i]) > -1) {
delim = true;
break;
}
}
return delim;
}
/**
* Checks if a word must be decoded.<p>
*
* The given word is compared to a negative list of words which must not be decoded.<p>
*
* @param word the word to test
* @param wordList the list of words which must not be decoded
* @param count the count in the list
*
* @return true if the word must be decoded, false otherweise
*/
private boolean mustDecode(String word, List<String> wordList, int count) {
boolean decode = true;
String nextWord = null;
if (count < wordList.size() - 1) {
nextWord = wordList.get(count + 1);
}
// test if the current word contains a "&" and the following with a ";"
// if so, we must not decode the word
if ((nextWord != null) && (word.indexOf("&") > -1) && nextWord.startsWith(";")) {
return false;
} else {
// now scheck if the word matches one of the non decoder tokens
for (int i = 0; i < NON_TRANSLATORS.length; i++) {
if (word.startsWith(NON_TRANSLATORS[i])) {
decode = false;
break;
}
}
}
return decode;
}
/**
* Checks if a word starts with a given delimiter.<p>
*
* @param word the word to test
* @param delimiters array of delimiter strings
* @return true if the word starts with the delimiter, false otherwiese
*/
private boolean startsWithDelimiter(String word, String[] delimiters) {
boolean delim = false;
for (int i = 0; i < delimiters.length; i++) {
if (word.startsWith(delimiters[i])) {
delim = true;
break;
}
}
return delim;
}
}