/* * This program is free software; you can redistribute it and/or modify it under the * terms of the GNU Lesser General Public License, version 2.1 as published by the Free Software * Foundation. * * You should have received a copy of the GNU Lesser General Public License along with this * program; if not, you can obtain a copy at http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html * or from the Free Software Foundation, Inc., * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. * * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * See the GNU Lesser General Public License for more details. * * Copyright (c) 2001 - 2013 Object Refinery Ltd, Pentaho Corporation and Contributors.. All rights reserved. */ package org.pentaho.reporting.engine.classic.core.modules.output.table.html.util; /** * Utility methods to support HTML style encodings like the UTF and CSS encodings. * * @author Thomas Morgner */ public final class HtmlEncoderUtil { /** * CSS Escapes: CSS 2.1 / 4.1.3 Characters and case * <p/> * Third, backslash escapes allow authors to refer to characters they can't easily put in a document. In this case, * the backslash is followed by at most six hexadecimal digits (0..9A..F), which stand for the ISO 10646 ([ISO10646]) * character with that number, which must not be zero. (It is undefined in CSS 2.1 what happens if a style sheet does * contain a character with Unicode codepoint zero.) If a character in the range [0-9a-f] follows the hexadecimal * number, the end of the number needs to be made clear. There are two ways to do that: * <p/> * 1. with a space (or other whitespace character): "\26 B" ("&B"). In this case, user agents should treat a "CR/LF" * pair (U+000D/U+000A) as a single whitespace character.<br/> * 2. by providing exactly 6 hexadecimal digits: "\000026B" ("&B") * <p/> * In fact, these two methods may be combined. Only one whitespace character is ignored after a hexadecimal escape. * Note that this means that a "real" space after the escape sequence must itself either be escaped or doubled. */ /** * DefaultConstructor. */ private HtmlEncoderUtil() { } /** * Provides a method to encode any string into a URL-safe form. Non-ASCII characters are first encoded as sequences of * two or three bytes, using the UTF-8 algorithm, before being encoded as %HH escapes. */ private static final String[] HEX_CSS_ENCODING = new String[256]; static { // static initializer block for creating the Hex-Encoding array. This is as fast as having a static array // but reduces the code size. for ( int i = 0; i < 256; i++ ) { final String s = Integer.toHexString( i ); if ( s.length() == 1 ) { HEX_CSS_ENCODING[i] = '0' + s; } else { HEX_CSS_ENCODING[i] = s; } } } /** * Encode a string to the encoded form as defined in the CSS standard. * * @param s * The string to be encoded * @return The encoded string */ public static String encodeCSS( final String s ) { final StringBuffer sbuf = new StringBuffer( s.length() * 15 / 10 ); return encodeCSS( s, sbuf ); } public static String encodeCSS( final String s, final StringBuffer sbuf ) { final int len = s.length(); for ( int i = 0; i < len; i++ ) { final char ch = s.charAt( i ); if ( ch == '\"' ) { sbuf.append( '\\' ); sbuf.append( ch ); } else if ( ch >= 0x20 && ch <= 0x7f ) { // 7-Bit ascii sbuf.append( ch ); } else { sbuf.append( '\\' ); sbuf.append( HEX_CSS_ENCODING[( ch >> 16 ) & 0xFF] ); sbuf.append( HEX_CSS_ENCODING[( ( ch >> 8 ) & 0xFF )] ); sbuf.append( HEX_CSS_ENCODING[( ch & 0xFF )] ); } } return sbuf.toString(); } }