/*
 * Copyright (c) 1998-2011 Caucho Technology -- all rights reserved
 *
 * This file is part of Resin(R) Open Source
 *
 * Each copy or derived work must preserve the copyright notice and this
 * notice unmodified.
 *
 * Resin Open Source is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * Resin Open Source is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, or any warranty
 * of NON-INFRINGEMENT.  See the GNU General Public License for more
 * details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Resin Open Source; if not, write to the
 *
 *   Free Software Foundation, Inc.
 *   59 Temple Place, Suite 330
 *   Boston, MA 02111-1307  USA
 *
 * @author Scott Ferguson
 */

package com.caucho.quercus.lib;

import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.io.UnsupportedEncodingException;
import java.util.Iterator;
import java.util.Map;
import java.util.logging.Level;
import java.util.logging.Logger;

import com.caucho.quercus.QuercusModuleException;
import com.caucho.quercus.annotation.Optional;
import com.caucho.quercus.env.ArrayValue;
import com.caucho.quercus.env.ArrayValueImpl;
import com.caucho.quercus.env.ConstArrayValue;
import com.caucho.quercus.env.Env;
import com.caucho.quercus.env.LongValue;
import com.caucho.quercus.env.StringBuilderValue;
import com.caucho.quercus.env.StringValue;
import com.caucho.quercus.env.Value;
import com.caucho.quercus.module.AbstractQuercusModule;
import com.caucho.util.L10N;
import com.caucho.vfs.Encoding;
import com.caucho.vfs.i18n.EncodingWriter;

/**
 * PHP functions implementing html code.
 *
 * <p>The module keeps three static lookup structures, all filled by the
 * static initializer at the bottom of the class:
 * <ul>
 * <li>character -&gt; entity arrays returned by
 *     {@link #get_html_translation_table},
 * <li>a 65536-slot array indexed by character code, used by
 *     {@link #htmlentities},
 * <li>an entity -&gt; character-code array used by
 *     {@link #html_entity_decode}.
 * </ul>
 */
public class HtmlModule extends AbstractQuercusModule {
  private static final Logger log
    = Logger.getLogger(HtmlModule.class.getName());
  private static final L10N L = new L10N(HtmlModule.class);

  public static final int HTML_SPECIALCHARS = 0;
  public static final int HTML_ENTITIES = 1;

  public static final int ENT_HTML_QUOTE_NONE = 0;
  public static final int ENT_HTML_QUOTE_SINGLE = 1;
  public static final int ENT_HTML_QUOTE_DOUBLE = 2;

  public static final int ENT_COMPAT = ENT_HTML_QUOTE_DOUBLE;
  public static final int ENT_QUOTES
    = ENT_HTML_QUOTE_SINGLE | ENT_HTML_QUOTE_DOUBLE;
  public static final int ENT_NOQUOTES = ENT_HTML_QUOTE_NONE;

  // character code -> entity string, indexed by (ch & 0xffff)
  private static StringValue []HTML_SPECIALCHARS_MAP;

  // character -> entity tables
  private static ArrayValue HTML_SPECIALCHARS_ARRAY;
  private static ArrayValue HTML_ENTITIES_ARRAY;

  // entity -> character code table
  private static ArrayValue HTML_ENTITIES_ARRAY_ENTITY_KEY;

  // unicode-string copies of the tables above, created on first use
  private static ArrayValueImpl HTML_ENTITIES_ARRAY_UNICODE;
  private static ArrayValueImpl HTML_SPECIALCHARS_ARRAY_UNICODE;
  private static ArrayValueImpl HTML_ENTITIES_ARRAY_UNICODE_ENTITY_KEY;

  public HtmlModule()
  {
  }

  /**
   * Copies an array, converting every string key and string value to its
   * unicode form; non-string keys and values are copied unchanged.
   *
   * @param env the calling environment
   * @param array the array to copy
   * @return a constant array wrapping the converted copy
   */
  private static ConstArrayValue toUnicodeArray(Env env, ArrayValue array)
  {
    ArrayValueImpl copy = new ArrayValueImpl();

    Iterator<Map.Entry<Value,Value>> iter = array.getIterator(env);

    while (iter.hasNext()) {
      Map.Entry<Value,Value> entry = iter.next();

      Value key = entry.getKey();
      Value value = entry.getValue();

      if (key.isString()) {
        key = key.toUnicodeValue(env);
      }

      if (value.isString()) {
        value = value.toUnicodeValue(env);
      }

      copy.put(key, value);
    }

    return new ConstArrayValue(copy);
  }

  /**
   * Returns HTML translation tables.
   *
   * @param env the calling environment
   * @param table HTML_ENTITIES selects the full entity table, any other
   *        value selects the special-chars table
   * @param quoteStyle bit mask of ENT_HTML_QUOTE_SINGLE and
   *        ENT_HTML_QUOTE_DOUBLE selecting which quote mappings are added
   * @return a copy of the selected table mapping characters to entities
   */
  public Value get_html_translation_table(
      Env env,
      @Optional("HTML_SPECIALCHARS") int table,
      @Optional("ENT_COMPAT") int quoteStyle)
  {
    Value result;

    if (! env.isUnicodeSemantics()) {
      if (table == HTML_ENTITIES) {
        result = HTML_ENTITIES_ARRAY.copy();
      }
      else {
        result = HTML_SPECIALCHARS_ARRAY.copy();
      }
    }
    else if (table == HTML_ENTITIES) {
      // the unicode variant is built once and then reused
      if (HTML_ENTITIES_ARRAY_UNICODE == null) {
        HTML_ENTITIES_ARRAY_UNICODE
          = toUnicodeArray(env, HTML_ENTITIES_ARRAY);
      }

      result = HTML_ENTITIES_ARRAY_UNICODE.copy();
    }
    else {
      if (HTML_SPECIALCHARS_ARRAY_UNICODE == null) {
        HTML_SPECIALCHARS_ARRAY_UNICODE
          = toUnicodeArray(env, HTML_SPECIALCHARS_ARRAY);
      }

      result = HTML_SPECIALCHARS_ARRAY_UNICODE.copy();
    }

    if ((quoteStyle & ENT_HTML_QUOTE_SINGLE) != 0) {
      result.put(env.createString('\''), env.createString("&#039;"));
    }

    if ((quoteStyle & ENT_HTML_QUOTE_DOUBLE) != 0) {
      result.put(env.createString('"'), env.createString("&quot;"));
    }

    return result;
  }

  /**
   * Returns true if <code>token</code> occurs in <code>str</code> starting
   * at <code>offset</code>.  Never reads past the end of the string.
   */
  private static boolean startsWithAt(StringValue str,
                                      int offset,
                                      String token)
  {
    int tokenLen = token.length();

    if (str.length() < offset + tokenLen) {
      return false;
    }

    for (int k = 0; k < tokenLen; k++) {
      if (str.charAt(offset + k) != token.charAt(k)) {
        return false;
      }
    }

    return true;
  }

  /**
   * Converts escaped HTML entities back to characters.
   *
   * <p>Handles <code>&amp;amp;</code>, <code>&amp;lt;</code> and
   * <code>&amp;gt;</code> always, <code>&amp;quot;</code> when
   * ENT_HTML_QUOTE_DOUBLE is set and <code>&amp;#039;</code> when
   * ENT_HTML_QUOTE_SINGLE is set.  Any other '&amp;' is copied through.
   *
   * @param env the calling environment
   * @param str escaped string
   * @param quoteStyle optional quote style used
   * @return the decoded string
   */
  public static StringValue htmlspecialchars_decode(
      Env env,
      StringValue str,
      @Optional("ENT_COMPAT") int quoteStyle)
  {
    int len = str.length();

    StringValue sb = str.createStringBuilder(len * 4 / 5);

    boolean isDoubleQuote = (quoteStyle & ENT_HTML_QUOTE_DOUBLE) != 0;
    boolean isSingleQuote = (quoteStyle & ENT_HTML_QUOTE_SINGLE) != 0;

    for (int i = 0; i < len; i++) {
      char ch = str.charAt(i);

      if (ch != '&') {
        sb.append(ch);
      }
      else if (startsWithAt(str, i, "&amp;")) {
        sb.append('&');
        i += 4;
      }
      else if (isDoubleQuote && startsWithAt(str, i, "&quot;")) {
        sb.append('"');
        i += 5;
      }
      else if (isSingleQuote && startsWithAt(str, i, "&#039;")) {
        sb.append('\'');
        i += 5;
      }
      else if (startsWithAt(str, i, "&lt;")) {
        sb.append('<');
        i += 3;
      }
      else if (startsWithAt(str, i, "&gt;")) {
        sb.append('>');
        i += 3;
      }
      else {
        // not a recognized entity (including a trailing '&'): keep the '&'
        sb.append('&');
      }
    }

    return sb;
  }

  /**
   * Escapes the HTML special characters '&amp;', '&lt;', '&gt;' and,
   * depending on the quote style, the single and double quote.
   *
   * @param env the calling environment
   * @param string the string to be escaped
   * @param quoteStyle optional quote style
   * @param charset optional charset (not consulted by this implementation)
   * @return the escaped string
   */
  public static Value htmlspecialchars(Env env,
                                       StringValue string,
                                       @Optional("ENT_COMPAT") int quoteStyle,
                                       @Optional String charset)
  {
    int len = string.length();

    StringValue sb = string.createStringBuilder(len * 5 / 4);

    for (int i = 0; i < len; i++) {
      char ch = string.charAt(i);

      switch (ch) {
      case '&':
        sb.append("&amp;");
        break;

      case '"':
        if ((quoteStyle & ENT_HTML_QUOTE_DOUBLE) != 0) {
          sb.append("&quot;");
        }
        else {
          sb.append(ch);
        }
        break;

      case '\'':
        if ((quoteStyle & ENT_HTML_QUOTE_SINGLE) != 0) {
          sb.append("&#039;");
        }
        else {
          sb.append(ch);
        }
        break;

      case '<':
        sb.append("&lt;");
        break;

      case '>':
        sb.append("&gt;");
        break;

      default:
        sb.append(ch);
        break;
      }
    }

    return sb;
  }

  /**
   * Escapes every character that has an entry in the entity table, plus
   * the quotes selected by the quote style.
   *
   * <p>The input is read through the given charset (ISO-8859-1 when none
   * is given).  Without unicode semantics, a character above 0xff that has
   * no named entity is written as a four-digit hexadecimal character
   * reference.
   *
   * @param env the calling environment
   * @param string the string to be escaped
   * @param quoteStyle optional quote style
   * @param charset optional charset used to read the string
   * @return the escaped string
   */
  public static Value htmlentities(Env env,
                                   StringValue string,
                                   @Optional("ENT_COMPAT") int quoteStyle,
                                   @Optional String charset)
  {
    if (charset == null || charset.length() == 0) {
      charset = "ISO-8859-1";
    }

    Reader reader;

    try {
      reader = string.toReader(charset);
    } catch (UnsupportedEncodingException e) {
      env.warning(e);

      // fall back to the undecoded characters of the string
      reader = new StringReader(string.toString());
    }

    StringValue sb = string.createStringBuilder(string.length() * 5 / 4);

    int ch;

    try {
      while ((ch = reader.read()) >= 0) {
        StringValue entity = HTML_SPECIALCHARS_MAP[ch & 0xffff];

        if (ch == '"') {
          if ((quoteStyle & ENT_HTML_QUOTE_DOUBLE) != 0) {
            sb.append("&quot;");
          }
          else {
            sb.append('"');
          }
        }
        else if (ch == '\'') {
          if ((quoteStyle & ENT_HTML_QUOTE_SINGLE) != 0) {
            sb.append("&#039;");
          }
          else {
            sb.append('\'');
          }
        }
        else if (entity != null) {
          sb.append(entity);
        }
        else if (env.isUnicodeSemantics() || 0x00 <= ch && ch <= 0xff) {
          sb.append((char) ch);
        }
        else {
          // the digits are hexadecimal, so the reference needs the 'x'
          sb.append("&#x");
          sb.append(hexdigit(ch >> 12));
          sb.append(hexdigit(ch >> 8));
          sb.append(hexdigit(ch >> 4));
          sb.append(hexdigit(ch));
          sb.append(";");
        }
      }
    } catch (IOException e) {
      throw new QuercusModuleException(e);
    }

    return sb;
  }

  /**
   * Returns the lowercase hexadecimal digit for the low four bits of ch.
   */
  private static char hexdigit(int ch)
  {
    ch = ch & 0xf;

    if (ch < 10) {
      return (char) (ch + '0');
    }
    else {
      return (char) (ch - 10 + 'a');
    }
  }

  /**
   * Replaces named HTML entities with the characters they stand for.
   *
   * <p>An entity is an '&amp;', followed by ASCII letters or digits,
   * followed by ';'.  Entities missing from the entity table are copied
   * through unchanged.  Without unicode semantics the decoded character
   * is written through the encoder for the given charset (the runtime
   * encoding when none is given).
   *
   * @param env the calling environment
   * @param string the string to be decoded
   * @param quoteStyle optional quote style (not consulted by this
   *        implementation)
   * @param charset optional output charset
   * @return the decoded string
   */
  public static StringValue html_entity_decode(Env env,
                                               StringValue string,
                                               @Optional int quoteStyle,
                                               @Optional String charset)
  {
    if (string.length() == 0) {
      return env.getEmptyString();
    }

    ArrayValue htmlEntities = null;
    boolean isUnicode = env.isUnicodeSemantics();

    if (isUnicode) {
      if (HTML_ENTITIES_ARRAY_UNICODE_ENTITY_KEY == null) {
        HTML_ENTITIES_ARRAY_UNICODE_ENTITY_KEY
          = toUnicodeArray(env, HTML_ENTITIES_ARRAY_ENTITY_KEY);
      }

      htmlEntities = HTML_ENTITIES_ARRAY_UNICODE_ENTITY_KEY;
    }
    else {
      htmlEntities = HTML_ENTITIES_ARRAY_ENTITY_KEY;
    }

    EncodingWriter out = null;

    if (! isUnicode) {
      if (charset == null || charset.length() == 0) {
        charset = env.getRuntimeEncoding();
      }

      out = Encoding.getWriteEncoding(charset);
    }

    int len = string.length();
    int htmlEntityStart = -1;
    StringValue result = env.createStringBuilder();

    try {
      // Loop through each character
      for (int i = 0; i < len; i++) {
        char ch = string.charAt(i);

        // Check whether it's a html entity
        // i.e. starts with '&' and ends with ';'
        if (ch == '&' && htmlEntityStart < 0) {
          htmlEntityStart = i;
        }
        else if (htmlEntityStart < 0) {
          // else add it to result.
          result.append(ch);
        }
        else if (ch == ';') {
          // If so substitute the entity and add it to result.
          StringValue entity = string.substring(htmlEntityStart, i + 1);
          Value value = htmlEntities.get(entity);

          if (value.isNull()) {
            result.append(entity);
          }
          else if (isUnicode) {
            result.append((char) value.toInt());
          }
          else {
            out.write(result, (char) value.toInt());
          }

          htmlEntityStart = -1;
        }
        else if (('a' <= ch && ch <= 'z')
                 || ('A' <= ch && ch <= 'Z')
                 || ('0' <= ch && ch <= '9')) {
          // still inside a candidate entity name; digits are needed for
          // names such as &sup2; and &frac14;
        }
        else {
          // not an entity after all: emit the '&' and rescan what followed
          result.append('&');
          i = htmlEntityStart;
          htmlEntityStart = -1;
        }
      }

      // an unterminated entity at the end of the input is copied verbatim,
      // including one that starts at index 0
      if (htmlEntityStart >= 0) {
        result.append(string, htmlEntityStart, len);
      }
    } catch (IOException e) {
      log.log(Level.FINE, e.toString(), e);
    }

    return result;
  }

  /**
   * Replaces newlines with HTML breaks.
   *
   * <p>A break tag is inserted before each "\n", "\r\n" and lone "\r";
   * the line terminator itself is kept.
   *
   * @param env the calling environment
   * @param string the string to convert
   * @return the string with break tags inserted
   */
  public static Value nl2br(Env env, StringValue string)
  {
    int strLen = string.length();

    StringValue sb = string.createStringBuilder(strLen * 5 / 4);

    for (int i = 0; i < strLen; i++) {
      char ch = string.charAt(i);

      if (ch == '\n') {
        sb.append("<br />\n");
      }
      else if (ch == '\r') {
        if (i + 1 < strLen && string.charAt(i + 1) == '\n') {
          // treat "\r\n" as a single line terminator
          sb.append("<br />\r\n");
          i++;
        }
        else {
          sb.append("<br />\r");
        }
      }
      else {
        sb.append(ch);
      }
    }

    return sb;
  }

  /**
   * Registers one character/entity pair in all three lookup structures.
   *
   * @param array character -&gt; entity table
   * @param map entity strings indexed by character code
   * @param revMap entity -&gt; character code table
   * @param ch the character code
   * @param entity the entity text, e.g. "&amp;lt;"
   */
  private static void entity(ArrayValue array,
                             StringValue []map,
                             ArrayValue revMap,
                             int ch,
                             String entity)
  {
    // XXX: i18n and optimize static variables usage
    array.put("" + (char) ch, entity);

    StringValue entityValue = new StringBuilderValue(entity);

    map[ch & 0xffff] = entityValue;
    revMap.put(entityValue, LongValue.create(ch));
  }

  static {
    ArrayValueImpl array = new ArrayValueImpl();

    array.put("<", "&lt;");
    array.put(">", "&gt;");
    array.put("&", "&amp;");

    HTML_SPECIALCHARS_ARRAY = new ConstArrayValue(array);

    StringValue []map = new StringValue[65536];
    HTML_SPECIALCHARS_MAP = map;

    ArrayValue revMap = new ArrayValueImpl();
    HTML_ENTITIES_ARRAY_ENTITY_KEY = revMap;

    array = new ArrayValueImpl();

    entity(array, map, revMap, '<', "&lt;");
    entity(array, map, revMap, '>', "&gt;");
    entity(array, map, revMap, '&', "&amp;");

    entity(array, map, revMap, 160, "&nbsp;");
    entity(array, map, revMap, 161, "&iexcl;");
    entity(array, map, revMap, 162, "&cent;");
    entity(array, map, revMap, 163, "&pound;");
    entity(array, map, revMap, 164, "&curren;");
    entity(array, map, revMap, 165, "&yen;");
    entity(array, map, revMap, 166, "&brvbar;");
    entity(array, map, revMap, 167, "&sect;");
    entity(array, map, revMap, 168, "&uml;");
    entity(array, map, revMap, 169, "&copy;");
    entity(array, map, revMap, 170, "&ordf;");
    entity(array, map, revMap, 171, "&laquo;");
    entity(array, map, revMap, 172, "&not;");
    entity(array, map, revMap, 173, "&shy;");
    entity(array, map, revMap, 174, "&reg;");
    entity(array, map, revMap, 175, "&macr;");
    entity(array, map, revMap, 176, "&deg;");
    entity(array, map, revMap, 177, "&plusmn;");
    entity(array, map, revMap, 178, "&sup2;");
    entity(array, map, revMap, 179, "&sup3;");
    entity(array, map, revMap, 180, "&acute;");
    entity(array, map, revMap, 181, "&micro;");
    entity(array, map, revMap, 182, "&para;");
    entity(array, map, revMap, 183, "&middot;");
    entity(array, map, revMap, 184, "&cedil;");
    entity(array, map, revMap, 185, "&sup1;");
    entity(array, map, revMap, 186, "&ordm;");
    entity(array, map, revMap, 187, "&raquo;");
    entity(array, map, revMap, 188, "&frac14;");
    entity(array, map, revMap, 189, "&frac12;");
    entity(array, map, revMap, 190, "&frac34;");
    entity(array, map, revMap, 191, "&iquest;");
    entity(array, map, revMap, 192, "&Agrave;");
    entity(array, map, revMap, 193, "&Aacute;");
    entity(array, map, revMap, 194, "&Acirc;");
    entity(array, map, revMap, 195, "&Atilde;");
    entity(array, map, revMap, 196, "&Auml;");
    entity(array, map, revMap, 197, "&Aring;");
    entity(array, map, revMap, 198, "&AElig;");
    entity(array, map, revMap, 199, "&Ccedil;");
    entity(array, map, revMap, 200, "&Egrave;");
    entity(array, map, revMap, 201, "&Eacute;");
    entity(array, map, revMap, 202, "&Ecirc;");
    entity(array, map, revMap, 203, "&Euml;");
    entity(array, map, revMap, 204, "&Igrave;");
    entity(array, map, revMap, 205, "&Iacute;");
    entity(array, map, revMap, 206, "&Icirc;");
    entity(array, map, revMap, 207, "&Iuml;");
    entity(array, map, revMap, 208, "&ETH;");
    entity(array, map, revMap, 209, "&Ntilde;");
    entity(array, map, revMap, 210, "&Ograve;");
    entity(array, map, revMap, 211, "&Oacute;");
    entity(array, map, revMap, 212, "&Ocirc;");
    entity(array, map, revMap, 213, "&Otilde;");
    entity(array, map, revMap, 214, "&Ouml;");
    entity(array, map, revMap, 215, "&times;");
    entity(array, map, revMap, 216, "&Oslash;");
    entity(array, map, revMap, 217, "&Ugrave;");
    entity(array, map, revMap, 218, "&Uacute;");
    entity(array, map, revMap, 219, "&Ucirc;");
    entity(array, map, revMap, 220, "&Uuml;");
    entity(array, map, revMap, 221, "&Yacute;");
    entity(array, map, revMap, 222, "&THORN;");
    entity(array, map, revMap, 223, "&szlig;");
    entity(array, map, revMap, 224, "&agrave;");
    entity(array, map, revMap, 225, "&aacute;");
    entity(array, map, revMap, 226, "&acirc;");
    entity(array, map, revMap, 227, "&atilde;");
    entity(array, map, revMap, 228, "&auml;");
    entity(array, map, revMap, 229, "&aring;");
    entity(array, map, revMap, 230, "&aelig;");
    entity(array, map, revMap, 231, "&ccedil;");
    entity(array, map, revMap, 232, "&egrave;");
    entity(array, map, revMap, 233, "&eacute;");
    entity(array, map, revMap, 234, "&ecirc;");
    entity(array, map, revMap, 235, "&euml;");
    entity(array, map, revMap, 236, "&igrave;");
    entity(array, map, revMap, 237, "&iacute;");
    entity(array, map, revMap, 238, "&icirc;");
    entity(array, map, revMap, 239, "&iuml;");
    entity(array, map, revMap, 240, "&eth;");
    entity(array, map, revMap, 241, "&ntilde;");
    entity(array, map, revMap, 242, "&ograve;");
    entity(array, map, revMap, 243, "&oacute;");
    entity(array, map, revMap, 244, "&ocirc;");
    entity(array, map, revMap, 245, "&otilde;");
    entity(array, map, revMap, 246, "&ouml;");
    entity(array, map, revMap, 247, "&divide;");
    entity(array, map, revMap, 248, "&oslash;");
    entity(array, map, revMap, 249, "&ugrave;");
    entity(array, map, revMap, 250, "&uacute;");
    entity(array, map, revMap, 251, "&ucirc;");
    entity(array, map, revMap, 252, "&uuml;");
    entity(array, map, revMap, 253, "&yacute;");
    entity(array, map, revMap, 254, "&thorn;");
    entity(array, map, revMap, 255, "&yuml;");

    // XXX: charset, order it.
    entity(array, map, revMap, 0x2002, "&ensp;");
    entity(array, map, revMap, 0x2009, "&thinsp;");
    entity(array, map, revMap, 0x2018, "&lsquo;");
    entity(array, map, revMap, 0x2020, "&dagger;");
    entity(array, map, revMap, 0x2032, "&prime;");
    entity(array, map, revMap, 0x2044, "&frasl;");
    entity(array, map, revMap, 0x20ac, "&euro;");

    HTML_ENTITIES_ARRAY = new ConstArrayValue(array);
  }
}