/*
* © Copyright IBM Corp. 2012-2013
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied. See the License for the specific language governing
* permissions and limitations under the License.
*/
package com.ibm.commons.util;
/**
 * HTML Text utilities.
 * <p>
 * This class is used to convert Strings to HTML back and forth, by
 * evaluating/generating entities for the special characters.
 * </p>
 * <p>
 * Encoding uses named entities for the characters registered in the internal
 * table (code units 0-255, plus the euro sign) and decimal numeric references
 * for everything else. The apostrophe is <b>not</b> escaped, so attribute values
 * produced by {@link #toHTMLAttributeString(String)} must be delimited with
 * double quotes.
 * </p>
 *
 * @ibm-not-published entities should be encoded by the ResponseWriter, not by
 * this class, because the ResponseWriter will detect XHTML
 * mode, and will write out #160 instead of nbsp type
 * entities, because the html-only entities do not work in
 * XHTML mode.
 * @deprecated entities should be encoded by the ResponseWriter, or use com.ibm.xsp.util.HtmlUtil
 */
@Deprecated
public final class HtmlTextUtil {

    /**
     * Entity names indexed by character code (0-255); {@code null} when the
     * character has no named entity. Note that {@code nbsp} is registered twice:
     * at 32 (used when spaces are replaced) and at 160 (U+00A0).
     */
    private static final String[] HTML_ENTITIES = new String[256];

    static {
        String[] entities = HTML_ENTITIES;
        entities[32] ="nbsp"; //$NON-NLS-1$
        entities[34] ="quot"; // '"' //$NON-NLS-1$
        entities[38] ="amp"; // '&' //$NON-NLS-1$
        entities[60] ="lt"; // '<' //$NON-NLS-1$
        entities[62] ="gt"; // '>' //$NON-NLS-1$
        entities[160]="nbsp"; //$NON-NLS-1$
        entities[161]="iexcl"; //$NON-NLS-1$
        entities[162]="cent"; //$NON-NLS-1$
        entities[163]="pound"; //$NON-NLS-1$
        entities[164]="curren"; //$NON-NLS-1$
        entities[165]="yen"; //$NON-NLS-1$
        entities[166]="brvbar"; //$NON-NLS-1$
        entities[167]="sect"; //$NON-NLS-1$
        entities[168]="uml"; //$NON-NLS-1$
        entities[169]="copy"; //$NON-NLS-1$
        entities[170]="ordf"; //$NON-NLS-1$
        entities[171]="laquo"; //$NON-NLS-1$
        entities[172]="not"; //$NON-NLS-1$
        entities[173]="shy"; //$NON-NLS-1$
        entities[174]="reg"; //$NON-NLS-1$
        entities[175]="macr"; //$NON-NLS-1$
        entities[176]="deg"; //$NON-NLS-1$
        entities[177]="plusmn"; //$NON-NLS-1$
        entities[178]="sup2"; //$NON-NLS-1$
        entities[179]="sup3"; //$NON-NLS-1$
        entities[180]="acute"; //$NON-NLS-1$
        entities[181]="micro"; //$NON-NLS-1$
        entities[182]="para"; //$NON-NLS-1$
        entities[183]="middot"; //$NON-NLS-1$
        entities[184]="cedil"; //$NON-NLS-1$
        entities[185]="sup1"; //$NON-NLS-1$
        entities[186]="ordm"; //$NON-NLS-1$
        entities[187]="raquo"; //$NON-NLS-1$
        entities[188]="frac14"; //$NON-NLS-1$
        entities[189]="frac12"; //$NON-NLS-1$
        entities[190]="frac34"; //$NON-NLS-1$
        entities[191]="iquest"; //$NON-NLS-1$
        entities[192]="Agrave"; //$NON-NLS-1$
        entities[193]="Aacute"; //$NON-NLS-1$
        entities[194]="Acirc"; //$NON-NLS-1$
        entities[195]="Atilde"; //$NON-NLS-1$
        entities[196]="Auml"; //$NON-NLS-1$
        entities[197]="Aring"; //$NON-NLS-1$
        entities[198]="AElig"; //$NON-NLS-1$
        entities[199]="Ccedil"; //$NON-NLS-1$
        entities[200]="Egrave"; //$NON-NLS-1$
        entities[201]="Eacute"; //$NON-NLS-1$
        entities[202]="Ecirc"; //$NON-NLS-1$
        entities[203]="Euml"; //$NON-NLS-1$
        entities[204]="Igrave"; //$NON-NLS-1$
        entities[205]="Iacute"; //$NON-NLS-1$
        entities[206]="Icirc"; //$NON-NLS-1$
        entities[207]="Iuml"; //$NON-NLS-1$
        entities[208]="ETH"; //$NON-NLS-1$
        entities[209]="Ntilde"; //$NON-NLS-1$
        entities[210]="Ograve"; //$NON-NLS-1$
        entities[211]="Oacute"; //$NON-NLS-1$
        entities[212]="Ocirc"; //$NON-NLS-1$
        entities[213]="Otilde"; //$NON-NLS-1$
        entities[214]="Ouml"; //$NON-NLS-1$
        entities[215]="times"; //$NON-NLS-1$
        entities[216]="Oslash"; //$NON-NLS-1$
        entities[217]="Ugrave"; //$NON-NLS-1$
        entities[218]="Uacute"; //$NON-NLS-1$
        entities[219]="Ucirc"; //$NON-NLS-1$
        entities[220]="Uuml"; //$NON-NLS-1$
        entities[221]="Yacute"; //$NON-NLS-1$
        entities[222]="THORN"; //$NON-NLS-1$
        entities[223]="szlig"; //$NON-NLS-1$
        entities[224]="agrave"; //$NON-NLS-1$
        entities[225]="aacute"; //$NON-NLS-1$
        entities[226]="acirc"; //$NON-NLS-1$
        entities[227]="atilde"; //$NON-NLS-1$
        entities[228]="auml"; //$NON-NLS-1$
        entities[229]="aring"; //$NON-NLS-1$
        entities[230]="aelig"; //$NON-NLS-1$
        entities[231]="ccedil"; //$NON-NLS-1$
        entities[232]="egrave"; //$NON-NLS-1$
        entities[233]="eacute"; //$NON-NLS-1$
        entities[234]="ecirc"; //$NON-NLS-1$
        entities[235]="euml"; //$NON-NLS-1$
        entities[236]="igrave"; //$NON-NLS-1$
        entities[237]="iacute"; //$NON-NLS-1$
        entities[238]="icirc"; //$NON-NLS-1$
        entities[239]="iuml"; //$NON-NLS-1$
        entities[240]="eth"; //$NON-NLS-1$
        entities[241]="ntilde"; //$NON-NLS-1$
        entities[242]="ograve"; //$NON-NLS-1$
        entities[243]="oacute"; //$NON-NLS-1$
        entities[244]="ocirc"; //$NON-NLS-1$
        entities[245]="otilde"; //$NON-NLS-1$
        entities[246]="ouml"; //$NON-NLS-1$
        entities[247]="divide"; //$NON-NLS-1$
        entities[248]="oslash"; //$NON-NLS-1$
        entities[249]="ugrave"; //$NON-NLS-1$
        entities[250]="uacute"; //$NON-NLS-1$
        entities[251]="ucirc"; //$NON-NLS-1$
        entities[252]="uuml"; //$NON-NLS-1$
        entities[253]="yacute"; //$NON-NLS-1$
        entities[254]="thorn"; //$NON-NLS-1$
        entities[255]="yuml"; //$NON-NLS-1$
    }

    /**
     * Converts a Java string to HTML content, replacing the special characters
     * by entities.
     * The space character is replaced by {@code &nbsp;} only when
     * {@code replaceSpaces} is {@code true}.
     * @param s the Java string (may be {@code null} or empty, in which case it is returned as is)
     * @param replaceSpaces {@code true} to encode each space as {@code &nbsp;}
     * @return the converted string, or {@code s} itself when nothing had to be encoded
     * @ibm-api
     */
    public static String toHTMLContentString(String s, boolean replaceSpaces) {
        return toHTMLString(s, replaceSpaces);
    }

    /**
     * Converts a Java string to an HTML attribute value, replacing the special
     * characters by entities.
     * The space character is not replaced by {@code &nbsp;}. The apostrophe is
     * left untouched as well, so the value must be enclosed in double quotes.
     * @param s the Java string (may be {@code null} or empty, in which case it is returned as is)
     * @return the converted string, or {@code s} itself when nothing had to be encoded
     * @ibm-api
     */
    public static String toHTMLAttributeString(String s) {
        return toHTMLString(s, false);
    }

    /**
     * Returns the name of the entity registered for a character, without the
     * leading {@code '&'} and the trailing {@code ';'}.
     * @param c the character to look up
     * @return the entity name ({@code "nbsp"} for a plain space, {@code "euro"}
     *         for U+20AC), or {@code null} when the character has no named entity
     */
    public static String getEntity(char c) {
        if (c<256) {
            return HTML_ENTITIES[c];
        }
        if( c==0x20AC ) {
            return "euro"; //$NON-NLS-1$
        }
        return null;
    }

    /**
     * Encodes a string: named entities for the registered characters, decimal
     * numeric references for every character above 255 (except the euro sign,
     * which is named). A valid surrogate pair is written as one reference to
     * its code point rather than as two references to the surrogate halves.
     */
    private static String toHTMLString(String s, boolean replaceSpaces) {
        if( isNullOrEmpty(s) ) {
            return s;
        }
        // Allocated lazily, on the first character that actually needs encoding
        StringBuilder b = null;
        int length = s.length();
        for( int i=0; i<length; i++ ) {
            char c = s.charAt(i);
            String replaceLabel = null;
            int replaceNumber = -1;
            if (c==' ') {
                // In an attribute (replaceSpaces==false), the space is kept as is
                if (replaceSpaces) {
                    replaceLabel = HTML_ENTITIES[c];
                }
            } else if (c<256) {
                replaceLabel = HTML_ENTITIES[c];
            } else if (c==0x20AC) {
                replaceLabel = "euro"; //$NON-NLS-1$
            } else if (Character.isHighSurrogate(c) && i+1<length
                    && Character.isLowSurrogate(s.charAt(i+1))) {
                // Supplementary character: reference the code point and skip the low surrogate
                replaceNumber = Character.toCodePoint(c, s.charAt(i+1));
                i++;
            } else {
                replaceNumber = c;
            }
            if (replaceLabel!=null || replaceNumber!=-1) {
                if( b==null ) {
                    b = new StringBuilder(length+16);
                    // Copy everything that preceded the first encoded character
                    // (never a supplementary character, so the index adjustment above is harmless)
                    b.append(s, 0, replaceNumber>0xFFFF ? i-1 : i);
                }
                b.append('&');
                if (replaceLabel!=null) {
                    b.append(replaceLabel);
                } else {
                    b.append('#').append(replaceNumber);
                }
                b.append(';');
            } else if( b!=null ) {
                b.append(c);
            }
        }
        return b!=null ? b.toString() : s;
    }

    /**
     * Converts an HTML string to a Java one, converting the decimal numeric
     * entities ({@code &#1234;}) sent by the browser.
     * Only sequences of the form {@code &#} + ASCII decimal digits + {@code ;}
     * that denote a valid Unicode code point are converted; anything else
     * (missing digits, missing semicolon, value out of range) is left untouched.
     * @param s the HTML string (may be {@code null} or empty, in which case it is returned as is)
     * @return the converted string, or {@code s} itself when nothing had to be converted
     * @ibm-api
     */
    public static String fromHTMLInputString(String s) {
        if (isNullOrEmpty(s)) {
            return s;
        }
        int l = s.length();
        StringBuilder b = null;
        int copied = 0;     // s[0..copied) is already reflected in b
        int searchFrom = 0; // where to look for the next "&#"
        int firstChar;
        while ((firstChar=s.indexOf("&#", searchFrom))!=-1) { //$NON-NLS-1$
            int lastChar = firstChar+2;
            while (lastChar<l && s.charAt(lastChar)>='0' && s.charAt(lastChar)<='9') {
                lastChar++;
            }
            int codePoint = -1;
            if (lastChar<l && s.charAt(lastChar)==';') {
                codePoint = parseCodePoint(s, firstChar+2, lastChar, 10);
            }
            if (codePoint!=-1) {
                if (b==null) {
                    b = new StringBuilder(l);
                }
                b.append(s, copied, firstChar);
                b.appendCodePoint(codePoint);
                copied = lastChar+1;
                searchFrom = copied;
            } else {
                // Not a valid reference: keep the text and resume searching after the "&#"
                searchFrom = firstChar+2;
            }
        }
        if (b==null) {
            return s;
        }
        b.append(s, copied, l);
        return b.toString();
    }

    /**
     * Converts an HTML string to a Java one, converting all the HTML entities
     * known to this class: decimal ({@code &#65;}) and hexadecimal
     * ({@code &#x41;}) numeric references, {@code &euro;} and the named entities
     * of the internal table.
     * <p>
     * Named entities are resolved by scanning the table from index 0, so
     * {@code &nbsp;} decodes to a plain space (index 32) and not to U+00A0.
     * Unknown or malformed entities, as well as a {@code '&'} that is never
     * terminated by a {@code ';'}, are copied to the result unchanged.
     * </p>
     * @param s the HTML string (may be {@code null} or empty, in which case it is returned as is)
     * @return the converted string, or {@code s} itself when nothing had to be converted
     * @ibm-api
     */
    public static String fromHTML(String s) {
        if (isNullOrEmpty(s)) {
            return s;
        }
        int length = s.length();
        StringBuilder b = null;
        int copied = 0;       // s[0..copied) is already reflected in b
        int entityStart = -1; // index of the pending '&', or -1 when there is none
        for( int i=0; i<length; i++ ) {
            char c = s.charAt(i);
            if (c=='&') {
                // A new '&' abandons a previous unterminated one, which stays literal text
                entityStart = i;
            } else if (c==';' && entityStart!=-1) {
                int codePoint = decodeEntity(s, entityStart+1, i);
                if (codePoint!=-1) {
                    if( b==null ) {
                        b = new StringBuilder(length);
                    }
                    b.append(s, copied, entityStart);
                    b.appendCodePoint(codePoint);
                    copied = i+1;
                }
                // When the entity is unknown, nothing is consumed: "&xyz;" is kept verbatim
                entityStart = -1;
            }
        }
        if (b==null) {
            return s;
        }
        b.append(s, copied, length);
        return b.toString();
    }

    /** Tells whether a string is {@code null} or has no character. */
    private static boolean isNullOrEmpty(String s) {
        return s==null || s.length()==0;
    }

    /**
     * Resolves the entity body s[from..to) (without the '&' and the ';') to a
     * code point, or returns -1 when it is empty, malformed or unknown.
     */
    private static int decodeEntity(String s, int from, int to) {
        if (from>=to) {
            return -1;
        }
        if (s.charAt(from)=='#') {
            // Numeric entity
            int digits = from+1;
            int radix = 10;
            if (digits<to && (s.charAt(digits)=='x' || s.charAt(digits)=='X')) {
                radix = 16;
                digits++;
            }
            return parseCodePoint(s, digits, to, radix);
        }
        // Non-numeric entity
        String name = s.substring(from, to);
        if (name.equals("euro")) { //$NON-NLS-1$
            return 0x20AC;
        }
        for (int k=0; k<HTML_ENTITIES.length; k++) {
            String htmlEntity = HTML_ENTITIES[k];
            if (htmlEntity!=null && name.equals(htmlEntity)) {
                return k;
            }
        }
        return -1;
    }

    /**
     * Parses the ASCII digits s[from..to) in the given radix (10 or 16) as a
     * Unicode code point. Returns -1 when there is no digit, when a character
     * is not a digit of that radix, or when the value exceeds U+10FFFF.
     */
    private static int parseCodePoint(String s, int from, int to, int radix) {
        if (from>=to) {
            return -1;
        }
        int value = 0;
        for (int k=from; k<to; k++) {
            char ch = s.charAt(k);
            int digit;
            if (ch>='0' && ch<='9') {
                digit = ch-'0';
            } else if (radix==16 && ch>='a' && ch<='f') {
                digit = ch-'a'+10;
            } else if (radix==16 && ch>='A' && ch<='F') {
                digit = ch-'A'+10;
            } else {
                return -1;
            }
            // value never exceeds MAX_CODE_POINT here, so this cannot overflow an int
            value = value*radix+digit;
            if (value>Character.MAX_CODE_POINT) {
                return -1;
            }
        }
        return value;
    }
}