/** * Copyright 2007 Charlie Hubbard and Brandon Goodin * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or * implied. See the License for the specific language governing * permissions and limitations under the License. */ package flexjson.transformer; import java.util.HashMap; import java.util.Map; /** * A helper class provided out of the box to encode characters that HTML can't support * natively like <, >, &, or ". This will scan the value passed to the transform * method and replace any of these special characters with the HTML encoded equivalent. This * method will NOT work for HTML text because it will blindly encode all characters it sees which * means it will strip out any HTML tags. */ public class HtmlEncoderTransformer extends AbstractTransformer { private static final Map<Integer, String> htmlEntities= new HashMap<Integer, String>(); public HtmlEncoderTransformer() { if (htmlEntities.isEmpty()) { htmlEntities.put(34, """); // " - double-quote htmlEntities.put(38, "&"); // & - ampersand // htmlEntities.put( 39, "'"); // ' - apostrophe htmlEntities.put(60, "<"); // < - less-than htmlEntities.put(62, ">"); // > - greater-than htmlEntities.put(160, " "); // non-breaking space htmlEntities.put(169, "©"); // � - copyright htmlEntities.put(174, "®"); // � - registered trademark htmlEntities.put(192, "À"); // � - uppercase A, grave accent htmlEntities.put(193, "Á"); // � - uppercase A, acute accent htmlEntities.put(194, "Â"); // � - uppercase A, circumflex accent htmlEntities.put(195, "Ã"); // � - uppercase A, tilde htmlEntities.put(196, "Ä"); // � - uppercase A, umlaut htmlEntities.put(197, "Å"); // � - uppercase A, ring htmlEntities.put(198, "Æ"); // � - uppercase AE htmlEntities.put(199, "Ç"); // � - uppercase C, cedilla htmlEntities.put(200, "È"); // � - uppercase E, grave accent htmlEntities.put(201, "É"); // � - uppercase E, acute accent htmlEntities.put(202, "Ê"); // � - uppercase E, circumflex accent htmlEntities.put(203, "Ë"); // � - uppercase E, umlaut htmlEntities.put(204, "Ì"); // � - uppercase I, grave accent htmlEntities.put(205, "Í"); // � - uppercase I, acute accent htmlEntities.put(206, "Î"); // � - uppercase I, circumflex accent htmlEntities.put(207, "Ï"); // � - uppercase I, umlaut htmlEntities.put(208, "Ð"); // � - uppercase Eth, Icelandic htmlEntities.put(209, "Ñ"); // � - uppercase N, tilde htmlEntities.put(210, "Ò"); // � - uppercase O, grave accent htmlEntities.put(211, "Ó"); // � - uppercase O, acute accent htmlEntities.put(212, "Ô"); // � - uppercase O, circumflex accent htmlEntities.put(213, "Õ"); // � - uppercase O, tilde htmlEntities.put(214, "Ö"); // � - uppercase O, umlaut htmlEntities.put(216, "Ø"); // � - uppercase O, slash htmlEntities.put(217, "Ù"); // � - uppercase U, grave accent htmlEntities.put(218, "Ú"); // � - uppercase U, acute accent htmlEntities.put(219, "Û"); // � - uppercase U, circumflex accent htmlEntities.put(220, "Ü"); // � - uppercase U, umlaut htmlEntities.put(221, "Ý"); // � - uppercase Y, acute accent htmlEntities.put(222, "Þ"); // � - uppercase THORN, Icelandic htmlEntities.put(223, "ß"); // � - lowercase sharps, German htmlEntities.put(224, "à"); // � - lowercase a, grave accent htmlEntities.put(225, "á"); // � - lowercase a, acute accent htmlEntities.put(226, "â"); // � - lowercase a, circumflex accent htmlEntities.put(227, "ã"); // � - lowercase a, tilde htmlEntities.put(228, "ä"); // � - lowercase a, umlaut htmlEntities.put(229, "å"); // � - lowercase a, ring htmlEntities.put(230, "æ"); // � - lowercase ae htmlEntities.put(231, "ç"); // � - lowercase c, cedilla htmlEntities.put(232, "è"); // � - lowercase e, grave accent htmlEntities.put(233, "é"); // � - lowercase e, acute accent htmlEntities.put(234, "ê"); // � - lowercase e, circumflex accent htmlEntities.put(235, "ë"); // � - lowercase e, umlaut htmlEntities.put(236, "ì"); // � - lowercase i, grave accent htmlEntities.put(237, "í"); // � - lowercase i, acute accent htmlEntities.put(238, "î"); // � - lowercase i, circumflex accent htmlEntities.put(239, "ï"); // � - lowercase i, umlaut htmlEntities.put(240, "ð"); // � - lowercase eth, Icelandic htmlEntities.put(241, "ñ"); // � - lowercase n, tilde htmlEntities.put(242, "ò"); // � - lowercase o, grave accent htmlEntities.put(243, "ó"); // � - lowercase o, acute accent htmlEntities.put(244, "ô"); // � - lowercase o, circumflex accent htmlEntities.put(245, "õ"); // � - lowercase o, tilde htmlEntities.put(246, "ö"); // � - lowercase o, umlaut htmlEntities.put(248, "ø"); // � - lowercase o, slash htmlEntities.put(249, "ù"); // � - lowercase u, grave accent htmlEntities.put(250, "ú"); // � - lowercase u, acute accent htmlEntities.put(251, "û"); // � - lowercase u, circumflex accent htmlEntities.put(252, "ü"); // � - lowercase u, umlaut htmlEntities.put(253, "ý"); // � - lowercase y, acute accent htmlEntities.put(254, "þ"); // � - lowercase thorn, Icelandic htmlEntities.put(255, "ÿ"); // � - lowercase y, umlaut htmlEntities.put(8364, "€"); // Euro symbol } } public void transform(Object value) { String val= value.toString(); getContext().write("\""); for (int i= 0; i < val.length(); ++i) { int intVal= (int) val.charAt(i); if (htmlEntities.containsKey(intVal)) { getContext().write(htmlEntities.get(intVal)); } else if (intVal > 128) { getContext().write("&#"); getContext().write(String.valueOf(intVal)); getContext().write(";"); } else { getContext().write(String.valueOf(val.charAt(i))); } } getContext().write("\""); } }