/*
 * sulky-modules - several general-purpose modules.
 * Copyright (C) 2007-2016 Joern Huxhorn
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

/*
 * Copyright 2007-2016 Joern Huxhorn
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package de.huxhorn.sulky.formatting;

import java.util.Locale;

/**
 * Static helpers for minimal XML escaping/unescaping of strings and for
 * validating characters against the XML 1.0 character ranges.
 * <p>
 * This class cannot be instantiated from outside; all members are static.
 */
public final class SimpleXml {
    // below constants are the valid ranges of XML characters
    // according to http://www.w3.org/TR/REC-xml#charsets
    private static final int XML_CHAR_RANGE_A_START = 0x000020;
    private static final int XML_CHAR_RANGE_A_END = 0x00D7FF;
    private static final int XML_CHAR_RANGE_B_START = 0x00E000;
    private static final int XML_CHAR_RANGE_B_END = 0x00FFFD;
    private static final int XML_CHAR_RANGE_C_START = 0x010000;
    private static final int XML_CHAR_RANGE_C_END = 0x10FFFF;

    static {
        // for the sake of coverage
        new SimpleXml();
    }

    private SimpleXml() {
    }

    /**
     * Tests a given character whether or not it is a valid XML character.
     * <p>
     * For reference, please see
     * <a href="http://www.w3.org/TR/REC-xml#charsets">the
     * specification</a>.
     * <p>
     * A single {@code char} that is a surrogate (U+D800 to U+DFFF) is reported
     * as invalid by this overload, because it lies outside all accepted ranges.
     *
     * @param character The character to test
     * @return whether or not the supplied character is a valid XML character
     */
    public static boolean isValidXMLCharacter(char character) {
        return isValidXMLCharacter(0xFFFF & character);
    }

    /**
     * Tests a given Unicode code point whether or not it is a valid XML character.
     * <p>
     * Accepted are tab, carriage return, line feed and the three ranges
     * U+0020 to U+D7FF, U+E000 to U+FFFD and U+10000 to U+10FFFF.
     * For reference, please see
     * <a href="http://www.w3.org/TR/REC-xml#charsets">the
     * specification</a>.
     *
     * @param codePoint The code point to test
     * @return whether or not the supplied code point is a valid XML character
     */
    public static boolean isValidXMLCharacter(int codePoint) {
        return codePoint == '\t'
                || codePoint == '\r'
                || codePoint == '\n'
                || (codePoint >= XML_CHAR_RANGE_A_START && codePoint <= XML_CHAR_RANGE_A_END)
                || (codePoint >= XML_CHAR_RANGE_B_START && codePoint <= XML_CHAR_RANGE_B_END)
                || (codePoint >= XML_CHAR_RANGE_C_START && codePoint <= XML_CHAR_RANGE_C_END);
    }

    /**
     * Replaces the characters {@code &}, {@code <}, {@code >} and {@code "} with their
     * respective xml-entities. Does also replace a zero byte with space.
     * <p>
     * The apostrophe is intentionally left untouched.
     *
     * @param input the input that will be xml-escaped, must not be null
     * @return the xml-escaped input.
     * @throws NullPointerException if <code>input</code> is null
     */
    public static String escape(String input) {
        String result = input;
        result = result.replace((char) 0, ' ');
        // the ampersand must be handled first, otherwise the ampersands
        // introduced by the following replacements would be escaped again.
        result = result.replace("&", "&amp;");
        result = result.replace("<", "&lt;");
        result = result.replace(">", "&gt;");
        result = result.replace("\"", "&quot;");
        // apos is not escaped because swing html does not know about &apos;...
        return result;
    }

    /**
     * Reverses escape with the exception of the zero-byte escape.
     *
     * @param input the input that will be xml-unescaped, must not be null
     * @return the unescaped string.
     * @throws NullPointerException if <code>input</code> is null
     */
    public static String unescape(String input) {
        String result = input;
        result = result.replace("&quot;", "\"");
        result = result.replace("&gt;", ">");
        result = result.replace("&lt;", "<");
        // the ampersand entity must be handled last so that an escaped
        // entity text is restored literally instead of being unescaped twice.
        result = result.replace("&amp;", "&");
        return result;
    }

    /**
     * <p>
     * This method ensures that the output String has only
     * valid XML unicode characters as specified by the
     * XML 1.0 standard.
     * For reference, please see
     * <a href="http://www.w3.org/TR/REC-xml#charsets">the
     * specification</a>.
     * </p>
     * <p>
     * Based on code from http://cse-mjmcl.cse.bris.ac.uk/blog/2007/02/14/1171465494443.html
     * </p>
     * <p>
     * This method takes into account that no change will be necessary most of the time so
     * nothing will be allocated/changed until the first non-valid character is found.
     * If nothing had to be replaced, the given String instance itself is returned.
     * </p>
     * <p>
     * The input is examined code point by code point: a well-formed surrogate pair
     * is a valid supplementary character and is kept as is, while an unpaired
     * surrogate is replaced. The result always has the same length as the input.
     * </p>
     *
     * @param in The String whose non-valid characters we want to remove, must not be null.
     * @param replacementChar the character to replace invalid characters with.
     * @return The in String, with non-valid characters replaced by replacementChar.
     * @throws IllegalArgumentException if <code>replacementChar</code> is an invalid character itself.
     * @throws NullPointerException if <code>in</code> is null and <code>replacementChar</code> is valid.
     */
    public static String replaceNonValidXMLCharacters(String in, char replacementChar) {
        if (!isValidXMLCharacter(replacementChar)) {
            throw new IllegalArgumentException("Replacement character 0x" + Integer.toString(replacementChar, 16).toUpperCase(Locale.US) + " is invalid itself!");
        }

        StringBuilder out = null; // created lazily on the first invalid character
        final int length = in.length();
        int index = 0;
        while (index < length) {
            final int current = in.codePointAt(index);
            if (isValidXMLCharacter(current)) {
                // skip both chars of a surrogate pair so that the low surrogate
                // is not examined (and rejected) on its own afterwards.
                index += Character.charCount(current);
                continue;
            }
            if (out == null) {
                out = new StringBuilder(in);
            }
            // every invalid code point lies below U+10000 and therefore
            // occupies exactly one char.
            out.setCharAt(index, replacementChar);
            index++;
        }

        if (out != null) {
            return out.toString();
        }
        return in; // no change.
    }
}