package de.is24.common.xml; /** * needed for: producing only XML-valid characters in the XML responses. needed * because: real-estates-description fields in TERMS may contain characters in * an unexpected encoding. */ public class XmlSafeCharacterAdapter { public static String unmarshal(String value) { return value; } public static String marshal(String v) { if (v != null) { return stripOrTranslateNonValidXMLCharacters(v); } else { return null; } } private static String stripOrTranslateNonValidXMLCharacters(String in) { if (in == null || in.isEmpty()) { return ""; } StringBuilder out = new StringBuilder(); for (int i = 0; i < in.length(); i++) { char current = in.charAt(i); if (isSafeChar(current)) { out.append(current); } else { // we are very kind and will translate some unsafe // ISO-8859-1, CP1252 chars if (current == 0x80) { // EURO out.append('\u20AC'); } else if (current == 0x84) { // double low quotation mark out.append('\u201E'); } else if (current == 0x91) { // left single quotation mark out.append('\u2018'); } else if (current == 0x92) { // right single quotation mark out.append('\u2019'); } else if (current == 0x93) { // left double quotation mark out.append('\u201C'); } else if (current == 0x94) { // right double quotation mark out.append('\u201D'); } else if ((current == 0x96) || (current == 0x97)) { // simplify // some // variants // of // hyphens // to // minus out.append('-'); } } } return out.toString(); } private static boolean isSafeChar(char current) { return ( // OR expression for safe characters // single safe characters (current == 0x9) || (current == 0xA) || (current == 0xD) || // safe character ranges ((current >= 0x20) && (current <= 0x7F)) || // windows // chars // from 0x80 // to 0x9F // are not // always // XML-safe ((current >= 0xA0) && (current <= 0xD7FF)) || ((current >= 0xE000) && (current <= 0xFFFD)) || ((current >= 0x10000) && (current <= 0x10FFFF))) && ( // AND expression for forbidden characters // single invalid chracters (current != 0xBD) && (current != 0xBF) && (current != 0xEF)); } }