/** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.camel.converter.jaxb; import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** * Provides filtering of characters that do fall into <a * href="http://www.w3.org/TR/2004/REC-xml-20040204/#NT-Char">range defined by * XML 1.0 spec</a>. <i>Filtering</i> here means replacement with space char. * * */ class NonXmlCharFilterer { private static final Logger LOG = LoggerFactory.getLogger(FilteringXmlStreamWriter.class); private static final char REPLACEMENT_CHAR = ' '; /** * Determines whether specified character needs to be filtered. */ boolean isFiltered(char c) { // Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | // [#x10000-#x10FFFF] // Won't be checking last interval, as it goes beyond 0xFFFF. if (c == 0x9 || c == 0xA || c == 0xD || (c >= 0x20 && c <= 0xD7FF) || (c >= 0xE000 && c <= 0xFFFD)) { return false; } return true; } /** * Filter specified char array by replacing non-XML chars with space. Only * part of array specified by <code>offset</code> and <code>length</code> is * affected. * * @return <code>true</code> if <code>content</code> was modified, * <code>false</code> otherwise. */ public boolean filter(char[] content, int offset, int length) { if (content == null) { return false; } boolean filtered = false; for (int i = offset; i < offset + length; i++) { if (isFiltered(content[i])) { filtered = true; content[i] = REPLACEMENT_CHAR; } } if (filtered) { LOG.warn("Identified and replaced non-XML chars"); } return filtered; } /** * Filter specified string by replacing illegal chars with space. * * @return filtered string */ public String filter(String original) { if (original == null) { return null; } char[] chars = original.toCharArray(); if (!filter(chars, 0, chars.length)) { return original; } String filtered = new String(chars); LOG.warn("Illegal characters were filtered; original => \"" + original + "\", filtered => \"" + filtered + "\""); return filtered; } }