/**
* Copyright Intellectual Reserve, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.gedcomx.util;
import javax.xml.namespace.NamespaceContext;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamWriter;
/**
* Delegating {@link XMLStreamWriter} that filters out UTF-8 characters that
* are illegal in XML, replacing them with a
*
* @author Erik van Zijst (small change by Lennart Schedin, and
* isLegalXmlCharacter() expanded to XML 1.0 spec by Randy Wilson)
*/
public class CleanXMLStreamWriter implements XMLStreamWriter {
private final XMLStreamWriter writer;
// Unicode "REPLACEMENT_CHARACTER" (looks like a black diamond with a white question mark in the middle).
public static final char REPLACEMENT_CHARACTER = '\uFFFD';
public CleanXMLStreamWriter(XMLStreamWriter writer) {
if (null == writer) {
throw new IllegalArgumentException("null");
}
else {
this.writer = writer;
}
}
/**
* Substitutes all illegal characters in the given string by the value of
* {@link CleanXMLStreamWriter#REPLACEMENT_CHARACTER}. If no illegal characters
* were found, no copy is made and the given string is returned.
*
* @param string the string
* @return same string, if not illegal characters detected;
* otherwise, string with illegal characters replaced with {@link CleanXMLStreamWriter#REPLACEMENT_CHARACTER}
*/
protected static String escapeCharacters(String string) {
char[] copy = null;
boolean copied = false;
for (int i = 0; i < string.length();) {
int codePoint = string.codePointAt(i);
int size = Character.charCount(codePoint);
if (!isLegalXmlCodePoint(codePoint)) {
if (!copied) {
copy = string.toCharArray();
copied = true;
}
for (int j = 0; j < size; j++) {
copy[i + j] = REPLACEMENT_CHARACTER;
}
}
i += size;
}
return copied ? new String(copy) : string;
}
public void writeStartElement(String s) throws XMLStreamException {
writer.writeStartElement(s);
}
public void writeStartElement(String s, String s1) throws XMLStreamException {
writer.writeStartElement(s, s1);
}
public void writeStartElement(String s, String s1, String s2)
throws XMLStreamException {
writer.writeStartElement(s, s1, s2);
}
public void writeEmptyElement(String s, String s1) throws XMLStreamException {
writer.writeEmptyElement(s, s1);
}
public void writeEmptyElement(String s, String s1, String s2)
throws XMLStreamException {
writer.writeEmptyElement(s, s1, s2);
}
public void writeEmptyElement(String s) throws XMLStreamException {
writer.writeEmptyElement(s);
}
public void writeEndElement() throws XMLStreamException {
writer.writeEndElement();
}
public void writeEndDocument() throws XMLStreamException {
writer.writeEndDocument();
}
public void close() throws XMLStreamException {
writer.close();
}
public void flush() throws XMLStreamException {
writer.flush();
}
public void writeAttribute(String localName, String value) throws XMLStreamException {
writer.writeAttribute(localName, escapeCharacters(value));
}
public void writeAttribute(String prefix, String namespaceUri, String localName, String value)
throws XMLStreamException {
writer.writeAttribute(prefix, namespaceUri, localName, escapeCharacters(value));
}
public void writeAttribute(String namespaceUri, String localName, String value)
throws XMLStreamException {
writer.writeAttribute(namespaceUri, localName, escapeCharacters(value));
}
public void writeNamespace(String s, String s1) throws XMLStreamException {
writer.writeNamespace(s, s1);
}
public void writeDefaultNamespace(String s) throws XMLStreamException {
writer.writeDefaultNamespace(s);
}
public void writeComment(String s) throws XMLStreamException {
writer.writeComment(s);
}
public void writeProcessingInstruction(String s) throws XMLStreamException {
writer.writeProcessingInstruction(s);
}
public void writeProcessingInstruction(String s, String s1)
throws XMLStreamException {
writer.writeProcessingInstruction(s, s1);
}
public void writeCData(String s) throws XMLStreamException {
writer.writeCData(escapeCharacters(s));
}
public void writeDTD(String s) throws XMLStreamException {
writer.writeDTD(s);
}
public void writeEntityRef(String s) throws XMLStreamException {
writer.writeEntityRef(s);
}
public void writeStartDocument() throws XMLStreamException {
writer.writeStartDocument();
}
public void writeStartDocument(String s) throws XMLStreamException {
writer.writeStartDocument(s);
}
public void writeStartDocument(String s, String s1)
throws XMLStreamException {
writer.writeStartDocument(s, s1);
}
public void writeCharacters(String s) throws XMLStreamException {
writer.writeCharacters(escapeCharacters(s));
}
public void writeCharacters(char[] chars, int start, int len)
throws XMLStreamException {
writer.writeCharacters(escapeCharacters(new String(chars, start, len)));
}
public String getPrefix(String s) throws XMLStreamException {
return writer.getPrefix(s);
}
public void setPrefix(String s, String s1) throws XMLStreamException {
writer.setPrefix(s, s1);
}
public void setDefaultNamespace(String s) throws XMLStreamException {
writer.setDefaultNamespace(s);
}
public void setNamespaceContext(NamespaceContext namespaceContext)
throws XMLStreamException {
writer.setNamespaceContext(namespaceContext);
}
public NamespaceContext getNamespaceContext() {
return writer.getNamespaceContext();
}
public Object getProperty(String s) throws IllegalArgumentException {
return writer.getProperty(s);
}
/**
* Tell whether the given character is valid in an XML document.
*
* @param c - character
* @return true if valid in XML, false if it would make an XML invalid.
*/
protected static boolean isLegalXmlCodePoint(int c) {
/* From the XML 1.0 specifications document at http://www.w3.org/TR/REC-xml/#charsets:
Char :: = #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]
*/
return (c >= 0x20 && c <= 0xD7FF) ||
(c >= 0xE000 && c <= 0xFFFD) ||
(c >= 0x10000 && c <= 0x10FFFF) ||
(c == 0x09 || c == 0x0A || c == 0x0D);
/* The XML spec also discouraged use of the following, though they were not forbidden,
because they were either control characters or permanently undefined Unicode characters:
[#x7F-#x84], [#x86-#x9F], [#xFDD0-#xFDEF],
[#x1FFFE-#x1FFFF], [#x2FFFE-#x2FFFF], [#x3FFFE-#x3FFFF],
[#x4FFFE-#x4FFFF], [#x5FFFE-#x5FFFF], [#x6FFFE-#x6FFFF],
[#x7FFFE-#x7FFFF], [#x8FFFE-#x8FFFF], [#x9FFFE-#x9FFFF],
[#xAFFFE-#xAFFFF], [#xBFFFE-#xBFFFF], [#xCFFFE-#xCFFFF],
[#xDFFFE-#xDFFFF], [#xEFFFE-#xEFFFF], [#xFFFFE-#xFFFFF],
[#x10FFFE-#x10FFFF].
Since they are not forbidden, however, this reader will not make the decision to get rid of them.
*/
}
}