// License: GPL. For details, see LICENSE file.
package org.openstreetmap.josm.io;
import java.io.IOException;
import java.io.Reader;
import org.openstreetmap.josm.Main;
/**
* Filtering Reader that gets rid of characters that are invalid in an XML 1.0
* document.
*
* Although these characters are forbidden, in the real world they still appear
* in XML files. Java's SAX parser throws an exception, so we have to filter
* at a lower level.
*
* Only handles control characters (<0x20). Invalid characters are replaced
* by space (0x20).
*/
public class InvalidXmlCharacterFilter extends Reader {

    /** Character substituted for every invalid character (space). */
    private static final char REPLACEMENT = 0x20;

    /**
     * Lookup table indexed by character code (0x00 to 0x1F); {@code true} marks
     * a control character that gets replaced by this filter.
     */
    private static final boolean[] INVALID_CHARS;
    static {
        INVALID_CHARS = new boolean[0x20];
        for (int i = 0; i < INVALID_CHARS.length; ++i) {
            INVALID_CHARS[i] = true;
        }
        // The three control characters below 0x20 that are left untouched.
        INVALID_CHARS[0x9] = false; // tab
        INVALID_CHARS[0xA] = false; // LF
        INVALID_CHARS[0xD] = false; // CR
    }

    /**
     * Shared by all instances so that the warning is logged only for the first
     * invalid character. The flag is not synchronized, so under concurrent use
     * the warning may be emitted more than once.
     */
    private static boolean firstWarning = true;

    /** The underlying reader whose characters are filtered. */
    private final Reader reader;

    /**
     * Constructs a new {@code InvalidXmlCharacterFilter} for the given Reader.
     * @param reader The reader to filter
     * @throws NullPointerException if {@code reader} is {@code null}
     */
    public InvalidXmlCharacterFilter(Reader reader) {
        // Fail fast here rather than with an obscure NPE on the first read()/close().
        if (reader == null) {
            throw new NullPointerException("reader");
        }
        this.reader = reader;
    }

    /**
     * Reads characters from the underlying reader into {@code b} and replaces,
     * in place, every invalid control character that was read by a space.
     *
     * @param b destination buffer
     * @param off offset in {@code b} at which to start storing characters
     * @param len maximum number of characters to read
     * @return the number of characters read, or -1 if the underlying reader
     *         reported the end of the stream
     * @throws IOException if the underlying reader throws it
     */
    @Override
    public int read(char[] b, int off, int len) throws IOException {
        int n = reader.read(b, off, len);
        if (n <= 0) {
            // End of stream (-1) or nothing read: there is nothing to filter.
            return n;
        }
        int end = off + n;
        for (int i = off; i < end; ++i) {
            b[i] = filter(b[i]);
        }
        return n;
    }

    /**
     * Closes the underlying reader.
     *
     * @throws IOException if the underlying reader throws it
     */
    @Override
    public void close() throws IOException {
        reader.close();
    }

    /**
     * Maps a single character to its filtered value.
     *
     * @param in the character to check
     * @return a space if {@code in} is an invalid control character,
     *         otherwise {@code in} unchanged
     */
    private static char filter(char in) {
        if (in < 0x20 && INVALID_CHARS[in]) {
            if (firstWarning) {
                // Log the numeric code: the character itself is a non-printable
                // control character and would garble the log output.
                String code = (in < 0x10 ? "0x0" : "0x") + Integer.toHexString(in);
                Main.warn("Invalid xml character encountered: " + code + ".");
                firstWarning = false;
            }
            return REPLACEMENT;
        }
        return in;
    }
}