package org.orbeon.saxon.event;
import org.orbeon.saxon.charcode.UTF16;
import org.orbeon.saxon.tinytree.CompressedWhitespace;
import org.orbeon.saxon.trans.XPathException;
public class XML1252Emitter extends XMLEmitter {
// This method overrides the Saxon method so we can do a custom fix-up of invalid CP-1252 characters.
@Override protected void writeEscape(final CharSequence chars, final boolean inAttribute)
throws java.io.IOException, XPathException {
int segstart = 0;
boolean disabled = false;
final boolean[] specialChars = (inAttribute ? specialInAtt : specialInText);
if (chars instanceof CompressedWhitespace) {
((CompressedWhitespace)chars).writeEscape(specialChars, writer);
return;
}
final int clength = chars.length();
while (segstart < clength) {
int i = segstart;
// find a maximal sequence of "ordinary" characters
while (i < clength) {
final char c = chars.charAt(i);
if (c < 127) {
if (specialChars[c]) {
break;
} else {
i++;
}
} else if (c < 160) {
break;
} else if (c == 0x2028) {
break;
} else if (UTF16.isHighSurrogate(c)) {
break;
} else if (!characterSet.inCharset(c)) {
break;
} else {
i++;
}
}
// if this was the whole string write it out and exit
if (i >= clength) {
if (segstart == 0) {
writeCharSequence(chars);
} else {
writeCharSequence(chars.subSequence(segstart, i));
}
return;
}
// otherwise write out this sequence
if (i > segstart) {
writeCharSequence(chars.subSequence(segstart, i));
}
// examine the special character that interrupted the scan
final char c = chars.charAt(i);
if (c==0) {
// used to switch escaping on and off
// See https://github.com/orbeon/orbeon-forms/issues/3115
// disabled = !disabled;
} else if (disabled) {
if (c > 127) {
if (UTF16.isHighSurrogate(c)) {
int cc = UTF16.combinePair(c, chars.charAt(i+1));
if (!characterSet.inCharset(cc)) {
XPathException de = new XPathException("Character x" + Integer.toHexString(cc) +
" is not available in the chosen encoding");
de.setErrorCode("SERE0008");
throw de;
}
} else if (!characterSet.inCharset(c)) {
XPathException de = new XPathException("Character " + c + " (x" + Integer.toHexString((int)c) +
") is not available in the chosen encoding");
de.setErrorCode("SERE0008");
throw de;
}
}
writer.write(c);
} else if (c>=127 && c<160) {
// XML 1.1 requires these characters to be written as character references
// ORBEON: Handle faulty CP-1252 characters ending up as unicode code points.
outputCharacterReference(HTML1252Emitter.CP1252_BEST_FIT[c - 127]);
} else if (c>=160) {
if (c==0x2028) {
outputCharacterReference(c);
} else if (UTF16.isHighSurrogate(c)) {
char d = chars.charAt(++i);
int charval = UTF16.combinePair(c, d);
if (characterSet.inCharset(charval)) {
writer.write(c);
writer.write(d);
} else {
outputCharacterReference(charval);
}
} else {
// process characters not available in the current encoding
outputCharacterReference(c);
}
} else {
// process special ASCII characters
if (c=='<') {
writer.write("<");
} else if (c=='>') {
writer.write(">");
} else if (c=='&') {
writer.write("&");
} else if (c=='\"') {
writer.write(""");
} else if (c=='\n') {
writer.write("
");
} else if (c=='\r') {
writer.write("
");
} else if (c=='\t') {
writer.write(" ");
} else {
// C0 control characters
outputCharacterReference(c);
}
}
segstart = ++i;
}
}
}
//
// The contents of this file are subject to the Mozilla Public License Version 1.0 (the "License");
// you may not use this file except in compliance with the License. You may obtain a copy of the
// License at http://www.mozilla.org/MPL/
//
// Software distributed under the License is distributed on an "AS IS" basis,
// WITHOUT WARRANTY OF ANY KIND, either express or implied.
// See the License for the specific language governing rights and limitations under the License.
//
// The Original Code is: all this file.
//
// The Initial Developer of the Original Code is Michael H. Kay.
//
// Portions created by (your name) are Copyright (C) (your legal entity). All Rights Reserved.
//
// Contributor(s): none.
//