/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.cocoon.components.serializers.encoding; /** * * * @author <a href="mailto:pier@apache.org">Pier Fumagalli</a>, February 2003 * @version CVS $Id$ */ public class XMLEncoder extends CompiledEncoder { private static final char ENCODE_HEX[] = "0123456789ABCDEF".toCharArray(); private static final char ENCODE_QUOT[] = """.toCharArray(); private static final char ENCODE_AMP[] = "&".toCharArray(); private static final char ENCODE_APOS[] = "'".toCharArray(); private static final char ENCODE_LT[] = "<".toCharArray(); private static final char ENCODE_GT[] = ">".toCharArray(); private Character highSurrogate = null; /** * Create a new instance of this <code>XMLEncoder</code>. */ public XMLEncoder() { super("X-W3C-XML"); } /** * Create a new instance of this <code>XMLEncoder</code>. * * @param name A name for this <code>Encoding</code>. * @throws NullPointerException If one of the arguments is <b>null</b>. */ protected XMLEncoder(String name) { super(name); } public void reset() { this.highSurrogate = null; } /** * Return true or false wether this encoding can encode the specified * character or not. * <p> * This method will return true for the following character range: * <br /> * <code> * <nobr>#x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD]</nobr> * </code> * </p> * * @see <a href="http://www.w3.org/TR/REC-xml#charsets">W3C XML 1.0</a> */ protected boolean compile(char c) { if ((c == 0x09) || // [\t] (c == 0x0a) || // [\n] (c == 0x0d)) { // [\r] return(true); } if ((c == 0x22) || // ["] (c == 0x26) || // [&] (c == 0x27) || // ['] (c == 0x3c) || // [<] (c == 0x3e) || // [>] (c < 0x20) || // See <http://www.w3.org/TR/REC-xml#charsets> ((c > 0xd7ff) && (c < 0xe000)) || (c > 0xfffd)) { return(false); } return(true); } /** * Return an array of characters representing the encoding for the * specified character. */ public char[] encode(char c) { if (highSurrogate != null) { if (!Character.isLowSurrogate(c)) { throw new IllegalArgumentException("Expected low surrogate char"); } int codePoint = Character.toCodePoint(highSurrogate.charValue(), c); highSurrogate = null; return encode(codePoint); } else if (Character.isHighSurrogate(c)) { highSurrogate = Character.valueOf(c); return new char[0]; } return encode((int) c); } private char[] encode(int c) { switch (c) { case 0x22: return(ENCODE_QUOT); // (") ["] case 0x26: return(ENCODE_AMP); // (&) [&] case 0x27: return(ENCODE_APOS); // (') ['] case 0x3c: return(ENCODE_LT); // (<) [<] case 0x3e: return(ENCODE_GT); // (>) [>] default: { if (c > 0xffff) { char ret[] = { '&', '#', 'x', ENCODE_HEX[c >> 0x10 & 0xf], ENCODE_HEX[c >> 0xc & 0xf], ENCODE_HEX[c >> 0x8 & 0xf], ENCODE_HEX[c >> 0x4 & 0xf], ENCODE_HEX[c & 0xf], ';' }; return(ret); } if (c > 0xfff) { char ret[] = { '&', '#', 'x', ENCODE_HEX[c >> 0xc & 0xf], ENCODE_HEX[c >> 0x8 & 0xf], ENCODE_HEX[c >> 0x4 & 0xf], ENCODE_HEX[c & 0xf], ';' }; return(ret); } if (c > 0xff) { char ret[] = { '&', '#', 'x', ENCODE_HEX[c >> 0x8 & 0xf], ENCODE_HEX[c >> 0x4 & 0xf], ENCODE_HEX[c & 0xf], ';' }; return(ret); } char ret[] = { '&', '#', 'x', ENCODE_HEX[c >> 0x4 & 0xf], ENCODE_HEX[c & 0xf], ';' }; return(ret); } } } }