/* * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 only, as * published by the Free Software Foundation. Oracle designates this * particular file as subject to the "Classpath" exception as provided * by Oracle in the LICENSE file that accompanied this code. * * This code is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * version 2 for more details (a copy is included in the LICENSE file that * accompanied this code). * * You should have received a copy of the GNU General Public License version * 2 along with this work; if not, write to the Free Software Foundation, * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. * * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA * or visit www.oracle.com if you need additional information or have any * questions. */ package com.sun.xml.internal.bind.v2.runtime.output; import java.io.IOException; /** * Buffer for UTF-8 encoded string. * * See http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8 for the UTF-8 encoding. * * @author Kohsuke Kawaguchi */ public final class Encoded { public byte[] buf; public int len; public Encoded() {} public Encoded(String text) { set(text); } public void ensureSize(int size) { if(buf==null || buf.length<size) buf = new byte[size]; } public final void set( String text ) { int length = text.length(); ensureSize(length*3+1); // +1 for append int ptr = 0; for (int i = 0; i < length; i++) { final char chr = text.charAt(i); if (chr > 0x7F) { if (chr > 0x7FF) { if(Character.MIN_HIGH_SURROGATE<=chr && chr<=Character.MAX_LOW_SURROGATE) { // surrogate int uc = (((chr & 0x3ff) << 10) | (text.charAt(++i) & 0x3ff)) + 0x10000; buf[ptr++] = (byte)(0xF0 | ((uc >> 18))); buf[ptr++] = (byte)(0x80 | ((uc >> 12) & 0x3F)); buf[ptr++] = (byte)(0x80 | ((uc >> 6) & 0x3F)); buf[ptr++] = (byte)(0x80 + (uc & 0x3F)); continue; } buf[ptr++] = (byte)(0xE0 + (chr >> 12)); buf[ptr++] = (byte)(0x80 + ((chr >> 6) & 0x3F)); } else { buf[ptr++] = (byte)(0xC0 + (chr >> 6)); } buf[ptr++] = (byte)(0x80 + (chr & 0x3F)); } else { buf[ptr++] = (byte)chr; } } len = ptr; } /** * Fill in the buffer by encoding the specified characters * while escaping characters like < * * @param isAttribute * if true, characters like \t, \r, and \n are also escaped. */ public final void setEscape(String text, boolean isAttribute) { int length = text.length(); ensureSize(length*6+1); // in the worst case the text is like """""", so we need 6 bytes per char int ptr = 0; for (int i = 0; i < length; i++) { final char chr = text.charAt(i); int ptr1 = ptr; if (chr > 0x7F) { if (chr > 0x7FF) { if(Character.MIN_HIGH_SURROGATE<=chr && chr<=Character.MAX_LOW_SURROGATE) { // surrogate int uc = (((chr & 0x3ff) << 10) | (text.charAt(++i) & 0x3ff)) + 0x10000; buf[ptr++] = (byte)(0xF0 | ((uc >> 18))); buf[ptr++] = (byte)(0x80 | ((uc >> 12) & 0x3F)); buf[ptr++] = (byte)(0x80 | ((uc >> 6) & 0x3F)); buf[ptr++] = (byte)(0x80 + (uc & 0x3F)); continue; } buf[ptr1++] = (byte)(0xE0 + (chr >> 12)); buf[ptr1++] = (byte)(0x80 + ((chr >> 6) & 0x3F)); } else { buf[ptr1++] = (byte)(0xC0 + (chr >> 6)); } buf[ptr1++] = (byte)(0x80 + (chr & 0x3F)); } else { byte[] ent; if((ent=attributeEntities[chr])!=null) { // the majority of the case is just printed as a char, // so it's very important to reject them as quickly as possible // check again to see if this really needs to be escaped if(isAttribute || entities[chr]!=null) ptr1 = writeEntity(ent,ptr1); else buf[ptr1++] = (byte)chr; } else buf[ptr1++] = (byte)chr; } ptr = ptr1; } len = ptr; } private int writeEntity( byte[] entity, int ptr ) { System.arraycopy(entity,0,buf,ptr,entity.length); return ptr+entity.length; } /** * Writes the encoded bytes to the given output stream. */ public final void write(UTF8XmlOutput out) throws IOException { out.write(buf,0,len); } /** * Appends a new character to the end of the buffer. * This assumes that you have enough space in the buffer. */ public void append(char b) { buf[len++] = (byte)b; } /** * Reallocate the buffer to the exact size of the data * to reduce the memory footprint. */ public void compact() { byte[] b = new byte[len]; System.arraycopy(buf,0,b,0,len); buf = b; } /** * UTF-8 encoded entities keyed by their character code. * e.g., entities['&'] == AMP_ENTITY. * * In attributes we need to encode more characters. */ private static final byte[][] entities = new byte[0x80][]; private static final byte[][] attributeEntities = new byte[0x80][]; static { add('&',"&",false); add('<',"<",false); add('>',">",false); add('"',""",true); add('\t'," ",true); add('\r'," ",false); add('\n'," ",true); } private static void add(char c, String s, boolean attOnly) { byte[] image = UTF8XmlOutput.toBytes(s); attributeEntities[c] = image; if(!attOnly) entities[c] = image; } }