/**************************************************************************
* Copyright (c) 2001 by Punch Telematix. All rights reserved. *
* *
* Redistribution and use in source and binary forms, with or without *
* modification, are permitted provided that the following conditions *
* are met: *
* 1. Redistributions of source code must retain the above copyright *
* notice, this list of conditions and the following disclaimer. *
* 2. Redistributions in binary form must reproduce the above copyright *
* notice, this list of conditions and the following disclaimer in the *
* documentation and/or other materials provided with the distribution. *
* 3. Neither the name of Punch Telematix nor the names of *
* other contributors may be used to endorse or promote products *
* derived from this software without specific prior written permission.*
* *
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED *
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF *
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. *
* IN NO EVENT SHALL PUNCH TELEMATIX OR OTHER CONTRIBUTORS BE LIABLE *
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR *
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF *
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR *
* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, *
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE *
* OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN *
* IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *
**************************************************************************/
/*
** $Id: UTF8Decoder.java,v 1.1.1.1 2004/07/12 14:07:47 cvs Exp $
*/
package wonka.decoders;
import java.io.*;
public class UTF8Decoder extends Decoder {
/** extra methods for ObjectIn/OutputStream (Thread safe methods) */
public static native byte[] stringToB(String string);
public static native String bToString(byte[] bytes, int off, int len);
/** END */
private int available;
private byte[] buf;
private int pos;
public UTF8Decoder(){}
public int getChar(InputStream in) throws IOException {
int c = readIn(in);
if(c < 0x80){
return c;
}
else if (c >= 0xe0) {
int utf8_top_4 = c & 0x0f;
c = readIn(in);
if (c < 0) {
return -1;
}
if (c >=0xc0 || c < 0x80) {
throw new UTFDataFormatException("bad second of triple: "+c);
}
int utf8_middle_6 = c & 0x3f;
c = readIn(in);
if (c < 0) {
return -1;
}
if (c >=0xc0 || c < 0x80) {
throw new UTFDataFormatException("bad third of triple: "+c);
}
int utf8_bottom_6 = c & 0x3f;
return (((utf8_top_4 << 6) | utf8_middle_6) << 6) | utf8_bottom_6;
}
else if (c >= 0xc0) {
int utf8_middle_6 = c & 0x3f;
c = readIn(in);
if (c < 0) {
return -1;
}
if (c >=0xc0 || c < 0x80) {
throw new UTFDataFormatException("bad second of duple: "+c);
}
int utf8_bottom_6 = c & 0x3f;
c = (utf8_middle_6 << 6) | utf8_bottom_6;
}
throw new UTFDataFormatException("bad start of sequence: "+c);
}
public native byte[] cToB(char[] chars, int off, int len);
public native char[] bToC(byte[] bytes, int off, int len);
public int cFromStream(InputStream in, char[] chars, int off, int len) throws IOException {
byte[] oldbuf = buf;
int avail = available;
byte[]buffer = oldbuf;
if(len < 1){
if(len == 0) return 0;
throw new ArrayIndexOutOfBoundsException();
}
if (len * 3 > avail) {
buffer = new byte[len * 3];
if (avail > 0) {
System.arraycopy(oldbuf, pos, buffer, 0, avail);
}
int readlen = in.read(buffer, avail, len * 3 - avail);
if (readlen > 0) {
avail += readlen;
}
}
int nread = 0;
int used = 0;
while (nread < len){
if (used >= avail) {
break;
}
int c = buffer[used++] & 0xff;
if(c >= 0x80){
if (c >= 0xe0) {
int utf8_top_4 = c & 0x0f;
if (used >= avail) {
used--;
break;
}
c = buffer[used++] & 0xff;
if (c >=0xc0 || c < 0x80) {
throw new UTFDataFormatException("bad second of triple: "+c);
}
int utf8_middle_6 = c & 0x3f;
if (used >= avail) {
used -= 2;
break;
}
c = buffer[used++] & 0xff;
if (c >=0xc0 || c < 0x80) {
throw new UTFDataFormatException("bad third of triple: "+c);
}
int utf8_bottom_6 = c & 0x3f;
c = (((utf8_top_4 << 6) | utf8_middle_6) << 6) | utf8_bottom_6;
}
else if (c >= 0xc0) {
int utf8_middle_6 = c & 0x3f;
if (used >= avail) {
used--;
break;
}
c = buffer[used++] & 0xff;
if (c >=0xc0 || c < 0x80) {
throw new UTFDataFormatException("bad second of duple: "+c);
}
int utf8_bottom_6 = c & 0x3f;
c = (utf8_middle_6 << 6) | utf8_bottom_6;
}
else {
throw new UTFDataFormatException("bad start of sequence: "+c);
}
}
// else c is ASCII, pass straight through
chars[off++] = (char)c;
++nread;
}
buf = buffer;
pos = used;
available = avail - used;
if (nread == 0) {
return -1;
}
return nread;
}
public String getEncoding(){
return "UTF8";
}
protected Decoder getInstance(){
return new UTF8Decoder();
}
private int readIn(InputStream in) throws IOException {
if(available > 0){
available--;
return buf[pos++] & 0xff;
}
else {
return in.read();
}
}
}