package net.varkhan.base.conversion.character;
import net.varkhan.base.conversion.AbstractDecoder;
import net.varkhan.base.conversion.Decoder;
import net.varkhan.base.conversion.serializer.DecodingException;
import java.io.IOException;
import java.io.InputStream;
import java.nio.BufferOverflowException;
import java.nio.ByteBuffer;
import java.nio.ReadOnlyBufferException;
import java.nio.charset.Charset;
/**
 * <b>An UTF-8 CharSequence decoder</b>.
 * <p/>
 * Decodes a series of UTF-8 variable-width code points (on 1 to 4 bytes,
 * depending on the code point) as a Java String (UTF-16). Code points above
 * U+FFFF (4-byte sequences) are emitted as a UTF-16 surrogate pair.
 * <p/>
 * The decoder is lenient: it only inspects the lead byte of each sequence to
 * find out how many continuation bytes follow, and keeps the low 6 bits of
 * each continuation byte without checking its high bits. Overlong encodings
 * are therefore accepted as they are.
 * <p/>
 *
 * @author varkhan
 * @date 1/30/11
 * @time 5:42 AM
 */
public class UTF8Decoder<C> extends AbstractDecoder<String,C> implements Decoder<String,C> {

    public static final Charset UTF_8 = Charset.forName("UTF-8");

    /** Payload bit mask of a lead byte, indexed by the number of continuation bytes that follow it. */
    private static final int[] LEAD_MASKS = { 0x7F, 0x1F, 0x0F, 0x07 };

    /**
     * Decodes all the remaining bytes of a stream as an UTF-8 string.
     *
     * @param stm the stream to read from, until its end
     * @param ctx the decoding context (unused)
     * @return the decoded string
     */
    public String decode(InputStream stm, C ctx) {
        try {
            return _decode(new StringBuilder(), stm).toString();
        }
        catch(IOException e) {
            /* StringBuilder never fails on append, and stream failures are wrapped by _decode -- return null to make compiler happy */
            /* NOTE(review): if DecodingException is a checked IOException subtype, malformed input also ends up here -- confirm */
            return null;
        }
    }

    /**
     * Decodes all the remaining bytes of a buffer as an UTF-8 string.
     *
     * @param buf the buffer to read from, from its position to its limit
     * @param ctx the decoding context (unused)
     * @return the decoded string
     */
    public String decode(ByteBuffer buf, C ctx) {
        try {
            return _decode(new StringBuilder(), buf).toString();
        }
        catch(IOException e) {
            /* StringBuilder never fails on append -- return null to make compiler happy */
            /* NOTE(review): if DecodingException is a checked IOException subtype, malformed input also ends up here -- confirm */
            return null;
        }
    }

    /**
     * Decodes a segment of a byte array as an UTF-8 string.
     *
     * @param dat the array to read from
     * @param pos the position of the first byte to decode
     * @param len the number of bytes to decode
     * @param ctx the decoding context (unused)
     * @return the decoded string
     */
    public String decode(byte[] dat, long pos, long len, C ctx) {
        try {
            return _decode(new StringBuilder(), dat, pos, len).toString();
        }
        catch(IOException e) {
            /* StringBuilder never fails on append -- return null to make compiler happy */
            /* NOTE(review): if DecodingException is a checked IOException subtype, malformed input also ends up here -- confirm */
            return null;
        }
    }

    /**
     * Decodes all the remaining bytes of a stream, appending the resulting characters to {@code out}.
     *
     * @param out the character sink
     * @param stm the stream to read from, until its end
     * @return {@code out}
     * @throws DecodingException if a lead byte is invalid, the stream ends inside a sequence,
     *                           the decoded value is above U+10FFFF, or reading / appending fails
     * @throws IOException       declared for the benefit of {@link Appendable}; failures raised
     *                           inside this method are wrapped in a {@link DecodingException}
     */
    public static <A extends Appendable> A _decode(A out, InputStream stm) throws IOException {
        try {
            // The first read is inside the try as well, so that an I/O failure on it is reported like any other
            for(int r=stm.read(); r>=0; r=stm.read()) {
                int n=utf8Trail(r);
                int cp=r&LEAD_MASKS[n];
                for(int i=0; i<n; i++) {
                    int t=stm.read();
                    if(t<0) throw new DecodingException("Incomplete Unicode sequence");
                    cp=(cp<<6)|(t&0x3F);
                }
                utf8Append(out, cp);
            }
        }
        catch(IOException e) {
            throw new DecodingException(e);
        }
        return out;
    }

    /**
     * Decodes the bytes of a buffer between its position and its limit, appending the
     * resulting characters to {@code out}. The buffer position is advanced past every byte consumed.
     *
     * @param out the character sink
     * @param buf the buffer to read from
     * @return {@code out}
     * @throws DecodingException if a lead byte is invalid, the buffer ends inside a sequence,
     *                           or the decoded value is above U+10FFFF
     * @throws IOException       if appending to {@code out} fails
     */
    public static <A extends Appendable> A _decode(A out, ByteBuffer buf) throws IOException {
        while(buf.hasRemaining()) {
            int b=0xFF&buf.get();
            int n=utf8Trail(b);
            int cp=b&LEAD_MASKS[n];
            for(int i=0; i<n; i++) {
                // Explicit check: a relative get() past the limit would raise a raw BufferUnderflowException
                if(!buf.hasRemaining()) throw new DecodingException("Incomplete Unicode sequence");
                cp=(cp<<6)|(buf.get()&0x3F);
            }
            utf8Append(out, cp);
        }
        return out;
    }

    /**
     * Decodes the {@code len} bytes of an array starting at {@code pos}, appending the
     * resulting characters to {@code out}.
     *
     * @param out the character sink
     * @param dat the array to read from
     * @param pos the position of the first byte to decode
     * @param len the number of bytes to decode
     * @return {@code out}
     * @throws DecodingException if a lead byte is invalid, the segment ends inside a sequence,
     *                           the decoded value is above U+10FFFF, or the segment reaches
     *                           outside of the array
     * @throws IOException       if appending to {@code out} fails
     */
    public static <A extends Appendable> A _decode(A out, byte[] dat, long pos, long len) throws IOException {
        int p=(int) pos;
        // len is a byte count, not an end index: the segment stops at pos+len
        long end=pos+len;
        try {
            while(p<end) {
                int b=0xFF&dat[p++];
                int n=utf8Trail(b);
                int cp=b&LEAD_MASKS[n];
                for(int i=0; i<n; i++) {
                    // Never borrow continuation bytes from beyond the requested segment
                    if(p>=end) throw new DecodingException("Incomplete Unicode sequence");
                    cp=(cp<<6)|(dat[p++]&0x3F);
                }
                utf8Append(out, cp);
            }
        }
        catch(ArrayIndexOutOfBoundsException e) {
            // The segment starts or ends outside of the array
            throw new DecodingException(e);
        }
        return out;
    }

    /**
     * Returns the number of continuation bytes announced by a lead byte.
     *
     * @param b the lead byte, as a value between 0x00 and 0xFF
     * @return the number of continuation bytes (0 to 3)
     * @throws DecodingException if the byte is a stray continuation byte (0x80-0xBF)
     *                           or is not a legal lead byte (0xF8-0xFF)
     */
    private static int utf8Trail(int b) throws IOException {
        if(b<0x80) return 0;
        if(b<0xC0) throw new DecodingException("Incomplete Unicode sequence");
        if(b<0xE0) return 1;
        if(b<0xF0) return 2;
        if(b<0xF8) return 3;
        throw new DecodingException("Incomplete Unicode sequence");
    }

    /**
     * Appends a decoded code point as one UTF-16 char, or as a surrogate pair
     * when it lies outside of the Basic Multilingual Plane.
     *
     * @param out the character sink
     * @param cp  the decoded code point
     * @throws DecodingException if the value is above U+10FFFF
     * @throws IOException       if appending to {@code out} fails
     */
    private static void utf8Append(Appendable out, int cp) throws IOException {
        if(cp<Character.MIN_SUPPLEMENTARY_CODE_POINT) {
            out.append((char) cp);
            return;
        }
        // Lead bytes 0xF4-0xF7 can announce values beyond the Unicode range
        if(!Character.isValidCodePoint(cp)) throw new DecodingException("Invalid Unicode code point");
        char[] pair=Character.toChars(cp);
        out.append(pair[0]);
        out.append(pair[1]);
    }

}