/**
* Copyright (c) 2007 by Chris Gray, /k/ Embedded Java Solutions.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of /k/ Embedded Java Solutions nor the names of other contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
* FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL /K/
* EMBEDDED SOLUTIONS OR OTHER CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
* OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
* OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* $Id: $
*/
package wonka.decoders;
import java.io.IOException;
import java.io.InputStream;
import java.io.UTFDataFormatException;
/**
* UnicodeDecoder:
*
* @author ruelens
*
* created: Jan 25, 2007
*/
public class UnicodeDecoder extends Decoder {
protected int state;
/**
* @see wonka.decoders.Decoder#bToC(byte[], int, int)
*/
public char[] bToC(byte[] bytes, int off, int len) {
if(len < 2){
return new char[0];
}
int c = bytes[off++];
c = ((c<<8) | (bytes[off++] & 0xff)) & 0xffff;
if(c == 0xfffe){ //LITTLE END
return leBToC(bytes,off,len-2);
}
if(c == 0xfeff){ //BIGEND
return beBToC(bytes,off, len-2);
}
return beBToC(bytes, off-2, len);
}
static char[] beBToC(byte[] bytes, int off, int len) {
int l = (len/2);
char[] chars = new char[l];
for(int i = 0 ; i < l ; i++){
int ch = bytes[off++]<<8;
chars[i] = (char)((ch | (bytes[off++] & 0xff)) & 0xffff);
}
return chars;
}
static char[] leBToC(byte[] bytes, int off, int len) {
int l = (len/2);
char[] chars = new char[l];
for(int i = 0 ; i < l ; i++){
int ch = bytes[off++] & 0xff;
chars[i] = (char)((ch | (bytes[off++]<<8)) & 0xffff);
}
return chars;
}
/**
* @see wonka.decoders.Decoder#cFromStream(java.io.InputStream, char[], int, int)
*/
public int cFromStream(InputStream in, char[] chars, int off, int len)
throws IOException {
if(state == UNDEFINED){
setEndianness(in);
}
int l = 2*len;
byte[] bytes = new byte[l];
int rd = in.read(bytes, 0 , l);
if(rd == -1){
return -1;
}
if(rd % 2 == 1){
int b = in.read();
if(b != -1){
bytes[rd++] = (byte)b;
}
else {
rd--;
}
}
int i = 0;
if(state == BIGEND){
while(i < rd){
int ch = bytes[i++]<<8;
chars[off++] = (char) ((ch | (bytes[i++] & 0xff)) & 0xffff);
}
}
else {
while(i < rd){
int ch = bytes[i++] & 0xff;
chars[off++] = (char) ((ch | (bytes[i++]<<8)) & 0xffff);
}
}
return rd/2;
}
private void setEndianness(InputStream in) throws IOException {
int b1 = in.read();
int b2 = in.read();
if (b1 == 0xfe && b2 == 0xff) {
state = BIGEND;
}
else if (b1 == 0xff && b2 == 0xfe) {
state = LITTLEEND;
}
else {
throw new UTFDataFormatException("missing BOM");
}
}
/**
* @see wonka.decoders.Decoder#cToB(char[], int, int)
*/
public byte[] cToB(char[] chars, int off, int len) {
byte[] bytes = new byte[len*2 + 2];
len += off;
bytes[0] = (byte)0xfe;
bytes[1] = (byte)0xff;
int o = 2;
for(int i = off; i < len ; i++){
int ch = chars[i];
bytes[o++] = (byte)(ch>>8);
bytes[o++] = (byte)ch;
}
return bytes;
}
/**
* @see wonka.decoders.Decoder#getChar(java.io.InputStream)
*/
public int getChar(InputStream in) throws IOException {
if(state == UNDEFINED){
setEndianness(in);
}
int b1 = in.read();
if(b1 == -1){
return -1;
}
int b2 = in.read();
if(b2 == -1){
return -1;
}
if(state == BIGEND){
return (b1<<8) | b2;
}
return (b2<<8) | b1;
}
/**
* @see wonka.decoders.Decoder#getEncoding()
*/
public String getEncoding() {
return "Unicode";
}
protected Decoder getInstance() {
try {
return (Decoder)this.getClass().newInstance();
} catch (Exception e) {
}
return this;
}
}