/* * This program is free software; you can redistribute it and/or modify it under the * terms of the GNU Lesser General Public License, version 2.1 as published by the Free Software * Foundation. * * You should have received a copy of the GNU Lesser General Public License along with this * program; if not, you can obtain a copy at http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html * or from the Free Software Foundation, Inc., * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. * * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * See the GNU Lesser General Public License for more details. * * Copyright (c) 2006 - 2013 Pentaho Corporation and Contributors. All rights reserved. */ package org.pentaho.reporting.libraries.fonts.encoding.manual; import org.pentaho.reporting.libraries.fonts.encoding.ByteBuffer; import org.pentaho.reporting.libraries.fonts.encoding.ByteStream; import org.pentaho.reporting.libraries.fonts.encoding.CodePointBuffer; import org.pentaho.reporting.libraries.fonts.encoding.CodePointStream; import org.pentaho.reporting.libraries.fonts.encoding.ComplexEncoding; import org.pentaho.reporting.libraries.fonts.encoding.EncodingErrorType; import org.pentaho.reporting.libraries.fonts.encoding.EncodingException; import java.util.Locale; public class Utf16LE implements ComplexEncoding { public static final int MAX_CHAR = 0x10FFFD; private static Utf16LE instance; public static synchronized Utf16LE getInstance() { if ( instance == null ) { instance = new Utf16LE(); } return instance; } public Utf16LE() { } public String getName() { return "UTF-16LE"; } public String getName( final Locale locale ) { return "UTF-16LE"; } public boolean isUnicodeCharacterSupported( final int c ) { return ( c > 0 ) && ( c < MAX_CHAR ) && // this is the maximum number of characters defined. ( c & 0xFFFFF800 ) == 0xD800; // this is the replacement zone. } /** * Encode, but ignore errors. * * @param text * @param buffer * @return */ public ByteBuffer encode( final CodePointBuffer text, ByteBuffer buffer ) { final int textLength = text.getLength(); if ( buffer == null ) { buffer = new ByteBuffer( textLength * 2 ); } else if ( ( buffer.getLength() * 2 ) < textLength ) { buffer.ensureSize( textLength * 2 ); } final ByteStream target = new ByteStream( buffer, textLength ); final int[] sourceArray = text.getData(); final int endPos = text.getCursor(); for ( int i = text.getOffset(); i < endPos; i++ ) { final int sourceItem = sourceArray[ i ]; if ( sourceItem < 0 || sourceItem > MAX_CHAR ) { continue; } if ( sourceItem <= 0xFFFF ) { if ( sourceItem >= 0xD800 && sourceItem <= 0xDFFF ) { // this is an error condition. We ignore it for now .. continue; } target.put( (byte) ( ( sourceItem & 0xff00 ) >> 8 ) ); target.put( (byte) ( sourceItem & 0xff ) ); } else { // compute the weird replacement mode chars .. final int derivedSourceItem = sourceItem - 0x10000; final int highWord = 0xD800 | ( ( derivedSourceItem & 0xFFC00 ) >> 10 ); target.put( (byte) ( ( highWord & 0xff00 ) >> 8 ) ); target.put( (byte) ( highWord & 0xff ) ); final int lowWord = 0xDC00 | ( derivedSourceItem & 0x3FF ); target.put( (byte) ( ( lowWord & 0xff00 ) >> 8 ) ); target.put( (byte) ( lowWord & 0xff ) ); } } target.close(); return buffer; } public CodePointBuffer decode( final ByteBuffer text, CodePointBuffer buffer ) { final int textLength = text.getLength(); if ( buffer == null ) { buffer = new CodePointBuffer( textLength / 2 ); } else if ( ( buffer.getLength() / 2 ) < textLength ) { buffer.ensureSize( textLength / 2 ); } final int[] targetData = buffer.getData(); final ByteStream sourceBuffer = new ByteStream( text, 10 ); // this construct gives us an even number ... int position = buffer.getOffset(); while ( sourceBuffer.getReadSize() >= 2 ) { final int highByte = ( sourceBuffer.get() & 0xff ); final int lowByte = ( sourceBuffer.get() & 0xff ); if ( ( highByte & 0xFC ) == 0xD8 ) { if ( sourceBuffer.getReadSize() < 2 ) { // we reached the end of the parsable stream ... // this is an error condition // Log.debug("Reached the end .."); break; } final int highByteL = ( sourceBuffer.get() & 0xff ); final int lowByteL = ( sourceBuffer.get() & 0xff ); if ( ( highByteL & 0xFC ) == 0xDC ) { // decode the extended CodePoint ... int result = lowByteL; result |= ( highByteL & 0x03 ) << 8; result |= lowByte << 10; result |= ( highByte & 0x03 ) << 18; targetData[ position ] = result + 0x10000; position += 1; } else { // this is an error condition. // Log.debug("error 1.."); } } else if ( ( highByte & 0xFC ) == 0xDC ) { // this is an error condition .. // skip this word .. // Log.debug("error 2.."); } else { // decode the simple mode ... targetData[ position ] = ( highByte << 8 ) | lowByte; position += 1; } } buffer.setCursor( position ); return buffer; } public ByteBuffer encode( final CodePointBuffer text, final ByteBuffer buffer, final EncodingErrorType errorHandling ) throws EncodingException { return encode( text, buffer ); } public CodePointBuffer decode( final ByteBuffer text, final CodePointBuffer buffer, final EncodingErrorType errorHandling ) throws EncodingException { return decode( text, buffer ); } /** * Checks, whether this implementation supports encoding of character data. * * @return */ public boolean isEncodingSupported() { return true; } public CodePointBuffer decodeString( final String text, final CodePointBuffer buffer ) { final char[] chars = text.toCharArray(); final int textLength = chars.length; return decode( chars, 0, textLength, buffer ); } public CodePointBuffer decode( final char[] chars, final int offset, final int length, CodePointBuffer buffer ) { if ( buffer == null ) { buffer = new CodePointBuffer( length ); } else if ( ( buffer.getLength() ) < length ) { buffer.ensureSize( length ); } final CodePointStream cps = new CodePointStream( buffer, 10 ); final int maxPos = offset + length; for ( int i = offset; i < maxPos; i++ ) { final char c = chars[ i ]; if ( ( c & 0xFC00 ) == 0xD800 ) { i += 1; if ( i < maxPos ) { final char c2 = chars[ i ]; if ( ( c2 & 0xFC00 ) == 0xDC00 ) { final int codePoint = 0x10000 + ( ( c2 & 0x3FF ) | ( ( c & 0x3FF ) << 10 ) ); cps.put( codePoint ); } else { // Should not happen .. } } else { // illegal char .. ignore it .. // of course: This should not happen, as this produced by JDK code break; } } else { cps.put( c ); } } cps.close(); return buffer; } public String encodeString( final CodePointBuffer buffer ) { final StringBuffer stringBuffer = new StringBuffer( buffer.getLength() * 3 / 2 ); final int[] data = buffer.getData(); final int endPos = buffer.getCursor(); for ( int i = buffer.getOffset(); i < endPos; i++ ) { final int codePoint = data[ i ]; if ( codePoint < 0x10000 ) { stringBuffer.append( (char) codePoint ); } else { // oh, no, we have to decode ... // compute the weird replacement mode chars .. final int derivedSourceItem = codePoint - 0x10000; final int highWord = 0xD800 | ( ( derivedSourceItem & 0xFFC00 ) >> 10 ); final int lowWord = 0xDC00 | ( derivedSourceItem & 0x3FF ); stringBuffer.append( (char) highWord ); stringBuffer.append( (char) lowWord ); } } // Log.debug ("Encoded:" + stringBuffer + " (" + buffer.getOffset() + ", " + endPos + ")"); return stringBuffer.toString(); } // public static void main (String[] args) // throws UnsupportedEncodingException // { // Utf16LE utf = new Utf16LE(); // final String text = "The lazy fox jumps over the lemon tree"; // byte[] bytes = text.getBytes("UTF16"); // CodePointBuffer cp = utf.decode // (new ByteBuffer(bytes), new CodePointBuffer(text.length())); // int[] cps = cp.getData(); // // final int length = cp.getLength(); // for (int i = 1; i < length; i++) // { // int cp1 = cps[i]; // if (cp1 != text.charAt(i - 1)) // { // throw new IllegalStateException("Error at " + i + ": " + // Integer.toHexString(cp1) + " vs " + // Integer.toHexString(text.charAt(i - 1))); // } // } // } }