UTF8CharacterConverter.java example

Explorer
TotalCrossSDK-master
- TotalCrossSDK
  - docs
    - companion_resources
      - listings
        HelloWorld.java
        TestConcurrent.java
  - src
    - tc
    - totalcross
/*********************************************************************************
 *  TotalCross Software Development Kit                                          *
 *  Copyright (C) 2003-2004 Jaxo Systems - Pierre G. Richard                     *
 *  Copyright (C) 2003-2012 SuperWaba Ltda.                                      *
 *  All Rights Reserved                                                          *
 *                                                                               *
 *  This library and virtual machine is distributed in the hope that it will     *
 *  be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of    *
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.                         *
 *                                                                               *
 *  This file is covered by the GNU LESSER GENERAL PUBLIC LICENSE VERSION 3.0    *
 *  A copy of this license is located in file license.txt at the root of this    *
 *  SDK or can be downloaded here:                                               *
 *  http://www.gnu.org/licenses/lgpl-3.0.txt                                     *
 *                                                                               *
 *********************************************************************************/



package totalcross.sys;

/**
* This class is used to correctly handle UTF-8 byte to UCS-2 character conversions.
* <P>
* To use this class, you can call
* <pre>totalcross.sys.Convert.setDefaultConverter("UTF8");</pre>
* <P>
* Only 1, 2 and 3 byte UTF-8 sequences are decoded, which is what is needed to cover
* the 16-bit range of a <code>char</code>. Anything else found in the input is replaced
* by a question mark.
*
* @see totalcross.sys.Convert#charConverter
* @see totalcross.sys.Convert#setDefaultConverter(String)
* @see totalcross.sys.CharacterConverter
* @author  Pierre G. Richard
*/
public class UTF8CharacterConverter extends CharacterConverter
{
   /** Character emitted in place of a malformed, truncated or unsupported byte sequence. */
   private static final char REPLACEMENT = '?';

   /**
   * Convert UTF-8 bytes to UCS-2 characters.
   * <P>
   * Malformed input never raises an error; it is replaced as follows:
   * <ul>
   * <li>a byte that cannot start a 1, 2 or 3 byte sequence (a stray continuation byte
   *     0x80-0xBF, or a lead byte 0xF0-0xFF) yields one <code>'?'</code>;</li>
   * <li>a lead byte whose expected continuation byte is missing yields one <code>'?'</code>
   *     and decoding resumes at the offending byte;</li>
   * <li>a sequence cut short by the end of the range yields one <code>'?'</code> and
   *     ends the conversion.</li>
   * </ul>
   *
   * @param bytes byte array to convert
   * @param start first byte to convert in the byte array
   * @param length number of bytes to convert
   * @return UCS-2 character array resulting from the conversion, sized to the exact
   *         number of characters produced
   */
   public char[] bytes2chars(byte[] bytes, int start, int length)
   {
      int end = start + length;
      int tgtOfs = 0;
      // Upper bound: every character written below consumes at least one input byte.
      char[] chars = new char[length];

      while (start < end)
      {
         int c0 = bytes[start++] & 0xFF;
         if (c0 < 0x80)                             // 1 byte sequence: 0xxxxxxx
         {
            chars[tgtOfs++] = (char)c0;
            continue;
         }
         // Only 0xC0-0xEF can start a 2 or 3 byte sequence. Without this check a stray
         // continuation byte or a 4+ byte lead would be decoded as if it were a 3 byte
         // lead and produce an arbitrary character.
         if (c0 < 0xC0 || c0 >= 0xF0)
         {
            chars[tgtOfs++] = REPLACEMENT;
            continue;
         }
         if (start >= end)                          // sequence truncated by end of input
         {
            chars[tgtOfs++] = REPLACEMENT;
            break;
         }
         // XOR with 0x80 turns a valid continuation byte (10xxxxxx) into its 6 payload
         // bits (00xxxxxx); any other byte keeps at least one of the two top bits set.
         int c1 = (bytes[start++] & 0xFF) ^ 0x80;   // 2nd byte
         if ((c1 & 0xC0) != 0)                      // not a continuation byte?
         {
            --start;                                // re-read it as the start of a new sequence
            chars[tgtOfs++] = REPLACEMENT;
            continue;
         }
         if (c0 < 0xE0)                             // 2 bytes sequence: 110xxxxx 10xxxxxx
         {
            chars[tgtOfs++] = (char)(((c0 & 0x1F) << 6) | c1);
            continue;
         }
         if (start >= end)                          // sequence truncated by end of input
         {
            chars[tgtOfs++] = REPLACEMENT;
            break;
         }
         int c2 = (bytes[start++] & 0xFF) ^ 0x80;   // 3rd byte
         if ((c2 & 0xC0) != 0)                      // not a continuation byte?
         {
            --start;                                // re-read it as the start of a new sequence
            chars[tgtOfs++] = REPLACEMENT;
            continue;
         }
         // 3 bytes sequence: 1110xxxx 10xxxxxx 10xxxxxx
         chars[tgtOfs++] = (char)(((c0 & 0x0F) << 12) | (c1 << 6) | c2);
      }
      if (chars.length > tgtOfs)                    // too much room left
      {
         char[] temp = new char[tgtOfs];            // shrink to exact size
         Vm.arrayCopy(chars, 0, temp, 0, tgtOfs);
         chars = temp;
      }
      return chars;
   }

   /**
   * Convert UCS-2 characters to UTF-8 bytes.
   * <P>
   * Each character is encoded on its own: values below 0x80 take 1 byte, values below
   * 0x800 take 2 bytes and all remaining values take 3 bytes. Surrogate code units are
   * not combined; each one is written as its own 3 byte sequence.
   *
   * @param chars character array to convert
   * @param start first character to convert in the character array
   * @param length number of characters to convert
   * @return UTF-8 byte array resulting from the conversion, sized to the exact number
   *         of bytes produced
   */
   public byte[] chars2bytes(char[] chars, int start, int length)
   {
      int tgtOfs = 0;
      int end = start + length;
      byte[] bytes = new byte[length * 3]; // guich@566_5: worst case is all chars >= 0x800, which leads to 3 x length

      while (start < end)
      {
         int r = chars[start++];
         if (r < 0x80)                              // 1 byte sequence: 0xxxxxxx
         {
            bytes[tgtOfs++] = (byte)r;
         }
         else if (r < 0x800)                        // 2 bytes sequence: 110xxxxx 10xxxxxx
         {
            bytes[tgtOfs++] = (byte)(0xC0 | (r >> 6));
            bytes[tgtOfs++] = (byte)(0x80 | (r & 0x3F));
         }
         else                                       // 3 bytes sequence: 1110xxxx 10xxxxxx 10xxxxxx
         {
            bytes[tgtOfs++] = (byte)(0xE0 | (r >> 12));
            bytes[tgtOfs++] = (byte)(0x80 | ((r >> 6) & 0x3F));
            bytes[tgtOfs++] = (byte)(0x80 | (r & 0x3F));
         }
      }
      if (bytes.length > tgtOfs)                    // too much room left
      {
         byte[] temp = new byte[tgtOfs];            // shrink to exact size
         Vm.arrayCopy(bytes, 0, temp, 0, tgtOfs);
         bytes = temp;
      }
      return bytes;
   }

   // Native declarations; their implementation is not part of this file.
   // NOTE(review): presumably the device-side counterparts of bytes2chars/chars2bytes -
   // confirm, and verify that they treat malformed input the same way as the Java code.
   native public char[] bytes2chars4D(byte[] bytes, int offset, int length);
   native public byte[] chars2bytes4D(char[] chars, int offset, int length);
}