package com.cloudhopper.commons.charset;

/*
 * #%L
 * ch-commons-charset
 * %%
 * Copyright (C) 2012 Cloudhopper by Twitter
 * %%
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *      http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * #L%
 */

/**
 * Utility for packing and unpacking 8-bit to/from 7-bit byte arrays.
 * <p>
 * Both methods are stateless, never modify their argument and always return
 * a newly allocated array (or <code>null</code> for a <code>null</code> argument).
 *
 * @author joelauer (twitter: @jjlauer or <a href="http://twitter.com/jjlauer" target="window">http://twitter.com/jjlauer</a>)
 */
public class GSMBitPacker {

    /**
     * Pack a byte array according to the GSM bit-packing algorithm.
     * The GSM specification defines a simple compression mechanism for its
     * default alphabet to pack more message characters into a smaller space.
     * Since the alphabet only contains 128 symbols, each one can be represented
     * in 7 bits. The packing algorithm squeezes the bits for each symbol
     * "down" into the preceding byte (so bit 7 of the first byte actually
     * contains bit 0 of the second symbol in a default alphabet string, bits
     * 6 and 7 in the second byte contain bits 0 and 1 of the third symbol etc.)
     * Since the maximum short message length is 140 <b>bytes</b>, you save
     * one bit per byte using the default alphabet giving you a total of
     * 140 * 8 / 7 = 160 characters to use. This is where the 160 character
     * limit comes from in SMPP packets.
     * <p>
     * Having said all that, most SMSCs do <b>NOT</b> use the packing
     * algorithm when communicating over TCP/IP. They either use a full
     * 8-bit alphabet such as ASCII or Latin-1, or they accept the default
     * alphabet in its unpacked form. As such, you will be unlikely to
     * need this method.
     * <p>
     * Only the low 7 bits of every input byte are used; the top bit is
     * discarded. Unused high bits of the last packed byte are left as zero.
     *
     * @param unpacked The unpacked byte array (one 7-bit symbol per byte).
     * @return A new byte array containing the bytes in their packed form,
     *      or <code>null</code> if <code>unpacked</code> is <code>null</code>.
     */
    public static byte[] pack(byte[] unpacked) {
        if (unpacked == null) {
            return null;
        }

        // ceil(len * 7 / 8) == len - floor(len / 8): every 8 symbols save one byte
        final int len = unpacked.length;
        final byte[] packed = new byte[len - (len / 8)];

        int current = 0;    // index of the packed byte currently being filled
        int bitpos = 0;     // bit offset in packed[current] where the next symbol starts
        for (int i = 0; i < len; i++) {
            final int septet = unpacked[i] & 0x7F;  // remove top bit

            // low bits of the symbol go into the free high bits of the current byte
            packed[current] |= (byte) (septet << bitpos);

            // fewer than 7 bits were free: the remaining high bits of the
            // symbol spill over into the low bits of the next byte
            if (bitpos >= 2) {
                packed[++current] |= (byte) (septet >> (8 - bitpos));
            }

            bitpos = (bitpos + 7) % 8;
            if (bitpos == 0) {
                // the symbol ended exactly on a byte boundary
                current++;
            }
        }
        return packed;
    }

    /**
     * Unpack a byte array according to the GSM bit-packing algorithm.
     * Read the full description in the documentation of the
     * <code>pack</code> method.
     * <p>
     * <b>Trailing '@' ambiguity:</b> when the packed length is a multiple of 7
     * the last 7 bits may either be a real trailing '@' character (which is
     * mapped to 0) or merely zero-bit padding after 7 actual symbols, and the
     * two cases cannot be told apart from the packed data alone. This method
     * opts for the latter, since it is far more likely, at the cost of losing
     * a trailing '@' in strings whose unpacked size modulo 8 is 0 and whose
     * last character is '@'. An application that wishes to handle this rare
     * case properly must disambiguate it externally, such as by obtaining the
     * original string length and appending the trailing '@' if the length
     * shows that there is one character missing.
     *
     * @see #pack(byte[])
     * @param packed The packed byte array.
     * @return A new byte array containing the unpacked bytes (one 7-bit symbol
     *      per byte, top bit always clear), or <code>null</code> if
     *      <code>packed</code> is <code>null</code>.
     * @throws IllegalArgumentException if <code>packed</code> is so large that
     *      the unpacked form cannot fit in a single array.
     */
    public static byte[] unpack(byte[] packed) {
        if (packed == null) {
            return null;
        }

        // floor(n * 8 / 7) symbols fit into n bytes; computed as a long because
        // n * 8 overflows an int once n reaches 2^28
        final long symbolCount = (packed.length * 8L) / 7;
        if (symbolCount > Integer.MAX_VALUE) {
            throw new IllegalArgumentException("Packed array of " + packed.length
                    + " bytes is too large to unpack into a single array");
        }
        final int len = (int) symbolCount;
        byte[] unpacked = new byte[len];

        int current = 0;    // index of the packed byte currently being read
        int bitpos = 0;     // bit offset in packed[current] where the next symbol starts
        for (int i = 0; i < len; i++) {
            // low bits of the symbol are the remaining high bits of the current byte
            int septet = ((packed[current] & 0xFF) >> bitpos) & 0x7F;

            // fewer than 7 bits were left: the high bits of the symbol are
            // the low bits of the next byte
            if (bitpos >= 2) {
                septet |= (packed[++current] << (8 - bitpos)) & 0x7F;
            }
            unpacked[i] = (byte) septet;

            bitpos = (bitpos + 7) % 8;
            if (bitpos == 0) {
                // the symbol ended exactly on a byte boundary
                current++;
            }
        }

        // resolve the trailing '@' vs. padding ambiguity (see Javadoc) in
        // favour of padding by dropping the final zero symbol
        if (len > 0 && len % 8 == 0 && unpacked[len - 1] == 0) {
            byte[] fixed = new byte[len - 1];
            System.arraycopy(unpacked, 0, fixed, 0, len - 1);
            unpacked = fixed;
        }
        return unpacked;
    }
}