/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.addthis.basis.util;
import java.nio.ByteBuffer;
/**
* This is a utility class for encoding numbers and bytes into the 90 characters that
* are absolutely guaranteed to work in cookies across all browsers for a long time.
*
* There are a few main ways to use this class:
* {@link #encodeBase45(long, boolean, boolean)} and {@link #decodeBase45(CharSequence)}
* provide a way to encode positive numbers only into a cookie string in a way that can be used without delimiters.
*
* {@link #encodeBase90Signed(long)}, {@link #encodeBase90Unsigned(long)}
* and {@link #decodeBase90(CharSequence, boolean)} provide a way to encode numbers into a
* cookie string in the most efficient encoding. The unsigned version returns a variable length
* string, while the signed version will always return a string of length 10.
*
* A better use of <code>encodeBase90Signed</code> is encoding binary data. That's what
* {@link #encodeBytesBase90(byte[])} and {@link #decodeBytesBase90(CharSequence)} are for.
* These methods allow encoding and decoding of arbitrary data. The byte array is converted
* to longs, encoded using <code>encodeBase90Signed</code>, and there is one extra character
* at the end telling the decoder how many bytes the last decoded long represents.
*
* @see <a href="http://stackoverflow.com/a/1969339/1238727">Allowed characters in cookies</a>
*/
public class CookieSafeBase90 {

    // These are the 90 chars we can use for the encoding, in ascending numerical order.
    // Index 0..44 is the "low" range and index 45..89 is the "high" range.
    private static final char[] BASECHARS = ("!#$%&'()*+-./0123456789:<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[]^_" +
                                             "`abcdefghijklmnopqrstuvwxyz{|}~").toCharArray();

    private static final int BASE45 = 45;
    private static final int BASE90 = 90;

    // Our char arrays need to be big enough to handle any long in base45 (at most 12 digits).
    private static final int MAX_NUMBER_LENGTH = 12;

    // Number of characters encodeBase90Signed always produces for a single long.
    private static final int SIGNED_LENGTH = 10;

    /**
     * Exposed for use by code using {@link #encodeBase45(long, boolean, boolean)}
     * Cookie parsers can use this to determine whether a character is in the high or low range of base90 characters.
     * For example, if one type of number was encoded with flipLastByte=true, the parser
     * would need to check if each character was high or low to find the end of the encoded number.
     */
    public static final int MIDDLE_CHAR = BASECHARS[BASE45];

    /**
     * Encodes a number into a string that can be used without delimiters.
     * Only positive numbers can be encoded; a negative value is encoded using the
     * absolute value of each digit. This method can be used to make custom cookie formats.
     *
     * @param value        the number to encode
     * @param highRange    whether to use the bottom 45 or top 45 characters
     * @param flipLastByte if true, the last character will be from the other range.
     *                     This also means that all numbers get encoded with at least
     *                     two characters; the first character will be a "zero" character.
     * @return The encoded string
     */
    public static String encodeBase45(long value, boolean highRange, boolean flipLastByte) {
        char[] out = new char[MAX_NUMBER_LENGTH];
        int lastSlot = out.length - 1;
        long rest = value;
        // digits are produced least-significant first, so the array is filled right to left
        for (int i = lastSlot; ; i--) {
            boolean isFlip = flipLastByte && (i == lastSlot);
            int index = (int) Math.abs(rest % BASE45);
            // high and no flip, or low and flip   -> shift into the high range
            // high and flip, or low and no flip   -> stay in the low range
            if (isFlip != highRange) {
                index += BASE45;
            }
            rest /= BASE45;
            out[i] = BASECHARS[index];
            int currentSize = out.length - i;
            boolean isLast = (rest == 0) || (i == 0);
            // a flipped encoding needs at least two chars, so a single-char result
            // is left-padded with one more "zero" digit before returning
            if (isLast && ((currentSize != 1) || !flipLastByte)) {
                return new String(out, i, currentSize);
            }
        }
    }

    /**
     * Encodes a positive number into base 90, returning a variable string length.
     * Negative numbers will be encoded as if they were positive.
     *
     * @param value the number to encode
     * @return the encoded string
     */
    public static String encodeBase90Unsigned(long value) {
        return encodeBase90(value, 0, false);
    }

    /**
     * Encodes a positive or negative number into base 90. The returned string will always be
     * ten characters. The fixed length encoding makes adding sign information easy.
     * This is used by {@link #encodeBytesBase90(byte[])} but is public in case it is useful.
     *
     * @param value the number to encode
     * @return the encoded string
     */
    public static String encodeBase90Signed(long value) {
        return encodeBase90(value, SIGNED_LENGTH, true);
    }

    // This could be made public if someone really needs it someday,
    // but for now the more complex signature is hidden.
    //
    // value     - the number to encode; each digit is the absolute value of the remainder
    // minLength - the result is left-padded with "zero" characters up to this length;
    //             must not exceed MAX_NUMBER_LENGTH
    // signed    - if true and value is negative, the leading character is shifted into the
    //             high range to record the sign
    private static String encodeBase90(long value, int minLength, boolean signed) {
        boolean negative = signed && (value < 0);
        char[] out = new char[MAX_NUMBER_LENGTH];
        int pos = out.length;
        long rest = value;
        int digit;
        // any long fits in ten base90 digits, so the array can never be exhausted here
        do {
            digit = (int) Math.abs(rest % BASE90);
            rest /= BASE90;
            out[--pos] = BASECHARS[digit];
        } while ((rest != 0) || ((out.length - pos) < minLength));
        // the leading digit of a ten-digit long is always below 45, which leaves
        // the high range free to carry the sign
        if (negative) {
            out[pos] = BASECHARS[digit + BASE45];
        }
        return new String(out, pos, out.length - pos);
    }

    /**
     * Encodes arbitrary bytes to a cookie safe string in a very efficient encoding.
     * Each sequence of up to 8 bytes is converted into a long and encoded into a
     * ten-character string using {@link #encodeBase90Signed(long)}. Then a final
     * character is added representing how many bytes in the last decoded long are
     * actually encoded bytes, for cases where <code>bytes</code> is not divisible by 8.
     * An empty array is encoded as that single final character only.
     *
     * @param bytes the data to encode
     * @return the encoded string
     */
    public static String encodeBytesBase90(byte[] bytes) {
        // each 8 bytes = 10 chars, plus one trailing count char
        int wordCount = ((bytes.length + Long.BYTES) - 1) / Long.BYTES;
        StringBuilder builder = new StringBuilder((wordCount * SIGNED_LENGTH) + 1);
        ByteBuffer buffer = ByteBuffer.wrap(bytes);
        while (buffer.remaining() >= Long.BYTES) {
            builder.append(encodeBase90Signed(buffer.getLong()));
        }
        int lastWordBytesToKeep = Long.BYTES;
        int tail = buffer.remaining();
        if (tail > 0) {
            // a partial final word is padded on the right with zero bytes
            byte[] padded = new byte[Long.BYTES];
            buffer.get(padded, 0, tail);
            builder.append(encodeBase90Signed(ByteBuffer.wrap(padded).getLong()));
            lastWordBytesToKeep = tail;
        }
        builder.append(encodeBase90Unsigned((long) lastWordBytesToKeep));
        return builder.toString();
    }

    /**
     * Decodes arbitrary bytes that were encoded by {@link #encodeBytesBase90(byte[])}.
     * The input must consist of zero or more ten-character words followed by one
     * character giving the number of bytes (1 to 8) to keep from the last word.
     *
     * @param charSequence the cookie string to decode
     * @return a decoded array of bytes
     * @throws IllegalArgumentException if the input is empty, has a length that is not
     *                                  a multiple of ten plus one, has a trailing byte count
     *                                  outside 1 to 8, or contains a character that is not
     *                                  one of the 90 encoding characters
     */
    public static byte[] decodeBytesBase90(CharSequence charSequence) {
        int len = charSequence.length() - 1;
        if ((len < 0) || ((len % SIGNED_LENGTH) != 0)) {
            throw new IllegalArgumentException(
                    "invalid base90 byte encoding, bad length: " + charSequence.length());
        }
        int lastWordBytesToKeep = decodeChar(charSequence.charAt(len));
        if ((lastWordBytesToKeep < 1) || (lastWordBytesToKeep > Long.BYTES)) {
            throw new IllegalArgumentException(
                    "invalid base90 byte encoding, bad trailing byte count: " + lastWordBytesToKeep);
        }
        int wordCount = len / SIGNED_LENGTH;
        if (wordCount == 0) {
            // the encoder writes a count of 8 when there is no data at all
            if (lastWordBytesToKeep != Long.BYTES) {
                throw new IllegalArgumentException(
                        "invalid base90 byte encoding, byte count without data: " + lastWordBytesToKeep);
            }
            return new byte[0];
        }
        ByteBuffer buffer = ByteBuffer.allocate(((wordCount - 1) * Long.BYTES) + lastWordBytesToKeep);
        for (int pos = 0; pos < len; pos += SIGNED_LENGTH) {
            long decoded = decodeBase90(charSequence.subSequence(pos, pos + SIGNED_LENGTH), true);
            if ((pos + SIGNED_LENGTH) == len) {
                // only the leading bytes of the final word are real data
                ByteBuffer temp = ByteBuffer.allocate(Long.BYTES);
                temp.putLong(decoded);
                buffer.put(temp.array(), 0, lastWordBytesToKeep);
            } else {
                buffer.putLong(decoded);
            }
        }
        return buffer.array();
    }

    /**
     * All characters are in numerical order in the BASECHARS array. Here we
     * simply find the position of the char in that array and return the index.
     *
     * @param c the char to convert from base 90
     * @return the value of the char in base 10
     * @throws IllegalArgumentException if <code>c</code> is not one of the 90 encoding characters
     */
    private static int decodeChar(int c) {
        // BASECHARS is made of five contiguous character runs; each branch maps one run
        if ('!' == c) {
            return 0;
        } else if ((c >= '#') && (c <= '+')) {
            return (c - '#') + 1;
        } else if ((c >= '-') && (c <= ':')) {
            return (c - '-') + 10;
        } else if ((c >= '<') && (c <= '[')) {
            return (c - '<') + 24;
        } else if ((c >= ']') && (c <= '~')) {
            return (c - ']') + 56;
        }
        throw new IllegalArgumentException("invalid base encoding: " + c);
    }

    /**
     * Decodes a long from a cookie string encoded by {@link #encodeBase90Signed(long)}
     * or {@link #encodeBase90Unsigned(long)}. This method cannot determine which method
     * was used to encode, it must be told if the number is signed.
     * No overflow checking is performed on the accumulated value.
     *
     * @param charSequence The cookie string to decode
     * @param signed       true if the number was encoded with <code>encodeBase90Signed</code>
     * @return the decoded number
     * @throws IllegalArgumentException if a character is not one of the 90 encoding characters
     */
    public static long decodeBase90(CharSequence charSequence, boolean signed) {
        long sum = 0;
        // switches to -1 if negative
        long sign = 1;
        int len = charSequence.length();
        for (int i = 0; i < len; i++) {
            int add = decodeChar(charSequence.charAt(i));
            // a signed negative number carries its sign as a high-range leading character
            if (signed && (i == 0) && (add >= BASE45)) {
                add -= BASE45;
                sign = -1;
            }
            sum = (sum * (long) BASE90) + (long) add;
        }
        return sum * sign;
    }

    /**
     * Decodes a long from a cookie string that was encoded by {@link #encodeBase45(long, boolean, boolean)}.
     * Does not handle negative numbers. Does handle all 4 possible permutations of each number.
     *
     * @param charSequence The cookie string to decode
     * @return the decoded number
     * @throws IllegalArgumentException if a character is not one of the 90 encoding characters
     */
    public static long decodeBase45(CharSequence charSequence) {
        long sum = 0;
        int len = charSequence.length();
        for (int i = 0; i < len; i++) {
            int add = decodeChar(charSequence.charAt(i));
            // high-range characters carry the same digit value as their low-range twin
            if (add >= BASE45) {
                add -= BASE45;
            }
            sum = (sum * (long) BASE45) + (long) add;
        }
        return sum;
    }
}