/*
* Copyright (c) 2015-2016, Christoph Engelbert (aka noctarius) and
* contributors. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.noctarius.tengi.spi.serialization.codec.impl.utf8;
import com.noctarius.tengi.core.impl.MathUtil;
import com.noctarius.tengi.core.serialization.codec.Decoder;
import com.noctarius.tengi.core.serialization.codec.Encoder;
import com.noctarius.tengi.spi.logging.Logger;
import com.noctarius.tengi.spi.logging.LoggerManager;
import java.io.UTFDataFormatException;
import java.lang.reflect.Constructor;
/**
* Patched and broken-down version of the Hazelcast UTFEncoderDecoder, written
* by @noctarius, @serkan-ozal and the people who probably fixed the bugs :-)
* <p>
* https://github.com/hazelcast/hazelcast/blob/master/hazelcast/src/main/java/com/hazelcast/nio/UTFEncoderDecoder.java
*/
public class UTF8Codec {

    private static final Logger LOGGER = LoggerManager.getLogger(UTF8Codec.class);

    /** Chunking granularity, in characters, used by both the writer and the reader. */
    private static final int STRING_CHUNK_SIZE = 16 * 1024;

    /**
     * Shared codec behind the static entry points. Declared after {@link #LOGGER}
     * because the factory methods used to build it log on failure.
     */
    private static final UTF8Codec INSTANCE = buildUTF8Codec();

    /** Turns the decoded character array into a {@link String}. Never {@code null} for {@link #INSTANCE}. */
    private final StringCreator stringCreator;

    /** Encodes a character range of a string as one length-prefixed chunk. */
    private final UtfWriter utfWriter;

    UTF8Codec(StringCreator stringCreator, UtfWriter utfWriter) {
        this.stringCreator = stringCreator;
        this.utfWriter = utfWriter;
    }

    /**
     * Writes {@code value} to {@code encoder} using the shared codec instance.
     *
     * @param encoder target of the encoded data
     * @param value   string to encode; must not be {@code null}
     * @param buffer  scratch buffer whose length has to be a power of two
     * @throws IllegalArgumentException if the buffer length is not a power of two
     * @throws Exception                if the encoder fails
     */
    public static void writeUTF(Encoder encoder, String value, byte[] buffer)
            throws Exception {

        INSTANCE.writeUTF0(encoder, value, buffer);
    }

    /**
     * Reads a string previously written by {@link #writeUTF(Encoder, String, byte[])}
     * from {@code decoder} using the shared codec instance.
     *
     * @param decoder source of the encoded data
     * @param buffer  scratch buffer whose length has to be a power of two
     * @return the decoded string
     * @throws IllegalArgumentException if the buffer length is not a power of two
     * @throws UTFDataFormatException   if the stream content is malformed
     * @throws Exception                if the decoder fails
     */
    public static String readUTF(Decoder decoder, byte[] buffer)
            throws Exception {

        return INSTANCE.readUTF0(decoder, buffer);
    }

    /**
     * Writes the character count twice (the reader compares both values as a sanity
     * check) followed by the string content split into chunks.
     */
    void writeUTF0(Encoder encoder, String value, byte[] buffer)
            throws Exception {

        requirePowerOfTwo(buffer);

        int length = value.length();
        encoder.writeInt32("length1", length);
        encoder.writeInt32("length2", length);
        if (length > 0) {
            // The first chunk covers STRING_CHUNK_SIZE - 1 characters, every following
            // chunk STRING_CHUNK_SIZE characters; readUTF0 derives the same begin indexes.
            int chunkSize = (length / STRING_CHUNK_SIZE) + 1;
            for (int i = 0; i < chunkSize; i++) {
                int beginIndex = Math.max(0, i * STRING_CHUNK_SIZE - 1);
                int endIndex = Math.min((i + 1) * STRING_CHUNK_SIZE - 1, length);
                utfWriter.writeShortUTF(encoder, value, beginIndex, endIndex, buffer);
            }
        }
    }

    /**
     * Reads the duplicated character count, verifies it and then decodes all chunks
     * into a character array that is handed to the {@link StringCreator}.
     *
     * @throws UTFDataFormatException if the two length values differ, the length is
     *                                negative or a chunk is malformed
     */
    String readUTF0(Decoder decoder, byte[] buffer)
            throws Exception {

        requirePowerOfTwo(buffer);

        int length = decoder.readInt32("length");
        int lengthCheck = decoder.readInt32("lengthCheck");
        if (length != lengthCheck) {
            throw new UTFDataFormatException("Length check failed, maybe broken bytestream or wrong stream position");
        }
        if (length < 0) {
            // Report a corrupt stream as such instead of a NegativeArraySizeException
            throw new UTFDataFormatException("Negative string length, maybe broken bytestream or wrong stream position");
        }

        final char[] data = new char[length];
        if (length > 0) {
            int chunkSize = length / STRING_CHUNK_SIZE + 1;
            for (int i = 0; i < chunkSize; i++) {
                int beginIndex = Math.max(0, i * STRING_CHUNK_SIZE - 1);
                readShortUTF(decoder, data, beginIndex, buffer);
            }
        }
        return stringCreator.buildString(data);
    }

    /**
     * Decodes one chunk: an unsigned 16 bit byte count followed by that many bytes of
     * 1-, 2- or 3-byte encoded characters, stored into {@code data} from {@code beginIndex} on.
     *
     * @throws UTFDataFormatException if the chunk is empty or contains an invalid byte sequence
     */
    private void readShortUTF(Decoder decoder, char[] data, int beginIndex, byte[] buffer)
            throws Exception {

        final int utfLength = decoder.readShort("length") & 0xFFFF;
        if (utfLength == 0) {
            // Chunks are only read for non-empty strings, so a chunk without bytes cannot
            // be valid. Without this guard the code below would decode stale buffer content.
            throw new UTFDataFormatException("malformed input: empty chunk at char index " + beginIndex);
        }

        // buffer[0] is used to hold read data
        // so actual useful length of buffer is as "length - 1"
        final int minUtfLength = Math.min(utfLength, buffer.length - 1);
        final int bufferLimit = minUtfLength + 1;

        // We use buffer[0] to hold read data, so position starts from 1
        int bufferPos = 1;
        int charArrCount = beginIndex;

        // The first readable data is at 1. index since 0. index is used to hold read data.
        decoder.readBytes("data", buffer, 1, minUtfLength);

        // Fast path: copy leading single byte (ASCII) characters straight from the buffer
        int c1 = buffer[bufferPos++] & 0xFF;
        while (bufferPos != bufferLimit) {
            if (c1 > 127) {
                break;
            }
            data[charArrCount++] = (char) c1;
            c1 = buffer[bufferPos++] & 0xFF;
        }

        // The byte read last has not been stored yet, step back so the loop below handles it
        bufferPos--;
        int readCount = bufferPos - 1;

        int c2;
        int c3;
        int cTemp;
        while (readCount < utfLength) {
            bufferPos = buffered(buffer, bufferPos, utfLength, readCount++, decoder);
            c1 = buffer[0] & 0xFF;
            cTemp = c1 >> 4;
            if (cTemp >> 3 == 0) {
                // ((cTemp & 0xF8) == 0) or (cTemp <= 7 && cTemp >= 0)
                /* 0xxxxxxx */
                data[charArrCount++] = (char) c1;
            } else if (cTemp == 12 || cTemp == 13) {
                /* 110x xxxx 10xx xxxx */
                if (readCount + 1 > utfLength) {
                    throw new UTFDataFormatException("malformed input: partial character at end");
                }
                bufferPos = buffered(buffer, bufferPos, utfLength, readCount++, decoder);
                c2 = buffer[0] & 0xFF;
                if ((c2 & 0xC0) != 0x80) {
                    // Parentheses are required, otherwise the numbers are concatenated as text
                    throw new UTFDataFormatException("malformed input around byte " + (beginIndex + readCount + 1));
                }
                data[charArrCount++] = (char) (((c1 & 0x1F) << 6) | (c2 & 0x3F));
            } else if (cTemp == 14) {
                /* 1110 xxxx 10xx xxxx 10xx xxxx */
                if (readCount + 2 > utfLength) {
                    throw new UTFDataFormatException("malformed input: partial character at end");
                }
                bufferPos = buffered(buffer, bufferPos, utfLength, readCount++, decoder);
                c2 = buffer[0] & 0xFF;
                bufferPos = buffered(buffer, bufferPos, utfLength, readCount++, decoder);
                c3 = buffer[0] & 0xFF;
                if (((c2 & 0xC0) != 0x80) || ((c3 & 0xC0) != 0x80)) {
                    throw new UTFDataFormatException("malformed input around byte " + (beginIndex + readCount + 1));
                }
                data[charArrCount++] = (char) (((c1 & 0x0F) << 12) | ((c2 & 0x3F) << 6) | ((c3 & 0x3F)));
            } else {
                /* 10xx xxxx, 1111 xxxx */
                throw new UTFDataFormatException("malformed input around byte " + (beginIndex + readCount));
            }
        }
    }

    /**
     * Moves the next byte of the chunk into {@code buffer[0]}, refilling the buffer
     * from the decoder once it has been consumed completely.
     *
     * @param pos       buffer index of the byte to fetch
     * @param utfLength total number of bytes of the current chunk
     * @param readCount number of chunk bytes consumed before this call
     * @return the buffer index to pass to the next call
     */
    private int buffered(byte[] buffer, int pos, int utfLength, int readCount, Decoder decoder)
            throws Exception {

        if (pos < buffer.length) {
            // 0. index of buffer is used to hold read data
            // so copy read data to there.
            buffer[0] = buffer[pos];
            return pos + 1;
        }

        // Buffer exhausted: fetch the next part of the chunk, at most what is left of it
        decoder.readBytes("data", buffer, 1, Math.min(buffer.length - 1, utfLength - readCount));
        // The first readable data is at 1. index since 0. index is used to
        // hold read data.
        // So the next one will be 2. index.
        buffer[0] = buffer[1];
        return 2;
    }

    /** Creates a {@link StringCreator}, preferring the fast, constructor based variant. */
    static StringCreator createStringCreator() {
        return createStringCreator(true);
    }

    /**
     * Creates a {@link StringCreator}.
     *
     * @param fastStringEnabled whether the variant based on a non-public {@code String}
     *                          constructor should be tried first
     * @return the fast creator if requested and available, otherwise a
     * {@link DefaultStringCreator}; never {@code null}
     */
    static StringCreator createStringCreator(boolean fastStringEnabled) {
        if (fastStringEnabled) {
            StringCreator fastStringCreator = buildFastStringCreator();
            if (fastStringCreator != null) {
                return fastStringCreator;
            }
            // The non-public constructors do not exist or cannot be opened on this runtime
        }
        return new DefaultStringCreator();
    }

    /** Selects the first available {@link UtfWriter}: Unsafe based, reflection based, then String based. */
    static UtfWriter createUtfWriter() {
        // Try Unsafe based implementation
        UnsafeBasedCharArrayUtfWriter unsafeBasedUtfWriter = new UnsafeBasedCharArrayUtfWriter();
        if (unsafeBasedUtfWriter.isAvailable()) {
            return unsafeBasedUtfWriter;
        }

        // If Unsafe based implementation is not available for usage
        // Try Reflection based implementation
        ReflectionBasedCharArrayUtfWriter reflectionBasedUtfWriter = new ReflectionBasedCharArrayUtfWriter();
        if (reflectionBasedUtfWriter.isAvailable()) {
            return reflectionBasedUtfWriter;
        }

        // If Reflection based implementation is not available for usage
        return new StringBasedUtfWriter();
    }

    private static UTF8Codec buildUTF8Codec() {
        UtfWriter utfWriter = createUtfWriter();
        StringCreator stringCreator = createStringCreator();
        return new UTF8Codec(stringCreator, utfWriter);
    }

    /**
     * Tries to build a {@link FastStringCreator} around one of the non-public
     * {@code String} constructors.
     *
     * @return the creator, or {@code null} if no such constructor can be looked up or
     * made accessible on the running JVM
     */
    private static StringCreator buildFastStringCreator() {
        try {
            // Give access to the package private String constructor
            Constructor<String> constructor;
            if (UTF8Codec.useOldStringConstructor()) {
                constructor = String.class.getDeclaredConstructor(int.class, int.class, char[].class);
            } else {
                constructor = String.class.getDeclaredConstructor(char[].class, boolean.class);
            }
            // getDeclaredConstructor either returns a constructor or throws, never null
            constructor.setAccessible(true);
            return new FastStringCreator(constructor);
        } catch (Throwable t) {
            // Best effort probing only, any failure just means the fast path is unavailable
            LOGGER.trace("No fast string creator seems to be available, falling back to default string creator");
        }
        return null;
    }

    /** Tells whether {@code String} declares the {@code (int, int, char[])} constructor. */
    private static boolean useOldStringConstructor() {
        try {
            Class<String> clazz = String.class;
            clazz.getDeclaredConstructor(int.class, int.class, char[].class);
            return true;
        } catch (Throwable t) {
            LOGGER.trace("Old String constructor doesn't seem available");
        }
        return false;
    }

    /** Rejects scratch buffers whose length is not a power of two. */
    private static void requirePowerOfTwo(byte[] buffer) {
        if (!MathUtil.isPowerOfTwo(buffer.length)) {
            throw new IllegalArgumentException("Size of the buffer has to be power of two, was " + buffer.length);
        }
    }
}