/*
* Copyright 2000-2016 JetBrains s.r.o.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.netty.buffer;
import io.netty.util.CharsetUtil;
import static io.netty.util.internal.StringUtil.isSurrogate;
// todo pull request
/**
 * UTF-8 encoding helper for {@link ByteBuf} that can encode a sub-range of a {@link CharSequence}.
 */
public class ByteBufUtilEx {
  /** Replacement byte written in place of an unpaired surrogate. */
  private static final byte WRITE_UTF_UNKNOWN = (byte) '?';

  /**
   * Encodes the characters {@code seq[start, end)} as UTF-8 and appends them to {@code buf},
   * advancing the writer index of {@code buf} by the number of bytes written.
   * <p>
   * Unpaired surrogates are replaced by a single {@code '?'} byte. A high surrogate is only
   * combined with the character that follows it when that character is a low surrogate and
   * lies inside the requested range; characters at or after {@code end} are never read.
   *
   * @param buf   destination buffer; made writable for the worst case of {@code 3 * (end - start)} bytes
   * @param seq   source characters
   * @param start index of the first character to encode (inclusive)
   * @param end   index after the last character to encode (exclusive)
   * @return the number of bytes written to {@code buf}
   * @throws NullPointerException if {@code buf} or {@code seq} is {@code null}
   */
  public static int writeUtf8(ByteBuf buf, CharSequence seq, int start, int end) {
    if (buf == null) {
      throw new NullPointerException("buf");
    }
    if (seq == null) {
      throw new NullPointerException("seq");
    }

    // A single char needs at most 3 bytes and a surrogate pair needs 4 bytes for 2 chars,
    // so 3 bytes per char is the worst case.
    final int len = end - start;
    final int maxSize = len * 3;
    buf.ensureWritable(maxSize);

    int oldWriterIndex;
    AbstractByteBuf buffer;
    if (buf instanceof AbstractByteBuf) {
      buffer = (AbstractByteBuf)buf;
      oldWriterIndex = buffer.writerIndex;
    }
    else {
      ByteBuf underlying = buf.unwrap();
      if (underlying instanceof AbstractByteBuf) {
        // Write through the wrapped buffer, but keep using the wrapper's writer index.
        buffer = (AbstractByteBuf)underlying;
        oldWriterIndex = buf.writerIndex();
      }
      else {
        // No fast path available: let the JDK encode, but only the requested range.
        byte[] bytes = seq.subSequence(start, end).toString().getBytes(CharsetUtil.UTF_8);
        buf.writeBytes(bytes);
        return bytes.length;
      }
    }

    int writerIndex = oldWriterIndex;
    for (int i = start; i < end; i++) {
      char c = seq.charAt(i);
      if (c < 0x80) {
        // 1 byte: 0xxxxxxx
        buffer._setByte(writerIndex++, (byte)c);
      }
      else if (c < 0x800) {
        // 2 bytes: 110xxxxx 10xxxxxx
        buffer._setByte(writerIndex++, (byte)(0xc0 | (c >> 6)));
        buffer._setByte(writerIndex++, (byte)(0x80 | (c & 0x3f)));
      }
      else if (isSurrogate(c)) {
        if (!Character.isHighSurrogate(c)) {
          // Low surrogate without a preceding high surrogate.
          buffer._setByte(writerIndex++, WRITE_UTF_UNKNOWN);
          continue;
        }

        // A surrogate pair consumes 2 characters; the second one must be inside [start, end).
        if (i + 1 >= end) {
          buffer._setByte(writerIndex++, WRITE_UTF_UNKNOWN);
          break;
        }

        final char c2 = seq.charAt(i + 1);
        if (!Character.isLowSurrogate(c2)) {
          // Unpaired high surrogate: replace it and leave c2 to the next iteration so that
          // it is encoded with its full width (or starts a surrogate pair of its own).
          buffer._setByte(writerIndex++, WRITE_UTF_UNKNOWN);
          continue;
        }

        // Both halves of the pair are consumed.
        //noinspection AssignmentToForLoopParameter
        i++;
        int codePoint = Character.toCodePoint(c, c2);
        // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
        // See http://www.unicode.org/versions/Unicode7.0.0/ch03.pdf#G2630.
        buffer._setByte(writerIndex++, (byte)(0xf0 | (codePoint >> 18)));
        buffer._setByte(writerIndex++, (byte)(0x80 | ((codePoint >> 12) & 0x3f)));
        buffer._setByte(writerIndex++, (byte)(0x80 | ((codePoint >> 6) & 0x3f)));
        buffer._setByte(writerIndex++, (byte)(0x80 | (codePoint & 0x3f)));
      }
      else {
        // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
        buffer._setByte(writerIndex++, (byte)(0xe0 | (c >> 12)));
        buffer._setByte(writerIndex++, (byte)(0x80 | ((c >> 6) & 0x3f)));
        buffer._setByte(writerIndex++, (byte)(0x80 | (c & 0x3f)));
      }
    }

    if (buf == buffer) {
      // update the writerIndex without any extra checks for performance reasons
      buffer.writerIndex = writerIndex;
    }
    else {
      buf.writerIndex(writerIndex);
    }
    return writerIndex - oldWriterIndex;
  }
}