package se.sperber.cryson.util;
import org.junit.Test;
import java.nio.charset.Charset;
import static org.junit.Assert.*;
public class StringUtilsTest {
@Test
public void testCountUtf8Bytes() throws Exception {
Charset utf8 = Charset.forName("UTF-8");
AllCodepointsIterator iterator = new AllCodepointsIterator();
while (iterator.hasNext()) {
String test = new String(Character.toChars(iterator.next()));
assertEquals(test.getBytes(utf8).length, StringUtils.countUtf8Bytes(test));
}
}
private static class AllCodepointsIterator {
private static final int MAX = 0x10FFFF; //see http://unicode.org/glossary/
private static final int SURROGATE_FIRST = 0xD800;
private static final int SURROGATE_LAST = 0xDFFF;
private int codepoint = 0;
public boolean hasNext() { return codepoint < MAX; }
public int next() {
int ret = codepoint;
codepoint = next(codepoint);
return ret;
}
private int next(int codepoint) {
while (codepoint++ < MAX) {
if (codepoint == SURROGATE_FIRST) { codepoint = SURROGATE_LAST + 1; }
if (!Character.isDefined(codepoint)) { continue; }
return codepoint;
}
return MAX;
}
}
}