/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.addthis.basis.chars;
import java.io.IOException;
import java.util.concurrent.TimeUnit;
import java.nio.charset.StandardCharsets;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.Fork;
import org.openjdk.jmh.annotations.Level;
import org.openjdk.jmh.annotations.Measurement;
import org.openjdk.jmh.annotations.Mode;
import org.openjdk.jmh.annotations.OutputTimeUnit;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.Setup;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.annotations.Threads;
import org.openjdk.jmh.annotations.Warmup;
import io.netty.buffer.Unpooled;
/**
* Benchmarks various CharBuf classes against each other and String. This setup is designed
* to test iteration over CharSequences via the charAt method for character strings that
* (perhaps unknown to the implementation) consist entirely of ASCII characters. Iteration
* is a theoretical sore spot since, although almost all string operations rely on it, the API
* only supports it through 'random' lookups. Random lookups are in turn a sore spot for
* string representations that have variable-width characters.
*
* This case is likely easier for CharBufs than non-ASCII purely random lookups, but sequential
* access to ASCII characters is the use case we expect to be most common. Locally, I have seen
* both ByteArray CharBufs outperform String (one knows it is ASCII only, and the other does not).
*/
public class CharSeqOnlyIter {

    /**
     * Number of characters in the generated benchmark text. Read once from the
     * {@code string.size} system property; falls back to 64000 when the property is unset.
     */
    public static final int STRING_SIZE = Integer.getInteger("string.size", 64000);

    /**
     * Shared benchmark body: builds one {@link CharSequence} of type {@code T} per trial and
     * measures single {@code charAt} calls that walk through it sequentially, wrapping around
     * at the end. Concrete subclasses only decide which implementation wraps the bytes.
     *
     * @param <T> the CharSequence implementation under test
     */
    @BenchmarkMode(Mode.Throughput)                                      // report operations per time unit
    @OutputTimeUnit(TimeUnit.MICROSECONDS)                               // ...where the time unit is microseconds
    @Warmup(iterations = 2, time = 1, timeUnit = TimeUnit.SECONDS)       // two one-second warmup iterations
    @Measurement(iterations = 3, time = 1, timeUnit = TimeUnit.SECONDS)  // three one-second measured iterations
    @Fork(1)                                                             // one forked JVM per benchmark
    @Threads(1)                                                          // one benchmark thread
    @State(Scope.Thread)                                                 // state instances are not shared between threads
    public abstract static class AbstractCharSeqBench<T extends CharSequence> {

        /** Position of the next character to read; always in {@code [0, STRING_SIZE)}. */
        int index = 0;

        /** The sequence under test, (re)created by {@link #makeStrings()}. */
        T string;

        /**
         * Wraps the given bytes in the CharSequence implementation being benchmarked.
         *
         * @param bytes UTF-8 encoding of the benchmark text (every character is below 128)
         * @return the sequence whose {@code charAt} will be measured
         */
        public abstract T makeString(byte[] bytes);

        /**
         * Trial-level setup: generates the benchmark text, encodes it as UTF-8 and hands
         * the bytes to {@link #makeString(byte[])}.
         */
        @Setup(Level.Trial)
        public void makeStrings() {
            final byte[] encoded = generateText().getBytes(StandardCharsets.UTF_8);
            string = makeString(encoded);
        }

        /**
         * Produces {@link #STRING_SIZE} characters whose code points cycle through
         * 5..104, so the text is ASCII-only.
         */
        private static String generateText() {
            final char[] chars = new char[STRING_SIZE];
            int slot = 0;
            while (slot < chars.length) {
                chars[slot] = (char) (5 + (slot % 100));
                slot++;
            }
            return String.valueOf(chars);
        }

        /**
         * The measured operation: reads the character at the current position, then moves
         * the position forward by one, wrapping back to zero after the last character.
         *
         * @return the character that was read
         * @throws IOException declared by the signature; nothing in this body raises it
         */
        @Benchmark
        public char iterateString() throws IOException {
            // Read first, then advance, so the position is untouched if charAt fails.
            final char current = string.charAt(index);
            final int next = index + 1;
            index = (next >= STRING_SIZE) ? 0 : next;
            return current;
        }
    }

    /** Baseline: the text decoded into a plain {@link String}. */
    public static class JavaString extends AbstractCharSeqBench<String> {
        @Override
        public String makeString(byte[] bytes) {
            return new String(bytes, StandardCharsets.UTF_8);
        }
    }

    /** Benchmarks {@code ByteArrayReadOnlyAsciiBuf} constructed directly over the byte array. */
    public static class ByteArrayAscii extends AbstractCharSeqBench<ByteArrayReadOnlyAsciiBuf> {
        @Override
        public ByteArrayReadOnlyAsciiBuf makeString(byte[] bytes) {
            return new ByteArrayReadOnlyAsciiBuf(bytes);
        }
    }

    /** Benchmarks {@code ByteArrayReadOnlyUtfBuf} constructed directly over the byte array. */
    public static class ByteArrayUtf extends AbstractCharSeqBench<ByteArrayReadOnlyUtfBuf> {
        @Override
        public ByteArrayReadOnlyUtfBuf makeString(byte[] bytes) {
            return new ByteArrayReadOnlyUtfBuf(bytes);
        }
    }

    /** Benchmarks {@code ReadOnlyUtfBuf} over the byte array wrapped via {@code Unpooled.wrappedBuffer}. */
    public static class ByteBufUtf extends AbstractCharSeqBench<ReadOnlyUtfBuf> {
        @Override
        public ReadOnlyUtfBuf makeString(byte[] bytes) {
            return new ReadOnlyUtfBuf(Unpooled.wrappedBuffer(bytes));
        }
    }
}