/* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.hive.benchmark.serde; import java.io.ByteArrayOutputStream; import java.io.IOException; import java.nio.charset.StandardCharsets; import java.util.Date; import java.util.Random; import java.util.concurrent.TimeUnit; import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef; import org.apache.hadoop.hive.serde2.lazy.LazyByte; import org.apache.hadoop.hive.serde2.lazy.LazyDate; import org.apache.hadoop.hive.serde2.lazy.LazyDouble; import org.apache.hadoop.hive.serde2.lazy.LazyFloat; import org.apache.hadoop.hive.serde2.lazy.LazyInteger; import org.apache.hadoop.hive.serde2.lazy.LazyLong; import org.apache.hadoop.hive.serde2.lazy.LazyShort; import org.apache.hadoop.hive.serde2.lazy.LazyTimestamp; import org.apache.hadoop.hive.serde2.lazy.fast.StringToDouble; import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyPrimitiveObjectInspectorFactory; import org.openjdk.jmh.annotations.Benchmark; import org.openjdk.jmh.annotations.BenchmarkMode; import org.openjdk.jmh.annotations.Fork; import org.openjdk.jmh.annotations.Measurement; import org.openjdk.jmh.annotations.Mode; import org.openjdk.jmh.annotations.OutputTimeUnit; import org.openjdk.jmh.annotations.Scope; import org.openjdk.jmh.annotations.Setup; import org.openjdk.jmh.annotations.State; import org.openjdk.jmh.annotations.Warmup; import org.openjdk.jmh.runner.Runner; import org.openjdk.jmh.runner.RunnerException; import org.openjdk.jmh.runner.options.Options; import org.openjdk.jmh.runner.options.OptionsBuilder; @State(Scope.Benchmark) public class LazySimpleSerDeBench { /** * This test measures the performance for LazySimpleSerDe. * <p/> * This test uses JMH framework for benchmarking. You may execute this * benchmark tool using JMH command line in different ways: * <p/> * To run using default settings, use: * $ java -cp target/benchmarks.jar org.apache.hive.benchmark.serde.LazySimpleSerDeBench * <p/> */ public static final int DEFAULT_ITER_TIME = 1000000; public static final int DEFAULT_DATA_SIZE = 4096; @BenchmarkMode(Mode.AverageTime) @Fork(1) @State(Scope.Thread) @OutputTimeUnit(TimeUnit.MILLISECONDS) public static abstract class AbstractDeserializer { public int[] offsets = new int[DEFAULT_DATA_SIZE]; public int[] sizes = new int[DEFAULT_DATA_SIZE]; protected final ByteArrayRef ref = new ByteArrayRef(); @Setup public abstract void setup(); @Benchmark @Warmup(iterations = 2, time = 2, timeUnit = TimeUnit.MILLISECONDS) @Measurement(iterations = 2, time = 2, timeUnit = TimeUnit.MILLISECONDS) public void bench() { } } public static abstract class RandomDataInitializer extends AbstractDeserializer { final int width; public RandomDataInitializer(final int width) { this.width = width; } @Override public void setup() { int len = 0; Random r = new Random(); for (int i = 0; i < sizes.length; i++) { sizes[i] = (int) (r.nextInt(width)); offsets[i] = len; len += sizes[i]; } byte[] data = new byte[len + 1]; r.nextBytes(data); ref.setData(data); } } public static abstract class GoodDataInitializer extends AbstractDeserializer { public final int max; public GoodDataInitializer(final int max) { this.max = max; } @Override public void setup() { sizes = new int[1024]; offsets = new int[sizes.length]; ByteArrayOutputStream bos = new ByteArrayOutputStream(); Random r = new Random(); int len = 0; for (int i = 0; i < sizes.length / 2; i++) { int p = r.nextInt(max); int n = -1 * (p - 1); byte[] ps = String.format("%d", p).getBytes(); byte[] ns = String.format("%d", n).getBytes(); sizes[2 * i] = ps.length; sizes[2 * i + 1] = ns.length; offsets[2 * i] = len; offsets[2 * i + 1] = len + ps.length; len += ps.length + ns.length; try { bos.write(ns); bos.write(ps); } catch (IOException e) { e.printStackTrace(); throw new RuntimeException(e); } } ref.setData(bos.toByteArray()); } } public static class RandomLazyByte extends RandomDataInitializer { public RandomLazyByte() { super(2); } final LazyByte obj = new LazyByte( LazyPrimitiveObjectInspectorFactory.LAZY_BYTE_OBJECT_INSPECTOR); @Override public void bench() { for (int i = 0; i < DEFAULT_ITER_TIME; i++) { obj.init(ref, offsets[i % sizes.length], sizes[i % sizes.length]); } } } public static class WorstLazyByte extends RandomDataInitializer { public WorstLazyByte() { super(8); } final LazyByte obj = new LazyByte( LazyPrimitiveObjectInspectorFactory.LAZY_BYTE_OBJECT_INSPECTOR); @Override public void bench() { for (int i = 0; i < DEFAULT_ITER_TIME; i++) { obj.init(ref, offsets[i % sizes.length], sizes[i % sizes.length]); } } } public static class GoodLazyByte extends GoodDataInitializer { final LazyByte obj = new LazyByte( LazyPrimitiveObjectInspectorFactory.LAZY_BYTE_OBJECT_INSPECTOR); public GoodLazyByte() { super(Integer.MAX_VALUE); } @Override public void bench() { for (int i = 0; i < DEFAULT_ITER_TIME; i++) { obj.init(ref, offsets[i % sizes.length], sizes[i % sizes.length]); } } } public static class RandomLazyShort extends RandomDataInitializer { public RandomLazyShort() { super(2); } final LazyShort obj = new LazyShort( LazyPrimitiveObjectInspectorFactory.LAZY_SHORT_OBJECT_INSPECTOR); @Override public void bench() { for (int i = 0; i < DEFAULT_ITER_TIME; i++) { obj.init(ref, offsets[i % sizes.length], sizes[i % sizes.length]); } } } public static class WorstLazyShort extends RandomDataInitializer { public WorstLazyShort() { super(8); } final LazyShort obj = new LazyShort( LazyPrimitiveObjectInspectorFactory.LAZY_SHORT_OBJECT_INSPECTOR); @Override public void bench() { for (int i = 0; i < DEFAULT_ITER_TIME; i++) { obj.init(ref, offsets[i % sizes.length], sizes[i % sizes.length]); } } } public static class GoodLazyShort extends GoodDataInitializer { final LazyShort obj = new LazyShort( LazyPrimitiveObjectInspectorFactory.LAZY_SHORT_OBJECT_INSPECTOR); public GoodLazyShort() { super(Integer.MAX_VALUE); } @Override public void bench() { for (int i = 0; i < DEFAULT_ITER_TIME; i++) { obj.init(ref, offsets[i % sizes.length], sizes[i % sizes.length]); } } } public static class RandomLazyInteger extends RandomDataInitializer { public RandomLazyInteger() { super(2); } final LazyInteger obj = new LazyInteger( LazyPrimitiveObjectInspectorFactory.LAZY_INT_OBJECT_INSPECTOR); @Override public void bench() { for (int i = 0; i < DEFAULT_ITER_TIME; i++) { obj.init(ref, offsets[i % sizes.length], sizes[i % sizes.length]); } } } public static class WorstLazyInteger extends RandomDataInitializer { public WorstLazyInteger() { super(8); } final LazyInteger obj = new LazyInteger( LazyPrimitiveObjectInspectorFactory.LAZY_INT_OBJECT_INSPECTOR); @Override public void bench() { for (int i = 0; i < DEFAULT_ITER_TIME; i++) { obj.init(ref, offsets[i % sizes.length], sizes[i % sizes.length]); } } } public static class GoodLazyInteger extends GoodDataInitializer { final LazyInteger obj = new LazyInteger( LazyPrimitiveObjectInspectorFactory.LAZY_INT_OBJECT_INSPECTOR); public GoodLazyInteger() { super(Integer.MAX_VALUE); } @Override public void bench() { for (int i = 0; i < DEFAULT_ITER_TIME; i++) { obj.init(ref, offsets[i % sizes.length], sizes[i % sizes.length]); } } } public static class RandomLazyFloat extends RandomDataInitializer { public RandomLazyFloat() { super(2); } final LazyFloat obj = new LazyFloat( LazyPrimitiveObjectInspectorFactory.LAZY_FLOAT_OBJECT_INSPECTOR); @Override public void bench() { for (int i = 0; i < DEFAULT_ITER_TIME; i++) { obj.init(ref, offsets[i % sizes.length], sizes[i % sizes.length]); } } } public static class WorstLazyFloat extends RandomDataInitializer { public WorstLazyFloat() { super(8); } final LazyFloat obj = new LazyFloat( LazyPrimitiveObjectInspectorFactory.LAZY_FLOAT_OBJECT_INSPECTOR); @Override public void bench() { for (int i = 0; i < DEFAULT_ITER_TIME; i++) { obj.init(ref, offsets[i % sizes.length], sizes[i % sizes.length]); } } } public static class GoodLazyFloat extends GoodDataInitializer { final LazyFloat obj = new LazyFloat( LazyPrimitiveObjectInspectorFactory.LAZY_FLOAT_OBJECT_INSPECTOR); public GoodLazyFloat() { super(Integer.MAX_VALUE); } @Override public void bench() { for (int i = 0; i < DEFAULT_ITER_TIME; i++) { obj.init(ref, offsets[i % sizes.length], sizes[i % sizes.length]); } } } public static class RandomLazyLong extends RandomDataInitializer { public RandomLazyLong() { super(2); } final LazyLong obj = new LazyLong( LazyPrimitiveObjectInspectorFactory.LAZY_LONG_OBJECT_INSPECTOR); @Override public void bench() { for (int i = 0; i < DEFAULT_ITER_TIME; i++) { obj.init(ref, offsets[i % sizes.length], sizes[i % sizes.length]); } } } public static class WorstLazyLong extends RandomDataInitializer { public WorstLazyLong() { super(8); } final LazyLong obj = new LazyLong( LazyPrimitiveObjectInspectorFactory.LAZY_LONG_OBJECT_INSPECTOR); @Override public void bench() { for (int i = 0; i < DEFAULT_ITER_TIME; i++) { obj.init(ref, offsets[i % sizes.length], sizes[i % sizes.length]); } } } public static class GoodLazyLong extends GoodDataInitializer { final LazyLong obj = new LazyLong( LazyPrimitiveObjectInspectorFactory.LAZY_LONG_OBJECT_INSPECTOR); public GoodLazyLong() { super(Integer.MAX_VALUE); } @Override public void bench() { for (int i = 0; i < DEFAULT_ITER_TIME; i++) { obj.init(ref, offsets[i % sizes.length], sizes[i % sizes.length]); } } } public static class RandomLazyDouble extends RandomDataInitializer { public RandomLazyDouble() { super(2); } final LazyDouble obj = new LazyDouble( LazyPrimitiveObjectInspectorFactory.LAZY_DOUBLE_OBJECT_INSPECTOR); @Override public void bench() { for (int i = 0; i < DEFAULT_ITER_TIME; i++) { obj.init(ref, offsets[i % sizes.length], sizes[i % sizes.length]); } } } public static class WorstLazyDouble extends RandomDataInitializer { public WorstLazyDouble() { super(8); } final LazyDouble obj = new LazyDouble( LazyPrimitiveObjectInspectorFactory.LAZY_DOUBLE_OBJECT_INSPECTOR); @Override public void bench() { for (int i = 0; i < DEFAULT_ITER_TIME; i++) { obj.init(ref, offsets[i % sizes.length], sizes[i % sizes.length]); } } } public static class GoodLazyDouble extends GoodDataInitializer { final LazyDouble obj = new LazyDouble( LazyPrimitiveObjectInspectorFactory.LAZY_DOUBLE_OBJECT_INSPECTOR); public GoodLazyDouble() { super(Integer.MAX_VALUE); } @Override public void bench() { for (int i = 0; i < DEFAULT_ITER_TIME; i++) { obj.init(ref, offsets[i % sizes.length], sizes[i % sizes.length]); } } } @BenchmarkMode(Mode.AverageTime) @Fork(1) @OutputTimeUnit(TimeUnit.MILLISECONDS) @Warmup(iterations = 4, time = 2, timeUnit = TimeUnit.MILLISECONDS) @Measurement(iterations = 4, time = 2, timeUnit = TimeUnit.MILLISECONDS) @State(Scope.Thread) public static class ParseDouble { byte[] bytes = "1234567890.12345".getBytes(StandardCharsets.UTF_8); @Benchmark public void floatingDecimalBench() { for (int i = 0; i < DEFAULT_ITER_TIME; i++) { StringToDouble.strtod(bytes, 0, bytes.length); } } @Benchmark public void doubleBench() { for (int i = 0; i < DEFAULT_ITER_TIME; i++) { Double.parseDouble(new String(bytes, 0, bytes.length, StandardCharsets.UTF_8)); } } } @BenchmarkMode(Mode.AverageTime) @Fork(1) @State(Scope.Thread) @OutputTimeUnit(TimeUnit.MILLISECONDS) public static class GoodLazyDate { final LazyDate obj = new LazyDate( LazyPrimitiveObjectInspectorFactory.LAZY_DATE_OBJECT_INSPECTOR); public int[] offsets = new int[DEFAULT_DATA_SIZE]; public int[] sizes = new int[DEFAULT_DATA_SIZE]; protected final ByteArrayRef ref = new ByteArrayRef(); @Setup public void setup() { sizes = new int[DEFAULT_DATA_SIZE]; offsets = new int[sizes.length]; ByteArrayOutputStream bos = new ByteArrayOutputStream(); Random r = new Random(); int len = 0; final long base = -320000000L*1000L; // 1959 for (int i = 0; i < DEFAULT_DATA_SIZE; i++) { // -ve dates are also valid dates - the dates are within 1959 to 2027 Date dt = new Date(base + (Math.abs(r.nextLong()) % (Integer.MAX_VALUE*1000L))); byte[] ds = dt.toString().getBytes(); sizes[i] = ds.length; offsets[i] = len; len += ds.length; try { bos.write(ds); } catch (IOException e) { e.printStackTrace(); throw new RuntimeException(e); } } ref.setData(bos.toByteArray()); } @Benchmark @Warmup(iterations = 2, time = 2, timeUnit = TimeUnit.MILLISECONDS) @Measurement(iterations = 2, time = 2, timeUnit = TimeUnit.MILLISECONDS) public void bench() { for (int i = 0; i < DEFAULT_ITER_TIME; i++) { obj.init(ref, offsets[i % sizes.length], sizes[i % sizes.length]); } } } public static class RandomLazyDate extends RandomDataInitializer { final LazyDate obj = new LazyDate( LazyPrimitiveObjectInspectorFactory.LAZY_DATE_OBJECT_INSPECTOR); public RandomLazyDate() { super(4); } @Override public void bench() { for (int i = 0; i < DEFAULT_ITER_TIME; i++) { obj.init(ref, offsets[i % sizes.length], sizes[i % sizes.length]); } } } public static class WorstLazyDate extends RandomDataInitializer { final LazyDate obj = new LazyDate( LazyPrimitiveObjectInspectorFactory.LAZY_DATE_OBJECT_INSPECTOR); public WorstLazyDate() { super(8); } @Override public void bench() { for (int i = 0; i < DEFAULT_ITER_TIME; i++) { obj.init(ref, offsets[i % sizes.length], sizes[i % sizes.length]); } } } @BenchmarkMode(Mode.AverageTime) @Fork(1) @State(Scope.Thread) @OutputTimeUnit(TimeUnit.MILLISECONDS) public static class GoodLazyTimestamp { final LazyTimestamp obj = new LazyTimestamp( LazyPrimitiveObjectInspectorFactory.LAZY_TIMESTAMP_OBJECT_INSPECTOR); public int[] offsets = new int[DEFAULT_DATA_SIZE]; public int[] sizes = new int[DEFAULT_DATA_SIZE]; protected final ByteArrayRef ref = new ByteArrayRef(); @Setup public void setup() { sizes = new int[DEFAULT_DATA_SIZE]; offsets = new int[sizes.length]; ByteArrayOutputStream bos = new ByteArrayOutputStream(); Random r = new Random(); int len = 0; final long base = -320000000L * 1000L; // 1959 for (int i = 0; i < DEFAULT_DATA_SIZE; i++) { // -ve dates are also valid Timestamps - dates are within 1959 to 2027 Date dt = new Date(base + (Math.abs(r.nextLong()) % (Integer.MAX_VALUE * 1000L))); byte[] ds = String.format("%s 00:00:01", dt.toString()).getBytes(); sizes[i] = ds.length; offsets[i] = len; len += ds.length; try { bos.write(ds); } catch (IOException e) { e.printStackTrace(); throw new RuntimeException(e); } } ref.setData(bos.toByteArray()); } @Benchmark @Warmup(iterations = 2, time = 2, timeUnit = TimeUnit.MILLISECONDS) @Measurement(iterations = 2, time = 2, timeUnit = TimeUnit.MILLISECONDS) public void bench() { for (int i = 0; i < DEFAULT_ITER_TIME; i++) { obj.init(ref, offsets[i % sizes.length], sizes[i % sizes.length]); } } } public static class RandomLazyTimestamp extends RandomDataInitializer { final LazyTimestamp obj = new LazyTimestamp( LazyPrimitiveObjectInspectorFactory.LAZY_TIMESTAMP_OBJECT_INSPECTOR); public RandomLazyTimestamp() { super(4); } @Override public void bench() { for (int i = 0; i < DEFAULT_ITER_TIME; i++) { obj.init(ref, offsets[i % sizes.length], sizes[i % sizes.length]); } } } public static class WorstLazyTimestamp extends RandomDataInitializer { final LazyTimestamp obj = new LazyTimestamp( LazyPrimitiveObjectInspectorFactory.LAZY_TIMESTAMP_OBJECT_INSPECTOR); public WorstLazyTimestamp() { super(8); } @Override public void bench() { for (int i = 0; i < DEFAULT_ITER_TIME; i++) { obj.init(ref, offsets[i % sizes.length], sizes[i % sizes.length]); } } } public static void main(String[] args) throws RunnerException { Options opt = new OptionsBuilder().include( ".*" + LazySimpleSerDeBench.class.getSimpleName() + ".*").build(); new Runner(opt).run(); } }