/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package parquet.column.statistics;

import static org.junit.Assert.*;

import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.charset.Charset;

import org.junit.Test;

import parquet.io.api.Binary;

/**
 * Unit tests for the typed column statistics classes: null counting, min/max
 * tracking, conversion of min/max to and from {@code byte[]}, string
 * formatting, and merging of two statistics objects.
 *
 * <p>All assertions use JUnit's {@code (expected, actual)} argument order so
 * that failure messages report the values the right way round.
 */
public class TestStatistics {

  /** Charset used to decode the bytes produced for values built with {@code Binary.fromString}. */
  private static final Charset UTF8 = Charset.forName("UTF-8");

  /** Tolerance for floating point comparisons of ordinary (non-extreme) values. */
  private static final double DELTA = 1e-10;

  /**
   * Tolerance for comparisons against {@code MIN_VALUE}/{@code MAX_VALUE}.
   * {@code Float.MIN_VALUE} and {@code Double.MIN_VALUE} are far smaller than
   * {@link #DELTA}, so any non-zero tolerance could not tell them apart from 0.
   */
  private static final double EXACT = 0.0d;

  /** Builds an {@link IntStatistics} updated with each of the given values, in order. */
  private static IntStatistics intStatsOf(int... values) {
    IntStatistics stats = new IntStatistics();
    for (int value : values) {
      stats.updateStats(value);
    }
    return stats;
  }

  /** Builds a {@link LongStatistics} updated with each of the given values, in order. */
  private static LongStatistics longStatsOf(long... values) {
    LongStatistics stats = new LongStatistics();
    for (long value : values) {
      stats.updateStats(value);
    }
    return stats;
  }

  /** Builds a {@link FloatStatistics} updated with each of the given values, in order. */
  private static FloatStatistics floatStatsOf(float... values) {
    FloatStatistics stats = new FloatStatistics();
    for (float value : values) {
      stats.updateStats(value);
    }
    return stats;
  }

  /** Builds a {@link DoubleStatistics} updated with each of the given values, in order. */
  private static DoubleStatistics doubleStatsOf(double... values) {
    DoubleStatistics stats = new DoubleStatistics();
    for (double value : values) {
      stats.updateStats(value);
    }
    return stats;
  }

  /** Builds a {@link BooleanStatistics} updated with each of the given values, in order. */
  private static BooleanStatistics booleanStatsOf(boolean... values) {
    BooleanStatistics stats = new BooleanStatistics();
    for (boolean value : values) {
      stats.updateStats(value);
    }
    return stats;
  }

  /** Builds a {@link BinaryStatistics} updated with each string (via {@code Binary.fromString}), in order. */
  private static BinaryStatistics binaryStatsOf(String... values) {
    BinaryStatistics stats = new BinaryStatistics();
    for (String value : values) {
      stats.updateStats(Binary.fromString(value));
    }
    return stats;
  }

  /** Wraps {@code bytes} in a buffer that reads multi-byte values in little-endian order. */
  private static ByteBuffer littleEndian(byte[] bytes) {
    return ByteBuffer.wrap(bytes).order(ByteOrder.LITTLE_ENDIAN);
  }

  /** Checks that the null counter starts at zero and follows increment/set calls. */
  @Test
  public void testNumNulls() {
    IntStatistics stats = new IntStatistics();
    assertEquals(0, stats.getNumNulls());

    stats.incrementNumNulls();
    stats.incrementNumNulls();
    stats.incrementNumNulls();
    stats.incrementNumNulls();
    assertEquals(4, stats.getNumNulls());

    stats.incrementNumNulls(5);
    assertEquals(9, stats.getNumNulls());

    stats.setNumNulls(22);
    assertEquals(22, stats.getNumNulls());
  }

  /** Checks int min/max tracking, the byte[] round trip, and string formatting. */
  @Test
  public void testIntMinMax() {
    // Test basic max/min
    IntStatistics stats = intStatsOf(1, 3, 14, 54, 66, 8, 0, 23, 54);
    assertEquals(66, stats.getMax());
    assertEquals(0, stats.getMin());

    // Test negative values
    IntStatistics statsNeg = intStatsOf(-11, 3, -14, 54, -66, 8, 0, -23, 54);
    assertEquals(54, statsNeg.getMax());
    assertEquals(-66, statsNeg.getMin());

    // Test converting to and from byte[]
    byte[] intMaxBytes = statsNeg.getMaxBytes();
    byte[] intMinBytes = statsNeg.getMinBytes();
    assertEquals(54, littleEndian(intMaxBytes).getInt());
    assertEquals(-66, littleEndian(intMinBytes).getInt());

    IntStatistics statsFromBytes = new IntStatistics();
    statsFromBytes.setMinMaxFromBytes(intMinBytes, intMaxBytes);
    assertEquals(54, statsFromBytes.getMax());
    assertEquals(-66, statsFromBytes.getMin());

    // Test the extremes of the int range
    IntStatistics minMaxValues = intStatsOf(Integer.MAX_VALUE, Integer.MIN_VALUE);
    assertEquals(Integer.MAX_VALUE, minMaxValues.getMax());
    assertEquals(Integer.MIN_VALUE, minMaxValues.getMin());

    // Test converting to and from byte[] for large and small values
    byte[] intMaxBytesMinMax = minMaxValues.getMaxBytes();
    byte[] intMinBytesMinMax = minMaxValues.getMinBytes();
    assertEquals(Integer.MAX_VALUE, littleEndian(intMaxBytesMinMax).getInt());
    assertEquals(Integer.MIN_VALUE, littleEndian(intMinBytesMinMax).getInt());

    IntStatistics statsFromBytesMinMax = new IntStatistics();
    statsFromBytesMinMax.setMinMaxFromBytes(intMinBytesMinMax, intMaxBytesMinMax);
    assertEquals(Integer.MAX_VALUE, statsFromBytesMinMax.getMax());
    assertEquals(Integer.MIN_VALUE, statsFromBytesMinMax.getMin());

    // Test print formatting
    assertEquals("min: 0, max: 66, num_nulls: 0", stats.toString());
  }

  /** Checks long min/max tracking, the byte[] round trip, and string formatting. */
  @Test
  public void testLongMinMax() {
    // Test basic max/min
    LongStatistics stats = longStatsOf(9, 39, 99, 3, 0, 12, 1000, 65, 542);
    assertEquals(1000, stats.getMax());
    assertEquals(0, stats.getMin());

    // Test negative values
    LongStatistics statsNeg = longStatsOf(-101, 993, -9914, 54, -9, 89, 0, -23, 90);
    assertEquals(993, statsNeg.getMax());
    assertEquals(-9914, statsNeg.getMin());

    // Test converting to and from byte[]
    byte[] longMaxBytes = statsNeg.getMaxBytes();
    byte[] longMinBytes = statsNeg.getMinBytes();
    assertEquals(993, littleEndian(longMaxBytes).getLong());
    assertEquals(-9914, littleEndian(longMinBytes).getLong());

    LongStatistics statsFromBytes = new LongStatistics();
    statsFromBytes.setMinMaxFromBytes(longMinBytes, longMaxBytes);
    assertEquals(993, statsFromBytes.getMax());
    assertEquals(-9914, statsFromBytes.getMin());

    // Test the extremes of the long range
    LongStatistics minMaxValues = longStatsOf(Long.MAX_VALUE, Long.MIN_VALUE);
    assertEquals(Long.MAX_VALUE, minMaxValues.getMax());
    assertEquals(Long.MIN_VALUE, minMaxValues.getMin());

    // Test converting to and from byte[] for large and small values
    byte[] longMaxBytesMinMax = minMaxValues.getMaxBytes();
    byte[] longMinBytesMinMax = minMaxValues.getMinBytes();
    assertEquals(Long.MAX_VALUE, littleEndian(longMaxBytesMinMax).getLong());
    assertEquals(Long.MIN_VALUE, littleEndian(longMinBytesMinMax).getLong());

    LongStatistics statsFromBytesMinMax = new LongStatistics();
    statsFromBytesMinMax.setMinMaxFromBytes(longMinBytesMinMax, longMaxBytesMinMax);
    assertEquals(Long.MAX_VALUE, statsFromBytesMinMax.getMax());
    assertEquals(Long.MIN_VALUE, statsFromBytesMinMax.getMin());

    // Test print formatting
    assertEquals("min: 0, max: 1000, num_nulls: 0", stats.toString());
  }

  /** Checks float min/max tracking, the byte[] round trip, and string formatting. */
  @Test
  public void testFloatMinMax() {
    // Test basic max/min
    FloatStatistics stats =
        floatStatsOf(1.5f, 44.5f, 412.99f, 0.65f, 5.6f, 100.6f, 0.0001f, 23.0f, 553.6f);
    assertEquals(553.6f, stats.getMax(), DELTA);
    assertEquals(0.0001f, stats.getMin(), DELTA);

    // Test negative values
    FloatStatistics statsNeg =
        floatStatsOf(-1.5f, -44.5f, -412.99f, 0.65f, -5.6f, -100.6f, 0.0001f, -23.0f, -3.6f);
    assertEquals(0.65f, statsNeg.getMax(), DELTA);
    assertEquals(-412.99f, statsNeg.getMin(), DELTA);

    // Test converting to and from byte[]
    byte[] floatMaxBytes = statsNeg.getMaxBytes();
    byte[] floatMinBytes = statsNeg.getMinBytes();
    assertEquals(0.65f, littleEndian(floatMaxBytes).getFloat(), DELTA);
    assertEquals(-412.99f, littleEndian(floatMinBytes).getFloat(), DELTA);

    FloatStatistics statsFromBytes = new FloatStatistics();
    statsFromBytes.setMinMaxFromBytes(floatMinBytes, floatMaxBytes);
    assertEquals(0.65f, statsFromBytes.getMax(), DELTA);
    assertEquals(-412.99f, statsFromBytes.getMin(), DELTA);

    // Test the largest finite and the smallest positive float. These are
    // compared exactly: Float.MIN_VALUE is smaller than any practical delta.
    FloatStatistics minMaxValues = floatStatsOf(Float.MAX_VALUE, Float.MIN_VALUE);
    assertEquals(Float.MAX_VALUE, minMaxValues.getMax(), EXACT);
    assertEquals(Float.MIN_VALUE, minMaxValues.getMin(), EXACT);

    // Test converting to and from byte[] for large and small values
    byte[] floatMaxBytesMinMax = minMaxValues.getMaxBytes();
    byte[] floatMinBytesMinMax = minMaxValues.getMinBytes();
    assertEquals(Float.MAX_VALUE, littleEndian(floatMaxBytesMinMax).getFloat(), EXACT);
    assertEquals(Float.MIN_VALUE, littleEndian(floatMinBytesMinMax).getFloat(), EXACT);

    FloatStatistics statsFromBytesMinMax = new FloatStatistics();
    statsFromBytesMinMax.setMinMaxFromBytes(floatMinBytesMinMax, floatMaxBytesMinMax);
    assertEquals(Float.MAX_VALUE, statsFromBytesMinMax.getMax(), EXACT);
    assertEquals(Float.MIN_VALUE, statsFromBytesMinMax.getMin(), EXACT);

    // Test print formatting
    assertEquals("min: 0.00010, max: 553.59998, num_nulls: 0", stats.toString());
  }

  /** Checks double min/max tracking, the byte[] round trip, and string formatting. */
  @Test
  public void testDoubleMinMax() {
    // Test basic max/min
    DoubleStatistics stats =
        doubleStatsOf(81.5d, 944.5d, 2.002d, 334.5d, 5.6d, 0.001d, 0.00001d, 23.0d, 553.6d);
    assertEquals(944.5d, stats.getMax(), DELTA);
    assertEquals(0.00001d, stats.getMin(), DELTA);

    // Test negative values
    DoubleStatistics statsNeg =
        doubleStatsOf(-81.5d, -944.5d, 2.002d, -334.5d, -5.6d, -0.001d, -0.00001d, 23.0d, -3.6d);
    assertEquals(23.0d, statsNeg.getMax(), DELTA);
    assertEquals(-944.5d, statsNeg.getMin(), DELTA);

    // Test converting to and from byte[]
    byte[] doubleMaxBytes = statsNeg.getMaxBytes();
    byte[] doubleMinBytes = statsNeg.getMinBytes();
    assertEquals(23.0d, littleEndian(doubleMaxBytes).getDouble(), DELTA);
    assertEquals(-944.5d, littleEndian(doubleMinBytes).getDouble(), DELTA);

    DoubleStatistics statsFromBytes = new DoubleStatistics();
    statsFromBytes.setMinMaxFromBytes(doubleMinBytes, doubleMaxBytes);
    assertEquals(23.0d, statsFromBytes.getMax(), DELTA);
    assertEquals(-944.5d, statsFromBytes.getMin(), DELTA);

    // Test the largest finite and the smallest positive double. These are
    // compared exactly: Double.MIN_VALUE is smaller than any practical delta.
    DoubleStatistics minMaxValues = doubleStatsOf(Double.MAX_VALUE, Double.MIN_VALUE);
    assertEquals(Double.MAX_VALUE, minMaxValues.getMax(), EXACT);
    assertEquals(Double.MIN_VALUE, minMaxValues.getMin(), EXACT);

    // Test converting to and from byte[] for large and small values
    byte[] doubleMaxBytesMinMax = minMaxValues.getMaxBytes();
    byte[] doubleMinBytesMinMax = minMaxValues.getMinBytes();
    assertEquals(Double.MAX_VALUE, littleEndian(doubleMaxBytesMinMax).getDouble(), EXACT);
    assertEquals(Double.MIN_VALUE, littleEndian(doubleMinBytesMinMax).getDouble(), EXACT);

    DoubleStatistics statsFromBytesMinMax = new DoubleStatistics();
    statsFromBytesMinMax.setMinMaxFromBytes(doubleMinBytesMinMax, doubleMaxBytesMinMax);
    assertEquals(Double.MAX_VALUE, statsFromBytesMinMax.getMax(), EXACT);
    assertEquals(Double.MIN_VALUE, statsFromBytesMinMax.getMin(), EXACT);

    // Test print formatting
    assertEquals("min: 0.00001, max: 944.50000, num_nulls: 0", stats.toString());
  }

  /** Checks boolean min/max tracking, the byte[] round trip, and string formatting. */
  @Test
  public void testBooleanMinMax() {
    // Test all true
    BooleanStatistics statsTrue = booleanStatsOf(true, true, true);
    assertTrue(statsTrue.getMax());
    assertTrue(statsTrue.getMin());

    // Test all false
    BooleanStatistics statsFalse = booleanStatsOf(false, false, false);
    assertFalse(statsFalse.getMax());
    assertFalse(statsFalse.getMin());

    // Test a mix of both values
    BooleanStatistics statsBoth = booleanStatsOf(false, true, false);
    assertTrue(statsBoth.getMax());
    assertFalse(statsBoth.getMin());

    // Test converting to and from byte[]
    byte[] boolMaxBytes = statsBoth.getMaxBytes();
    byte[] boolMinBytes = statsBoth.getMinBytes();
    // Mask to read the first byte as an unsigned value.
    assertEquals(1, boolMaxBytes[0] & 255);
    assertEquals(0, boolMinBytes[0] & 255);

    BooleanStatistics statsFromBytes = new BooleanStatistics();
    statsFromBytes.setMinMaxFromBytes(boolMinBytes, boolMaxBytes);
    assertTrue(statsFromBytes.getMax());
    assertFalse(statsFromBytes.getMin());

    // Test print formatting
    assertEquals("min: false, max: true, num_nulls: 0", statsBoth.toString());
  }

  /** Checks binary (string) min/max tracking, the byte[] round trip, and string formatting. */
  @Test
  public void testBinaryMinMax() {
    // Test basic max/min
    BinaryStatistics stats =
        binaryStatsOf("hello", "world", "this", "is", "a", "test", "of", "the", "stats", "class");
    assertEquals(Binary.fromString("world"), stats.getMax());
    assertEquals(Binary.fromString("a"), stats.getMin());

    // Test empty string
    BinaryStatistics statsEmpty = binaryStatsOf("", "", "", "", "");
    assertEquals(Binary.fromString(""), statsEmpty.getMax());
    assertEquals(Binary.fromString(""), statsEmpty.getMin());

    // Test converting to and from byte[]
    byte[] stringMaxBytes = stats.getMaxBytes();
    byte[] stringMinBytes = stats.getMinBytes();
    // Decode with an explicit charset so the result does not depend on the
    // platform default encoding.
    assertEquals("world", new String(stringMaxBytes, UTF8));
    assertEquals("a", new String(stringMinBytes, UTF8));

    BinaryStatistics statsFromBytes = new BinaryStatistics();
    statsFromBytes.setMinMaxFromBytes(stringMinBytes, stringMaxBytes);
    assertEquals(Binary.fromString("world"), statsFromBytes.getMax());
    assertEquals(Binary.fromString("a"), statsFromBytes.getMin());

    // Test print formatting
    assertEquals("min: a, max: world, num_nulls: 0", stats.toString());
  }

  /**
   * Runs the merge checks for every statistics type. The per-type checks run
   * sequentially inside this single test, so the first failing type stops the
   * remaining ones from running.
   */
  @Test
  public void testMergingStatistics() {
    testMergingIntStats();
    testMergingLongStats();
    testMergingFloatStats();
    testMergingDoubleStats();
    testMergingBooleanStats();
    testMergingStringStats();
  }

  /** Merges int statistics twice and checks that min/max widen as expected. */
  private void testMergingIntStats() {
    IntStatistics intStats = intStatsOf(1, 2, 3, 4, 5);

    intStats.mergeStatistics(intStatsOf(0, 3, 3));
    assertEquals(5, intStats.getMax());
    assertEquals(0, intStats.getMin());

    intStats.mergeStatistics(intStatsOf(-1, -100, 100));
    assertEquals(100, intStats.getMax());
    assertEquals(-100, intStats.getMin());
  }

  /** Merges long statistics twice and checks that min/max widen as expected. */
  private void testMergingLongStats() {
    LongStatistics longStats = longStatsOf(1L, 2L, 3L, 4L, 5L);

    longStats.mergeStatistics(longStatsOf(0L, 3L, 3L));
    assertEquals(5L, longStats.getMax());
    assertEquals(0L, longStats.getMin());

    longStats.mergeStatistics(longStatsOf(-1L, -100L, 100L));
    assertEquals(100L, longStats.getMax());
    assertEquals(-100L, longStats.getMin());
  }

  /** Merges float statistics twice and checks that min/max widen as expected. */
  private void testMergingFloatStats() {
    FloatStatistics floatStats = floatStatsOf(1.44f, 12.2f, 98.3f, 1.4f, 0.05f);

    floatStats.mergeStatistics(floatStatsOf(0.0001f, 9.9f, 3.1f));
    assertEquals(98.3f, floatStats.getMax(), DELTA);
    assertEquals(0.0001f, floatStats.getMin(), DELTA);

    floatStats.mergeStatistics(floatStatsOf(-1.91f, -100.9f, 100.54f));
    assertEquals(100.54f, floatStats.getMax(), DELTA);
    assertEquals(-100.9f, floatStats.getMin(), DELTA);
  }

  /** Merges double statistics twice and checks that min/max widen as expected. */
  private void testMergingDoubleStats() {
    DoubleStatistics doubleStats = doubleStatsOf(1.44d, 12.2d, 98.3d, 1.4d, 0.05d);

    doubleStats.mergeStatistics(doubleStatsOf(0.0001d, 9.9d, 3.1d));
    assertEquals(98.3d, doubleStats.getMax(), DELTA);
    assertEquals(0.0001d, doubleStats.getMin(), DELTA);

    doubleStats.mergeStatistics(doubleStatsOf(-1.91d, -100.9d, 100.54d));
    assertEquals(100.54d, doubleStats.getMax(), DELTA);
    assertEquals(-100.9d, doubleStats.getMin(), DELTA);
  }

  /** Merges boolean statistics twice and checks the resulting min/max. */
  private void testMergingBooleanStats() {
    BooleanStatistics booleanStats = booleanStatsOf(true, true, true);

    booleanStats.mergeStatistics(booleanStatsOf(true, false));
    assertTrue(booleanStats.getMax());
    assertFalse(booleanStats.getMin());

    booleanStats.mergeStatistics(booleanStatsOf(false, false, false, false));
    assertTrue(booleanStats.getMax());
    assertFalse(booleanStats.getMin());
  }

  /** Merges binary (string) statistics twice and checks the resulting min/max. */
  private void testMergingStringStats() {
    BinaryStatistics stats =
        binaryStatsOf("hello", "world", "this", "is", "a", "test", "of", "the", "stats", "class");

    stats.mergeStatistics(binaryStatsOf("zzzz", "asdf", "testing"));
    assertEquals(Binary.fromString("zzzz"), stats.getMax());
    assertEquals(Binary.fromString("a"), stats.getMin());

    stats.mergeStatistics(binaryStatsOf("", "good", "testing"));
    assertEquals(Binary.fromString("zzzz"), stats.getMax());
    assertEquals(Binary.fromString(""), stats.getMin());
  }
}