/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.facebook.presto.orc;
import com.facebook.presto.spi.type.CharType;
import com.facebook.presto.spi.type.DecimalType;
import com.facebook.presto.spi.type.SqlDate;
import com.facebook.presto.spi.type.SqlDecimal;
import com.facebook.presto.spi.type.SqlTimestamp;
import com.facebook.presto.spi.type.SqlVarbinary;
import com.google.common.base.Strings;
import com.google.common.collect.AbstractIterator;
import com.google.common.collect.ContiguousSet;
import com.google.common.collect.DiscreteDomain;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableList.Builder;
import com.google.common.collect.Range;
import org.joda.time.DateTimeZone;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;
import java.math.BigInteger;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Random;
import static com.facebook.presto.orc.OrcTester.HIVE_STORAGE_TIME_ZONE;
import static com.facebook.presto.spi.type.BigintType.BIGINT;
import static com.facebook.presto.spi.type.BooleanType.BOOLEAN;
import static com.facebook.presto.spi.type.CharType.createCharType;
import static com.facebook.presto.spi.type.DateType.DATE;
import static com.facebook.presto.spi.type.DoubleType.DOUBLE;
import static com.facebook.presto.spi.type.IntegerType.INTEGER;
import static com.facebook.presto.spi.type.RealType.REAL;
import static com.facebook.presto.spi.type.SmallintType.SMALLINT;
import static com.facebook.presto.spi.type.TimeZoneKey.UTC_KEY;
import static com.facebook.presto.spi.type.TimestampType.TIMESTAMP;
import static com.facebook.presto.spi.type.TinyintType.TINYINT;
import static com.facebook.presto.spi.type.VarbinaryType.VARBINARY;
import static com.facebook.presto.spi.type.VarcharType.VARCHAR;
import static com.google.common.collect.Iterables.concat;
import static com.google.common.collect.Iterables.cycle;
import static com.google.common.collect.Iterables.limit;
import static com.google.common.collect.Lists.newArrayList;
import static java.nio.charset.StandardCharsets.UTF_8;
import static java.util.Collections.nCopies;
import static java.util.stream.Collectors.toList;
import static org.testng.Assert.assertEquals;
public abstract class AbstractTestOrcReader
{
private static final int CHAR_LENGTH = 10;
private static final DecimalType DECIMAL_TYPE_PRECISION_2 = DecimalType.createDecimalType(2, 1);
private static final DecimalType DECIMAL_TYPE_PRECISION_4 = DecimalType.createDecimalType(4, 2);
private static final DecimalType DECIMAL_TYPE_PRECISION_8 = DecimalType.createDecimalType(8, 4);
private static final DecimalType DECIMAL_TYPE_PRECISION_17 = DecimalType.createDecimalType(17, 8);
private static final DecimalType DECIMAL_TYPE_PRECISION_18 = DecimalType.createDecimalType(18, 8);
private static final DecimalType DECIMAL_TYPE_PRECISION_38 = DecimalType.createDecimalType(38, 16);
private static final CharType CHAR = createCharType(CHAR_LENGTH);
private final OrcTester tester;
public AbstractTestOrcReader(OrcTester tester)
{
this.tester = tester;
}
@BeforeClass
public void setUp()
{
assertEquals(DateTimeZone.getDefault(), HIVE_STORAGE_TIME_ZONE);
}
@Test
public void testBooleanSequence()
throws Exception
{
tester.testRoundTrip(BOOLEAN, newArrayList(limit(cycle(ImmutableList.of(true, false, false)), 30_000)));
}
@Test
public void testLongSequence()
throws Exception
{
testRoundTripNumeric(intsBetween(0, 31_234));
}
@Test
public void testLongSequenceWithHoles()
throws Exception
{
testRoundTripNumeric(skipEvery(5, intsBetween(0, 31_234)));
}
@Test
public void testLongDirect()
throws Exception
{
testRoundTripNumeric(limit(cycle(ImmutableList.of(1, 3, 5, 7, 11, 13, 17)), 30_000));
}
@Test
public void testLongDirect2()
throws Exception
{
List<Integer> values = new ArrayList<>(31_234);
for (int i = 0; i < 31_234; i++) {
values.add(i);
}
Collections.shuffle(values, new Random(0));
testRoundTripNumeric(values);
}
@Test
public void testLongShortRepeat()
throws Exception
{
testRoundTripNumeric(limit(repeatEach(4, cycle(ImmutableList.of(1, 3, 5, 7, 11, 13, 17))), 30_000));
}
@Test
public void testLongPatchedBase()
throws Exception
{
testRoundTripNumeric(limit(cycle(concat(intsBetween(0, 18), ImmutableList.of(30_000, 20_000))), 30_000));
}
@Test
public void testLongStrideDictionary()
throws Exception
{
testRoundTripNumeric(concat(ImmutableList.of(1), nCopies(9999, 123), ImmutableList.of(2), nCopies(9999, 123)));
}
private void testRoundTripNumeric(Iterable<? extends Number> values)
throws Exception
{
List<Long> writeValues = ImmutableList.copyOf(values).stream()
.map(Number::longValue)
.collect(toList());
tester.testRoundTrip(
TINYINT,
writeValues.stream()
.map(Long::byteValue) // truncate values to byte range
.collect(toList()));
tester.testRoundTrip(
SMALLINT,
writeValues.stream()
.map(Long::shortValue) // truncate values to short range
.collect(toList()));
tester.testRoundTrip(
INTEGER,
writeValues.stream()
.map(Long::intValue) // truncate values to int range
.collect(toList()));
tester.testRoundTrip(BIGINT, writeValues);
tester.testRoundTrip(
DATE,
writeValues.stream()
.map(Long::intValue)
.map(SqlDate::new)
.collect(toList()));
tester.testRoundTrip(
TIMESTAMP,
writeValues.stream()
.map(timestamp -> new SqlTimestamp(timestamp, UTC_KEY))
.collect(toList()));
}
@Test
public void testFloatSequence()
throws Exception
{
tester.testRoundTrip(REAL, floatSequence(0.0f, 0.1f, 30_000));
}
@Test
public void testFloatNaNInfinity()
throws Exception
{
tester.testRoundTrip(REAL, ImmutableList.of(1000.0f, -1.23f, Float.POSITIVE_INFINITY));
tester.testRoundTrip(REAL, ImmutableList.of(-1000.0f, Float.NEGATIVE_INFINITY, 1.23f));
tester.testRoundTrip(REAL, ImmutableList.of(0.0f, Float.NEGATIVE_INFINITY, Float.POSITIVE_INFINITY));
tester.testRoundTrip(REAL, ImmutableList.of(Float.NaN, -0.0f, 1.0f));
tester.testRoundTrip(REAL, ImmutableList.of(Float.NaN, -1.0f, Float.POSITIVE_INFINITY));
tester.testRoundTrip(REAL, ImmutableList.of(Float.NaN, Float.NEGATIVE_INFINITY, 1.0f));
tester.testRoundTrip(REAL, ImmutableList.of(Float.NaN, Float.NEGATIVE_INFINITY, Float.POSITIVE_INFINITY));
}
@Test
public void testDoubleSequence()
throws Exception
{
tester.testRoundTrip(DOUBLE, doubleSequence(0, 0.1, 30_000));
}
@Test
public void testDecimalSequence()
throws Exception
{
tester.testRoundTrip(DECIMAL_TYPE_PRECISION_2, decimalSequence("-30", "1", 60, 2, 1));
tester.testRoundTrip(DECIMAL_TYPE_PRECISION_4, decimalSequence("-3000", "1", 60_00, 4, 2));
tester.testRoundTrip(DECIMAL_TYPE_PRECISION_8, decimalSequence("-3000000", "100", 60_000, 8, 4));
tester.testRoundTrip(DECIMAL_TYPE_PRECISION_17, decimalSequence("-30000000000", "1000000", 60_000, 17, 8));
tester.testRoundTrip(DECIMAL_TYPE_PRECISION_18, decimalSequence("-30000000000", "1000000", 60_000, 18, 8));
tester.testRoundTrip(DECIMAL_TYPE_PRECISION_38, decimalSequence("-3000000000000000000", "100000000000000", 60_000, 38, 16));
}
@Test
public void testDoubleNaNInfinity()
throws Exception
{
tester.testRoundTrip(DOUBLE, ImmutableList.of(1000.0, -1.0, Double.POSITIVE_INFINITY));
tester.testRoundTrip(DOUBLE, ImmutableList.of(-1000.0, Double.NEGATIVE_INFINITY, 1.0));
tester.testRoundTrip(DOUBLE, ImmutableList.of(0.0, Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY));
tester.testRoundTrip(DOUBLE, ImmutableList.of(Double.NaN, -1.0, 1.0));
tester.testRoundTrip(DOUBLE, ImmutableList.of(Double.NaN, -1.0, Double.POSITIVE_INFINITY));
tester.testRoundTrip(DOUBLE, ImmutableList.of(Double.NaN, Double.NEGATIVE_INFINITY, 1.0));
tester.testRoundTrip(DOUBLE, ImmutableList.of(Double.NaN, Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY));
}
@Test
public void testStringUnicode()
throws Exception
{
tester.testRoundTrip(VARCHAR, newArrayList(limit(cycle(ImmutableList.of("apple", "apple pie", "apple\uD835\uDC03", "apple\uFFFD")), 30_000)));
}
@Test
public void testStringDirectSequence()
throws Exception
{
tester.testRoundTrip(
VARCHAR,
intsBetween(0, 30_000).stream()
.map(Object::toString)
.collect(toList()));
}
@Test
public void testStringDictionarySequence()
throws Exception
{
tester.testRoundTrip(
VARCHAR,
newArrayList(limit(cycle(ImmutableList.of(1, 3, 5, 7, 11, 13, 17)), 30_000)).stream()
.map(Object::toString)
.collect(toList()));
}
@Test
public void testStringStrideDictionary()
throws Exception
{
tester.testRoundTrip(VARCHAR, newArrayList(concat(ImmutableList.of("a"), nCopies(9999, "123"), ImmutableList.of("b"), nCopies(9999, "123"))));
}
@Test
public void testEmptyStringSequence()
throws Exception
{
tester.testRoundTrip(VARCHAR, newArrayList(limit(cycle(""), 30_000)));
}
@Test
public void testCharDirectSequence()
throws Exception
{
tester.testRoundTrip(
CHAR,
intsBetween(0, 30_000).stream()
.map(this::toCharValue)
.collect(toList()));
}
@Test
public void testCharDictionarySequence()
throws Exception
{
tester.testRoundTrip(
CHAR,
newArrayList(limit(cycle(ImmutableList.of(1, 3, 5, 7, 11, 13, 17)), 30_000)).stream()
.map(this::toCharValue)
.collect(toList()));
}
@Test
public void testEmptyCharSequence()
throws Exception
{
tester.testRoundTrip(CHAR, newArrayList(limit(cycle(" "), 30_000)));
}
private String toCharValue(Object value)
{
return Strings.padEnd(value.toString(), CHAR_LENGTH, ' ');
}
@Test
public void testBinaryDirectSequence()
throws Exception
{
tester.testRoundTrip(
VARBINARY,
intsBetween(0, 30_000).stream()
.map(Object::toString)
.map(string -> string.getBytes(UTF_8))
.map(SqlVarbinary::new)
.collect(toList())
);
}
@Test
public void testBinaryDictionarySequence()
throws Exception
{
tester.testRoundTrip(
VARBINARY, ImmutableList.copyOf(limit(cycle(ImmutableList.of(1, 3, 5, 7, 11, 13, 17)), 30_000)).stream()
.map(Object::toString)
.map(string -> string.getBytes(UTF_8))
.map(SqlVarbinary::new)
.collect(toList())
);
}
@Test
public void testEmptyBinarySequence()
throws Exception
{
tester.testRoundTrip(VARBINARY, nCopies(30_000, new SqlVarbinary(new byte[0])));
}
@Test
public void testDwrfInvalidCheckpointsForRowGroupDictionary()
throws Exception
{
List<Integer> values = newArrayList(limit(
cycle(concat(
ImmutableList.of(1), nCopies(9999, 123),
ImmutableList.of(2), nCopies(9999, 123),
ImmutableList.of(3), nCopies(9999, 123),
nCopies(1_000_000, null))),
200_000));
tester.assertRoundTrip(INTEGER, values);
tester.assertRoundTrip(
VARCHAR,
newArrayList(values).stream()
.map(value -> value == null ? null : String.valueOf(value))
.collect(toList())
);
}
@Test
public void testDwrfInvalidCheckpointsForStripeDictionary()
throws Exception
{
tester.testRoundTrip(
VARCHAR,
newArrayList(limit(cycle(ImmutableList.of(1, 3, 5, 7, 11, 13, 17)), 200_000)).stream()
.map(Object::toString)
.collect(toList())
);
}
private static <T> Iterable<T> skipEvery(int n, Iterable<T> iterable)
{
return () -> new AbstractIterator<T>()
{
private final Iterator<T> delegate = iterable.iterator();
private int position;
@Override
protected T computeNext()
{
while (true) {
if (!delegate.hasNext()) {
return endOfData();
}
T next = delegate.next();
position++;
if (position <= n) {
return next;
}
position = 0;
}
}
};
}
private static <T> Iterable<T> repeatEach(int n, Iterable<T> iterable)
{
return () -> new AbstractIterator<T>()
{
private final Iterator<T> delegate = iterable.iterator();
private int position;
private T value;
@Override
protected T computeNext()
{
if (position == 0) {
if (!delegate.hasNext()) {
return endOfData();
}
value = delegate.next();
}
position++;
if (position >= n) {
position = 0;
}
return value;
}
};
}
private static List<Double> doubleSequence(double start, double step, int items)
{
List<Double> values = new ArrayList<>();
double nextValue = start;
for (int i = 0; i < items; i++) {
values.add(nextValue);
nextValue += step;
}
return values;
}
private static List<Float> floatSequence(float start, float step, int items)
{
Builder<Float> values = ImmutableList.builder();
float nextValue = start;
for (int i = 0; i < items; i++) {
values.add(nextValue);
nextValue += step;
}
return values.build();
}
private static List<SqlDecimal> decimalSequence(String start, String step, int items, int precision, int scale)
{
BigInteger decimalStep = new BigInteger(step);
List<SqlDecimal> values = new ArrayList<>();
BigInteger nextValue = new BigInteger(start);
for (int i = 0; i < items; i++) {
values.add(new SqlDecimal(nextValue, precision, scale));
nextValue = nextValue.add(decimalStep);
}
return values;
}
private static ContiguousSet<Integer> intsBetween(int lowerInclusive, int upperExclusive)
{
return ContiguousSet.create(Range.openClosed(lowerInclusive, upperExclusive), DiscreteDomain.integers());
}
}