/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.facebook.presto.rcfile;
import com.facebook.presto.spi.Page;
import com.facebook.presto.spi.block.Block;
import com.facebook.presto.spi.type.StandardTypes;
import com.facebook.presto.spi.type.Type;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import io.airlift.slice.Slice;
import io.airlift.slice.Slices;
import io.airlift.slice.XxHash64;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import static com.google.common.base.Preconditions.checkArgument;
import static java.util.Objects.requireNonNull;
import static java.util.stream.Collectors.toList;
public class RcFileWriteValidation
{
// RCFile format version byte recorded when the file was written
private final byte version;
// file metadata key/value pairs (immutable copy)
private final Map<String, String> metadata;
// compression codec class name; empty when the file is uncompressed
private final Optional<String> codecClassName;
// the two 64-bit halves of the file's sync marker
private final long syncFirst;
private final long syncSecond;
// aggregate checksums of the written data, recomputed on read-back for validation
private final WriteChecksum checksum;
/**
 * Private: instances are created via {@link RcFileWriteValidationBuilder}.
 *
 * @param version RCFile format version byte
 * @param metadata file metadata; defensively copied into an immutable map
 * @param codecClassName codec class name, or empty when uncompressed; must not be null
 * @param syncFirst first half of the sync marker
 * @param syncSecond second half of the sync marker
 * @param checksum aggregate checksum of the written data; must not be null
 */
private RcFileWriteValidation(byte version, Map<String, String> metadata, Optional<String> codecClassName, long syncFirst, long syncSecond, WriteChecksum checksum)
{
this.version = version;
this.metadata = ImmutableMap.copyOf(requireNonNull(metadata, "metadata is null"));
this.codecClassName = requireNonNull(codecClassName, "codecClassName is null");
this.syncFirst = syncFirst;
this.syncSecond = syncSecond;
this.checksum = requireNonNull(checksum, "checksum is null");
}
/** Returns the RCFile format version byte recorded at write time. */
public byte getVersion()
{
return version;
}
/** Returns the file metadata as an immutable map (copied in the constructor). */
public Map<String, String> getMetadata()
{
return metadata;
}
/** Returns the compression codec class name, or empty when the file is uncompressed. */
public Optional<String> getCodecClassName()
{
return codecClassName;
}
/** Returns the first 64-bit half of the file's sync marker. */
public long getSyncFirst()
{
return syncFirst;
}
/** Returns the second 64-bit half of the file's sync marker. */
public long getSyncSecond()
{
return syncSecond;
}
/** Returns the aggregate checksum of the written data. */
public WriteChecksum getChecksum()
{
return checksum;
}
public static class WriteChecksum
{
private final long totalRowCount;
private final long rowGroupHash;
private final List<Long> columnHashes;
public WriteChecksum(long totalRowCount, long rowGroupHash, List<Long> columnHashes)
{
this.totalRowCount = totalRowCount;
this.rowGroupHash = rowGroupHash;
this.columnHashes = columnHashes;
}
public long getTotalRowCount()
{
return totalRowCount;
}
public long getRowGroupHash()
{
return rowGroupHash;
}
public List<Long> getColumnHashes()
{
return columnHashes;
}
}
/**
 * Incrementally computes a {@link WriteChecksum} as row groups and pages are written.
 * NOTE(review): the resulting hashes depend on the exact order of update() calls and
 * on the iteration order below; the reader-side validation must fold values in the
 * same order for the checksums to match.
 */
public static class WriteChecksumBuilder
{
// Sentinel hash for null positions. This value is a large arbitrary prime.
private static final long NULL_HASH_CODE = 0x6e3efbd56c16a0cbL;
private final List<Type> types;
// running count of rows across all pages added
private long totalRowCount;
// one hasher per column, in column order
private final List<XxHash64> columnHashes;
private final XxHash64 rowGroupHash = new XxHash64();
// scratch buffer used to feed fixed-width values into the hashers
private final byte[] longBuffer = new byte[Long.BYTES];
private final Slice longSlice = Slices.wrappedBuffer(longBuffer);
private WriteChecksumBuilder(List<Type> types)
{
this.types = ImmutableList.copyOf(requireNonNull(types, "types is null"));
// allocate an independent hasher for each column
ImmutableList.Builder<XxHash64> columnHashes = ImmutableList.builder();
for (Type ignored : types) {
columnHashes.add(new XxHash64());
}
this.columnHashes = columnHashes.build();
}
/**
 * Creates a builder from a column-index-to-type map. The checksum covers every
 * column, so the map must contain a contiguous range of indexes 0..max.
 *
 * @param readColumns map from column index to type; must be non-empty and dense
 * @throws IllegalArgumentException if any column in the range is missing
 */
public static WriteChecksumBuilder createWriteChecksumBuilder(Map<Integer, Type> readColumns)
{
requireNonNull(readColumns, "readColumns is null");
checkArgument(!readColumns.isEmpty(), "readColumns is empty");
int columnCount = readColumns.keySet().stream()
.mapToInt(Integer::intValue)
.max().getAsInt() + 1;
checkArgument(readColumns.size() == columnCount, "checksum requires all columns to be read");
// build the dense, ordered type list; fails if any index in 0..max is absent
ImmutableList.Builder<Type> types = ImmutableList.builder();
for (int column = 0; column < columnCount; column++) {
Type type = readColumns.get(column);
checkArgument(type != null, "checksum requires all columns to be read");
types.add(type);
}
return new WriteChecksumBuilder(types.build());
}
/** Folds a row group's row count (as a 4-byte little-slice int) into the row-group hash. */
public void addRowGroup(int rowCount)
{
longSlice.setInt(0, rowCount);
rowGroupHash.update(longBuffer, 0, Integer.BYTES);
}
/**
 * Folds every value of every column of the page into the corresponding column hasher
 * and advances the total row count.
 *
 * @param page page whose channel count must equal the number of columns
 */
public void addPage(Page page)
{
requireNonNull(page, "page is null");
checkArgument(page.getChannelCount() == columnHashes.size(), "invalid page");
totalRowCount += page.getPositionCount();
for (int channel = 0; channel < columnHashes.size(); channel++) {
Type type = types.get(channel);
Block block = page.getBlock(channel);
XxHash64 xxHash64 = columnHashes.get(channel);
for (int position = 0; position < block.getPositionCount(); position++) {
// hash each position to a long, then feed those 8 bytes to the column hasher
long hash = hashPositionSkipNullMapKeys(type, block, position);
longSlice.setLong(0, hash);
xxHash64.update(longBuffer);
}
}
}
/**
 * Hashes a single position. Nulls hash to {@link #NULL_HASH_CODE}; maps, arrays,
 * and rows are hashed recursively; map entries whose key is null are skipped
 * entirely (hence the method name); everything else uses {@code Type.hash}.
 */
private static long hashPositionSkipNullMapKeys(Type type, Block block, int position)
{
if (block.isNull(position)) {
return NULL_HASH_CODE;
}
if (type.getTypeSignature().getBase().equals(StandardTypes.MAP)) {
Type keyType = type.getTypeParameters().get(0);
Type valueType = type.getTypeParameters().get(1);
Block mapBlock = (Block) type.getObject(block, position);
// map block stores alternating key/value positions: key at i, value at i + 1
long hash = 0;
for (int i = 0; i < mapBlock.getPositionCount(); i += 2) {
if (!mapBlock.isNull(i)) {
// addition makes the map hash independent of entry order
hash += hashPositionSkipNullMapKeys(keyType, mapBlock, i);
hash += hashPositionSkipNullMapKeys(valueType, mapBlock, i + 1);
}
}
return hash;
}
if (type.getTypeSignature().getBase().equals(StandardTypes.ARRAY)) {
Type elementType = type.getTypeParameters().get(0);
Block array = (Block) type.getObject(block, position);
// 31-based polynomial hash: array element order matters
long hash = 0;
for (int i = 0; i < array.getPositionCount(); i++) {
hash = 31 * hash + hashPositionSkipNullMapKeys(elementType, array, i);
}
return hash;
}
if (type.getTypeSignature().getBase().equals(StandardTypes.ROW)) {
Block row = (Block) type.getObject(block, position);
// fields hashed in declaration order, each with its own declared type
long hash = 0;
for (int i = 0; i < row.getPositionCount(); i++) {
Type elementType = type.getTypeParameters().get(i);
hash = 31 * hash + hashPositionSkipNullMapKeys(elementType, row, i);
}
return hash;
}
return type.hash(block, position);
}
/** Finalizes all hashers and returns the immutable checksum snapshot. */
public WriteChecksum build()
{
return new WriteChecksum(
totalRowCount,
rowGroupHash.hash(),
columnHashes.stream()
.map(XxHash64::hash)
.collect(toList()));
}
}
/**
 * Mutable builder that records what was written to a file so a
 * {@link RcFileWriteValidation} can be produced for later validation.
 * All setters return {@code this} for chaining.
 */
public static class RcFileWriteValidationBuilder
{
private byte version;
private final Map<String, String> metadata = new HashMap<>();
// Default to empty (no codec / uncompressed) so build() does not throw from the
// RcFileWriteValidation constructor's null check when no codec is ever recorded
private Optional<String> codecClassName = Optional.empty();
private long syncFirst;
private long syncSecond;
private final WriteChecksumBuilder checksum;
/**
 * @param types column types of the file being written; must not be null
 */
public RcFileWriteValidationBuilder(List<Type> types)
{
this.checksum = new WriteChecksumBuilder(types);
}
/** Records the RCFile format version byte. */
public RcFileWriteValidationBuilder setVersion(byte version)
{
this.version = version;
return this;
}
/** Records one file metadata key/value pair. */
public RcFileWriteValidationBuilder addMetadataProperty(String key, String value)
{
metadata.put(key, value);
return this;
}
/** Records the compression codec class name, or empty for an uncompressed file. */
public RcFileWriteValidationBuilder setCodecClassName(Optional<String> codecClassName)
{
this.codecClassName = codecClassName;
return this;
}
/** Records the first 64-bit half of the sync marker. */
public RcFileWriteValidationBuilder setSyncFirst(long syncFirst)
{
this.syncFirst = syncFirst;
return this;
}
/** Records the second 64-bit half of the sync marker. */
public RcFileWriteValidationBuilder setSyncSecond(long syncSecond)
{
this.syncSecond = syncSecond;
return this;
}
/** Folds a row group's row count into the checksum. */
public RcFileWriteValidationBuilder addRowGroup(int rowCount)
{
checksum.addRowGroup(rowCount);
return this;
}
/** Folds a written page into the checksum. */
public RcFileWriteValidationBuilder addPage(Page page)
{
checksum.addPage(page);
return this;
}
/** Builds the immutable validation snapshot. */
public RcFileWriteValidation build()
{
return new RcFileWriteValidation(version, metadata, codecClassName, syncFirst, syncSecond, checksum.build());
}
}
}