/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.codec.prefixtree.encode.other;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.util.Arrays;
import java.util.HashSet;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.util.ArrayUtils;
import org.apache.hadoop.hbase.util.CollectionUtils;
import org.apache.hadoop.hbase.util.vint.UFIntTool;
import com.google.common.base.Joiner;
/**
* Used to de-duplicate, sort, minimize/diff, and serialize timestamps and mvccVersions from a
* collection of Cells.
*
* 1. add longs to a HashSet for fast de-duplication
* 2. determine the min and max (read from the ends of the sorted array built in step 4)
* 3. copy all values to a new long[]
* 4. Arrays.sort the long[]
* 5. calculate maxDelta = max - min
* 6. determine FInt width based on maxDelta
* 7. PrefixTreeEncoder binary searches to find index of each value
*/
@InterfaceAudience.Private
public class LongEncoder {

  /****************** fields ****************************/

  /** Values passed to {@link #add(long)} since the last {@link #reset()}; the set drops duplicates. */
  protected HashSet<Long> uniqueValues;

  /** Ascending copy of {@link #uniqueValues} built by {@link #compile()}; null before that and after reset. */
  protected long[] sortedUniqueValues;

  /** Smallest value, largest value, and {@code max - min}, as computed by {@link #compile()}. */
  protected long min, max, maxDelta;

  /** Number of bytes used to write each delta in {@link #writeBytes(OutputStream)}. */
  protected int bytesPerDelta;

  /** Result of {@code UFIntTool.numBytes(numUnique - 1)}; 0 when exactly one unique value was compiled. */
  protected int bytesPerIndex;

  /** {@code numUnique * bytesPerDelta}, as computed by {@link #compile()}. */
  protected int totalCompressedBytes;

  /****************** construct ****************************/

  /** Creates an encoder with an empty value set. */
  public LongEncoder() {
    this.uniqueValues = new HashSet<>();
  }

  /**
   * Discards all added values and every result of a previous {@link #compile()} so the instance
   * can be reused.
   */
  public void reset() {
    uniqueValues.clear();
    sortedUniqueValues = null;
    min = Long.MAX_VALUE;
    max = Long.MIN_VALUE;
    maxDelta = Long.MIN_VALUE;
    bytesPerIndex = 0;
    bytesPerDelta = 0;
    totalCompressedBytes = 0;
  }

  /************* methods ***************************/

  /**
   * Records a value; adding the same value more than once has no further effect.
   *
   * @param timestamp the value to record
   */
  public void add(long timestamp) {
    uniqueValues.add(timestamp);
  }

  /**
   * Sorts the values added so far and derives min, max, maxDelta and the byte widths from them.
   *
   * @return this encoder, to allow call chaining
   */
  public LongEncoder compile() {
    int numUnique = uniqueValues.size();
    if (numUnique == 1) {
      min = CollectionUtils.getFirst(uniqueValues);
      // Spell out the complete single-value state instead of relying on field defaults or on an
      // earlier reset(): the only delta is zero, so nothing needs to be written per value.
      max = min;
      maxDelta = 0;
      bytesPerDelta = 0;
      bytesPerIndex = 0;
      totalCompressedBytes = 0;
      sortedUniqueValues = new long[] { min };
      return this;
    }

    sortedUniqueValues = new long[numUnique];
    int lastIndex = -1;
    for (long value : uniqueValues) {
      sortedUniqueValues[++lastIndex] = value;
    }
    Arrays.sort(sortedUniqueValues);

    // NOTE(review): an empty value set reaches these helpers with a zero-length array; their
    // behavior for that input is not visible in this file - confirm.
    min = ArrayUtils.getFirst(sortedUniqueValues);
    max = ArrayUtils.getLast(sortedUniqueValues);
    maxDelta = max - min;

    if (maxDelta > 0) {
      bytesPerDelta = UFIntTool.numBytes(maxDelta);
    } else if (maxDelta < 0) {
      // With two or more distinct sorted values max > min, so a negative difference means
      // max - min overflowed the signed range. Read as an unsigned 64-bit number the delta
      // needs every byte of a long; choosing 0 here would drop all deltas on write.
      // NOTE(review): assumes UFIntTool.writeBytes emits the low-order bytes of the long as-is
      // and that the reader adds the delta back to min with wrapping arithmetic - confirm.
      bytesPerDelta = Long.SIZE / Byte.SIZE;
    } else {
      bytesPerDelta = 0;
    }

    int maxIndex = numUnique - 1;
    bytesPerIndex = UFIntTool.numBytes(maxIndex);
    totalCompressedBytes = numUnique * bytesPerDelta;
    return this;
  }

  /**
   * Returns the difference between the sorted value at {@code index} and the minimum.
   * {@link #compile()} must have been called first.
   *
   * @param index position in the sorted unique array
   * @return {@code sortedUniqueValues[index] - min}, or 0 when the sorted array is empty
   */
  public long getDelta(int index) {
    if (sortedUniqueValues.length == 0) {
      return 0;
    }
    return sortedUniqueValues[index] - min;
  }

  /**
   * Binary-searches the sorted unique array for {@code value}.
   *
   * @param value a value expected to have been added before {@link #compile()}
   * @return the position of {@code value}; a negative number (per {@link Arrays#binarySearch(long[], long)})
   *         if it is not present
   */
  public int getIndex(long value) {
    // should always find an exact match
    return Arrays.binarySearch(sortedUniqueValues, value);
  }

  /**
   * Writes the delta of every sorted value from the minimum, in ascending order, each through
   * {@code UFIntTool.writeBytes} with a width of {@link #getBytesPerDelta()} bytes.
   *
   * @param os destination stream
   * @throws IOException if the underlying write fails
   */
  public void writeBytes(OutputStream os) throws IOException {
    for (long value : sortedUniqueValues) {
      UFIntTool.writeBytes(bytesPerDelta, value - min, os);
    }
  }

  /**
   * Convenience method for tests: collects the output of {@link #writeBytes(OutputStream)} into
   * a byte array.
   *
   * @return the serialized deltas
   * @throws IOException if writing fails
   */
  public byte[] getByteArray() throws IOException {
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    writeBytes(baos);
    return baos.toByteArray();
  }

  /** @return number of sorted unique values multiplied by the bytes used per delta */
  public int getOutputArrayLength() {
    return sortedUniqueValues.length * bytesPerDelta;
  }

  /** @return length of the sorted unique array built by {@link #compile()} */
  public int getNumUniqueValues() {
    return sortedUniqueValues.length;
  }

  /******************* Object methods **********************/

  /** @return the sorted unique values as {@code [v1,v2,...]}, or {@code []} when there are none */
  @Override
  public String toString() {
    if (ArrayUtils.isEmpty(sortedUniqueValues)) {
      return "[]";
    }
    return "[" + Joiner.on(",").join(ArrayUtils.toList(sortedUniqueValues)) + "]";
  }

  /******************** get/set **************************/

  /** @return the minimum value found by {@link #compile()} */
  public long getMin() {
    return min;
  }

  /** @return the number of bytes written per delta */
  public int getBytesPerDelta() {
    return bytesPerDelta;
  }

  /** @return the byte width computed for indexes into the sorted unique array */
  public int getBytesPerIndex() {
    return bytesPerIndex;
  }

  /** @return the value of {@code numUnique * bytesPerDelta} computed by {@link #compile()} */
  public int getTotalCompressedBytes() {
    return totalCompressedBytes;
  }

  /**
   * @return the internal sorted array itself, not a copy, so modifying it alters this encoder's
   *         state
   */
  public long[] getSortedUniqueTimestamps() {
    return sortedUniqueValues;
  }
}