/**
* Copyright 2012 LiveRamp
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.liveramp.hank.performance;
import java.io.BufferedOutputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.Arrays;
import java.util.Collections;
import com.liveramp.hank.compression.cueball.NoCueballCompressionCodec;
import com.liveramp.hank.coordinator.DomainVersion;
import com.liveramp.hank.coordinator.mock.MockDomainVersion;
import com.liveramp.hank.hasher.Hasher;
import com.liveramp.hank.storage.LocalPartitionRemoteFileOps;
import com.liveramp.hank.storage.PartitionRemoteFileOps;
import com.liveramp.hank.storage.Writer;
import com.liveramp.hank.storage.cueball.Cueball;
import com.liveramp.hank.storage.cueball.CueballFilePath;
import com.liveramp.hank.storage.cueball.CueballMerger;
import com.liveramp.hank.storage.incremental.IncrementalDomainVersionProperties;
import com.liveramp.hank.util.EncodingHelper;
import com.liveramp.hank.util.FormatUtils;
import com.liveramp.hank.util.HankTimer;
import com.liveramp.hank.util.IOStreamUtils;
/**
 * Command-line performance test for the Cueball storage engine.
 *
 * <p>Running {@link #main(String[])} with a local scratch directory as the
 * single argument executes, in order:
 * <ol>
 * <li>a writer test that writes {@link #getNumTotalRecords()} records through a
 * Cueball {@link Writer} and prints the elapsed time and throughput, followed
 * by two raw "zero file" writes of the same number of records (with two
 * different buffer sizes) whose throughput is printed alongside;</li>
 * <li>a merger test that writes a base and a delta with Cueball writers and
 * then times a {@link CueballMerger} merge of the two.</li>
 * </ol>
 * All results are printed to standard output.
 */
public class PerformanceTestCueball {

  // Size in bytes of every generated value
  private static final int VALUE_SIZE = 16;
  // Size in bytes of every generated key
  private static final int KEY_SIZE = 20;
  // Key hashes are as wide as the keys themselves (see KeyHasher)
  private static final int KEY_HASH_SIZE = KEY_SIZE;
  // Number of hash index bits handed to Cueball; KeyHasher requires a multiple of 8
  private static final int HASH_INDEX_BITS = 16;
  // Number of consecutive keys that KeyHasher maps to the same key block
  private static final int NUM_RECORDS_PER_BLOCK = 1000;

  /**
   * Builds the Cueball instance used by the tests. Both root arguments point at
   * {@code <localTmpDir>/remote_domain_root}, no compression codec is used, and
   * hashing is done by {@link KeyHasher}.
   *
   * @param localTmpDir local scratch directory
   * @return the configured Cueball instance
   */
  private static Cueball getCueball(String localTmpDir) {
    String root = localTmpDir + "/remote_domain_root";
    return new Cueball(
        KEY_HASH_SIZE, new KeyHasher(HASH_INDEX_BITS), VALUE_SIZE, HASH_INDEX_BITS, root, root,
        new LocalPartitionRemoteFileOps.Factory(), NoCueballCompressionCodec.class, null, 0);
  }

  /**
   * @return the number of records used by the tests:
   * {@code 2^HASH_INDEX_BITS * NUM_RECORDS_PER_BLOCK}
   */
  private static long getNumTotalRecords() {
    // Shift a long so that the whole product is computed in 64 bits; with an int
    // shift the multiplication would silently wrap before being widened.
    return (1L << HASH_INDEX_BITS) * NUM_RECORDS_PER_BLOCK;
  }

  /**
   * Writes {@link #getNumTotalRecords()} records through a Cueball writer, prints
   * the elapsed time and throughput, then writes two zero files of the same
   * number of records (one with {@code IOStreamUtils.DEFAULT_BUFFER_SIZE}, one
   * with a 10 MiB buffer) and prints their throughput.
   *
   * @param localTmpDir local scratch directory in which all files are created
   * @throws IOException if writing fails
   */
  public static void testPerformanceCueballWriter(String localTmpDir) throws IOException {
    // Fill in all indexable blocks
    long numRecords = getNumTotalRecords();
    Writer writer = getCueball(localTmpDir).getWriter(
        new MockDomainVersion(0, 0L, new IncrementalDomainVersionProperties.Base()),
        new LocalPartitionRemoteFileOps(localTmpDir, 0), 0);
    HankTimer timer = new HankTimer();
    for (long i = 0; i < numRecords; ++i) {
      writer.write(key(i, KEY_SIZE), value(i, VALUE_SIZE));
    }
    // Closing the writer is part of the measured time
    writer.close();
    double elapsedMs = timer.getDurationMs();
    double elapsedSecs = elapsedMs / 1000.0;
    long totalBytes = numRecords * (KEY_HASH_SIZE + VALUE_SIZE);
    System.out.println("Test took " + elapsedMs + "ms, wrote " + numRecords + " records totalling " + FormatUtils.formatNumBytes(totalBytes));
    System.out.println(String.format("Throughput: %.2f writes/sec", numRecords / elapsedSecs));
    System.out.println("Throughput: " + FormatUtils.formatDataThroughput(totalBytes / elapsedSecs));
    createZeroFile(localTmpDir + "/zero_file0", numRecords, IOStreamUtils.DEFAULT_BUFFER_SIZE);
    createZeroFile(localTmpDir + "/zero_file1", numRecords, 10 << 20);
  }

  /**
   * Writes {@code numRecords} all-zero records of {@code KEY_HASH_SIZE + VALUE_SIZE}
   * bytes each to {@code path} through a buffered stream and prints the achieved
   * throughput. The measured time covers the writes and the final flush/close.
   *
   * @param path       file to create
   * @param numRecords number of records to write
   * @param bufferSize size in bytes of the output buffer
   * @throws IOException if the file cannot be created or written
   */
  private static void createZeroFile(String path, long numRecords, int bufferSize) throws IOException {
    byte[] record = new byte[KEY_HASH_SIZE + VALUE_SIZE];
    FileOutputStream zeroFile = new FileOutputStream(path);
    final double elapsedSecs;
    try {
      BufferedOutputStream bufferedOutputStream = new BufferedOutputStream(zeroFile, bufferSize);
      HankTimer timer = new HankTimer();
      // The counter is a long, like the bound: an int counter would wrap around
      // and never terminate for bounds above Integer.MAX_VALUE.
      for (long i = 0; i < numRecords; ++i) {
        bufferedOutputStream.write(record);
      }
      // Flushes the buffer and closes the underlying file stream
      bufferedOutputStream.close();
      elapsedSecs = timer.getDurationMs() / 1000.0;
    } finally {
      // Releases the file handle when a write failed; harmless if already closed
      zeroFile.close();
    }
    long totalBytes = numRecords * record.length;
    System.out.println("Throughput to write zero file of size " + FormatUtils.formatNumBytes(totalBytes)
        + " with buffer of " + FormatUtils.formatNumBytes(bufferSize)
        + ": " + FormatUtils.formatDataThroughput(totalBytes / elapsedSecs));
  }

  /**
   * Creates the input files of the merger test: every record index that is a
   * multiple of the delta frequency is written to the delta version, all other
   * indices are written to the base version.
   *
   * @param localTmpDir  local scratch directory
   * @param baseVersion  domain version written as the base
   * @param deltaVersion domain version written as the delta
   * @throws IOException if writing fails
   */
  private static void prepareTestPerformanceCueballMerger(String localTmpDir,
                                                          DomainVersion baseVersion,
                                                          DomainVersion deltaVersion) throws IOException {
    PartitionRemoteFileOps partitionRemoteFileOps = new LocalPartitionRemoteFileOps(localTmpDir, 0);
    long numRecords = getNumTotalRecords();
    int deltaFrequency = 1000000; // num records in delta = numRecords / deltaFrequency
    Cueball cueball = getCueball(localTmpDir);
    // Create base
    Writer baseWriter = cueball.getWriter(
        baseVersion,
        partitionRemoteFileOps, 0);
    for (long i = 0; i < numRecords; ++i) {
      if (i % deltaFrequency != 0) {
        baseWriter.write(key(i, KEY_SIZE), value(i, VALUE_SIZE));
      }
    }
    baseWriter.close();
    // Create delta
    Writer deltaWriter = cueball.getWriter(
        deltaVersion,
        partitionRemoteFileOps, 0);
    for (long i = 0; i < numRecords; ++i) {
      if (i % deltaFrequency == 0) {
        deltaWriter.write(key(i, KEY_SIZE), value(i, VALUE_SIZE));
      }
    }
    deltaWriter.close();
  }

  /**
   * Times a {@link CueballMerger} merge of the base file with the single delta
   * file and prints the elapsed time in seconds.
   *
   * @param localTmpDir    local scratch directory
   * @param baseVersion    version whose file is the merge base
   * @param deltaVersion   version whose file is the only delta
   * @param newBaseVersion version naming the merge output
   * @throws IOException if the merge fails
   */
  private static void doTestPerformanceCueballMerger(String localTmpDir,
                                                     DomainVersion baseVersion,
                                                     DomainVersion deltaVersion,
                                                     DomainVersion newBaseVersion) throws IOException {
    HankTimer timer = new HankTimer();
    // NOTE(review): inputs are read from "<localTmpDir>/0/" while the output is
    // written directly under "<localTmpDir>/" -- confirm this is intended.
    new CueballMerger().merge(
        new CueballFilePath(localTmpDir + "/0/" + Cueball.getName(baseVersion)),
        Collections.singletonList(new CueballFilePath(localTmpDir + "/0/" + Cueball.getName(deltaVersion))),
        localTmpDir + "/" + Cueball.getName(newBaseVersion),
        KEY_HASH_SIZE,
        VALUE_SIZE,
        null,
        HASH_INDEX_BITS,
        new NoCueballCompressionCodec());
    double elapsedS = timer.getDurationMs() / 1000.0;
    System.out.println("Merge done in " + elapsedS + " seconds");
  }

  /**
   * Prepares a base (version 0) and a delta (version 1) and then times merging
   * them into a new base (version 1).
   *
   * @param localTmpDir local scratch directory in which all files are created
   * @throws IOException if writing or merging fails
   */
  public static void testPerformanceCueballMerger(String localTmpDir) throws IOException {
    DomainVersion baseVersion = new MockDomainVersion(0, 0L, new IncrementalDomainVersionProperties.Base());
    DomainVersion deltaVersion = new MockDomainVersion(1, 0L, new IncrementalDomainVersionProperties.Delta(0));
    DomainVersion newBaseVersion = new MockDomainVersion(1, 0L, new IncrementalDomainVersionProperties.Base());
    prepareTestPerformanceCueballMerger(localTmpDir, baseVersion, deltaVersion);
    doTestPerformanceCueballMerger(localTmpDir, baseVersion, deltaVersion, newBaseVersion);
  }

  /**
   * Builds a key of {@code keySize} bytes by encoding {@code key} into a fresh
   * array with {@code EncodingHelper.encodeLittleEndianFixedWidthLong}.
   */
  private static ByteBuffer key(long key, int keySize) {
    byte[] keyBytes = new byte[keySize];
    EncodingHelper.encodeLittleEndianFixedWidthLong(key, keyBytes);
    return ByteBuffer.wrap(keyBytes);
  }

  /**
   * Builds a value of {@code valueSize} bytes, each byte being the low-order
   * byte of {@code value}.
   */
  private static ByteBuffer value(long value, int valueSize) {
    byte[] v = new byte[valueSize];
    Arrays.fill(v, (byte) value);
    return ByteBuffer.wrap(v);
  }

  // Hash function designed to read in longs and output hashes that conserve
  // the same ordering and satisfy the hash index
  private static class KeyHasher implements Hasher {

    private final int hashIndexBits;

    /**
     * @param hashIndexBits number of hash index bits; must be a multiple of 8
     * @throws IllegalArgumentException if {@code hashIndexBits} is not a multiple of 8
     */
    public KeyHasher(int hashIndexBits) {
      // Validated once here instead of on every hash() call
      if (hashIndexBits % 8 != 0) {
        throw new IllegalArgumentException("hashIndexBits must be a multiple of 8");
      }
      this.hashIndexBits = hashIndexBits;
    }

    /**
     * Decodes the key as a long, splits it into a block number
     * ({@code key / NUM_RECORDS_PER_BLOCK}) and an index within the block
     * ({@code key % NUM_RECORDS_PER_BLOCK}), encodes the index into the leading
     * bytes and the block number into the trailing {@code hashIndexBits / 8}
     * bytes of a {@code keySize}-byte scratch array, and writes that array in
     * reverse byte order into {@code hashBytes} (so the block bytes come first).
     *
     * @param keyBytes  key to hash, as produced by {@code key(long, int)}
     * @param keySize   number of hash bytes to produce
     * @param hashBytes destination; its first {@code keySize} bytes are overwritten
     */
    @Override
    public void hash(ByteBuffer keyBytes, int keySize, byte[] hashBytes) {
      long key = EncodingHelper.decodeLittleEndianFixedWidthLong(keyBytes);
      long keyBlock = key / NUM_RECORDS_PER_BLOCK;
      long keyIndex = key % NUM_RECORDS_PER_BLOCK;
      byte[] keyHashBytes = new byte[keySize];
      // Note: this is valid because hashIndexBits must be a multiple of 8
      int hashIndexBytes = hashIndexBits / 8;
      // Encode bytes for key block
      EncodingHelper.encodeLittleEndianFixedWidthLong(keyBlock, keyHashBytes, keyHashBytes.length - hashIndexBytes, hashIndexBytes);
      // Encode bytes for key index in block
      EncodingHelper.encodeLittleEndianFixedWidthLong(keyIndex, keyHashBytes, 0, keyHashBytes.length - hashIndexBytes);
      // Copy in reverse byte order
      for (int i = 0; i < keyHashBytes.length; ++i) {
        hashBytes[i] = keyHashBytes[keyHashBytes.length - 1 - i];
      }
    }
  }

  /**
   * Runs the writer test followed by the merger test.
   *
   * @param args {@code args[0]} is the local scratch directory to write into
   * @throws IOException if any test fails to write or merge
   * @throws IllegalArgumentException if no directory argument is given
   */
  public static void main(String[] args) throws IOException {
    if (args.length < 1) {
      throw new IllegalArgumentException("Usage: PerformanceTestCueball <local tmp dir>");
    }
    String localTmpDir = args[0];
    testPerformanceCueballWriter(localTmpDir);
    testPerformanceCueballMerger(localTmpDir);
  }
}