/**
* Copyright 2013 LiveRamp
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.liveramp.hank.storage.curly;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Random;
import com.liveramp.commons.util.BytesUtils;
import com.liveramp.hank.compression.cueball.CueballCompressionCodec;
import com.liveramp.hank.coordinator.mock.MockDomainVersion;
import com.liveramp.hank.hasher.Hasher;
import com.liveramp.hank.partitioner.Partitioner;
import com.liveramp.hank.storage.LocalPartitionRemoteFileOps;
public class TestDomainGenerator {

  /** Number of leading command-line arguments that {@link #main(String[])} reads. */
  private static final int REQUIRED_ARG_COUNT = 10;

  /**
   * Generates random key/value records, groups them by partition, and writes each
   * partition through a {@link CurlyWriter}, printing the elapsed write time.
   *
   * <p>Keys and values are filled from a {@link Random} created with a fixed seed, so the
   * same arguments produce the same random byte sequences on every run.
   *
   * @param args positional arguments (extra trailing arguments are ignored):
   *   <pre>
   *   [0] output path handed to LocalPartitionRemoteFileOps
   *   [1] total number of records to generate
   *   [2] key length in bytes
   *   [3] hash length in bytes
   *   [4] index bits (passed through to the Curly constructor)
   *   [5] value length in bytes
   *   [6] fully qualified Hasher class name
   *   [7] fully qualified CueballCompressionCodec class name
   *   [8] number of partitions
   *   [9] fully qualified Partitioner class name
   *   </pre>
   * @throws IllegalArgumentException if fewer than ten arguments are supplied
   * @throws NumberFormatException if a numeric argument cannot be parsed as an int
   * @throws ClassCastException if a named class is not of the expected type
   * @throws InstantiationException if the partitioner or hasher cannot be instantiated
   * @throws IllegalAccessException if the partitioner or hasher constructor is not accessible
   * @throws Exception for any other failure, e.g. an unknown class name or a write error
   */
  public static void main(String[] args) throws InstantiationException, IllegalAccessException, Exception {
    if (args == null || args.length < REQUIRED_ARG_COUNT) {
      throw new IllegalArgumentException("Expected " + REQUIRED_ARG_COUNT + " arguments but got "
          + (args == null ? 0 : args.length)
          + ". Usage: TestDomainGenerator <outputPath> <totalNumRecords> <keyLength> <hashLength>"
          + " <indexBits> <valueLength> <hasherClass> <compressionCodecClass> <numPartitions>"
          + " <partitionerClass>");
    }

    String outputPath = args[0];
    int totalNumRecords = Integer.parseInt(args[1]);
    int keyLength = Integer.parseInt(args[2]);
    int hashLength = Integer.parseInt(args[3]);
    int indexBits = Integer.parseInt(args[4]);
    int valueLength = Integer.parseInt(args[5]);
    String hasherClassName = args[6];
    String compressionCodecClassName = args[7];
    int numPartitions = Integer.parseInt(args[8]);
    String partitionerClass = args[9];

    // asSubclass verifies the type at runtime, so a wrong class name fails here with a
    // ClassCastException rather than slipping through an unchecked generic cast.
    final Class<? extends CueballCompressionCodec> codecClass =
        Class.forName(compressionCodecClassName).asSubclass(CueballCompressionCodec.class);
    Partitioner p = (Partitioner)Class.forName(partitionerClass).newInstance();
    Hasher h = (Hasher)Class.forName(hasherClassName).newInstance();

    // One (initially empty) list of key hashes per partition number in [0, numPartitions).
    Map<Integer, List<byte[]>> partitionedKeys = new HashMap<Integer, List<byte[]>>();
    for (int i = 0; i < numPartitions; i++) {
      partitionedKeys.put(i, new ArrayList<byte[]>());
    }

    // NOTE: byte[] uses identity equals/hashCode, so these maps are effectively keyed by
    // array instance. Lookups below work only because the very same hash array that is
    // stored in partitionedKeys is used as the map key. Two records whose hashes have equal
    // contents are therefore kept as separate entries.
    Map<byte[], byte[]> hashesToKeys = new HashMap<byte[], byte[]>();
    Map<byte[], byte[]> hashesToValues = new HashMap<byte[], byte[]>();

    // Fixed seed keeps the generated byte sequences reproducible.
    Random r = new Random(7);
    for (int i = 0; i < totalNumRecords; i++) {
      byte[] key = new byte[keyLength];
      r.nextBytes(key);
      final int partitionNumber = p.partition(ByteBuffer.wrap(key), numPartitions);

      byte[] hash = new byte[hashLength];
      h.hash(ByteBuffer.wrap(key), hashLength, hash);
      partitionedKeys.get(partitionNumber).add(hash);
      hashesToKeys.put(hash, key);

      byte[] valueBytes = new byte[valueLength];
      r.nextBytes(valueBytes);
      hashesToValues.put(hash, valueBytes);
    }

    // Apart from hashLength, the hasher, indexBits and the codec class, the constructor
    // arguments are fixed values; their meaning is defined by Curly's constructor.
    final Curly curly = new Curly(hashLength, h, 10L * 1024 * 1024 * 1024,
        indexBits, 32 * 1024, "", "", null, codecClass, null, 0, -1, null, -1, -1);

    // Order each partition's hashes by unsigned byte comparison before writing. This is
    // done ahead of the timed section so that only writing is measured.
    final Comparator<byte[]> unsignedOrder = new Comparator<byte[]>() {
      @Override
      public int compare(byte[] arg0, byte[] arg1) {
        return BytesUtils.compareBytesUnsigned(ByteBuffer.wrap(arg0), ByteBuffer.wrap(arg1));
      }
    };
    for (List<byte[]> hashes : partitionedKeys.values()) {
      Collections.sort(hashes, unsignedOrder);
    }

    long start = System.currentTimeMillis();
    for (Map.Entry<Integer, List<byte[]>> part : partitionedKeys.entrySet()) {
      final CurlyWriter writer = (CurlyWriter)curly.getWriter(new MockDomainVersion(0, 0L),
          new LocalPartitionRemoteFileOps(outputPath, part.getKey()), part.getKey());
      try {
        for (byte[] keyHash : part.getValue()) {
          writer.write(ByteBuffer.wrap(hashesToKeys.get(keyHash)), ByteBuffer.wrap(hashesToValues.get(keyHash)));
        }
      } finally {
        // Always release the writer, even when a write fails part-way through a partition.
        writer.close();
      }
    }
    long end = System.currentTimeMillis();

    System.out.println("Elapsed ms: " + (end - start));
  }
}