package com.amazonaws.bigdatablog.s3index;

import static com.amazonaws.bigdatablog.s3index.Util.SYSIN;
import static com.amazonaws.bigdatablog.s3index.Util.prompt;
import static java.lang.Integer.parseInt;
import static java.lang.Integer.toHexString;
import static java.lang.System.currentTimeMillis;
import static java.lang.Thread.interrupted;
import static java.util.Collections.singletonMap;
import static java.util.concurrent.TimeUnit.SECONDS;
import static java.util.logging.Level.WARNING;
import static java.util.logging.LogManager.getLogManager;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.Collection;
import java.util.HashSet;
import java.util.Locale;
import java.util.Map;
import java.util.Random;
import java.util.Set;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.atomic.AtomicInteger;

import com.amazonaws.services.s3.AmazonS3;
import com.amazonaws.services.s3.AmazonS3Client;
import com.amazonaws.services.s3.model.ObjectMetadata;
import com.amazonaws.services.s3.model.PutObjectRequest;
import com.amazonaws.util.Base16;
import com.amazonaws.util.Base64;
import com.google.common.util.concurrent.RateLimiter;

/**
 * Interactive load generator that uploads small random objects to an S3 bucket
 * at a fixed rate.
 *
 * <p>Data is produced in simulated one-minute steps: for every step, each
 * simulated server uploads one object for a random ~10% of the customers. Keys
 * have the shape
 * {@code <4 hex chars of MD5(key)>/<serverId>/<yyyy-MM-dd-HH-mm>/<customerId>-<timestampMillis>.data}
 * and every object carries a {@code hastransaction} user-metadata entry that is
 * {@code true} for roughly one object in ten.
 */
public class S3DataGenerator {

    private static final Random RANDOM = new Random();

    /** One MD5 digest per thread; uploads run concurrently on pool threads. */
    private static final ThreadLocal<MessageDigest> DIGEST =
            ThreadLocal.withInitial(S3DataGenerator::newMd5Digest);

    private static final AmazonS3 S3 = new AmazonS3Client();

    /** Mean object size in bytes; sizes are drawn around this value. */
    private static final int AVG_OBJ_SIZE = 50;

    private final String bucket;
    private final RateLimiter limiter;
    private final String[] serverIds;
    private final int numCustomers;
    /** Progress is reported every this many uploaded objects (about every 10 seconds of output). */
    private final long progressInterval;
    private final Thread orchestrator = new Thread(this::generateData);
    private final ExecutorService exec = Executors.newCachedThreadPool();
    private final AtomicInteger objectCount = new AtomicInteger();
    private long startTime;

    /**
     * Prompts for the bucket, server count and upload rate, then generates
     * objects until the user presses Enter.
     *
     * @param args ignored
     * @throws IOException if reading from standard input fails
     * @throws InterruptedException if interrupted while shutting down
     */
    public static void main(String[] args) throws IOException, InterruptedException {
        getLogManager().getLogger("").setLevel(WARNING);

        final String bucket = prompt("S3 bucket");
        final int numServers = parseInt(prompt("Number of servers"));
        final int objectsPerSecond = parseInt(prompt("Upload rate (objects/sec)"));

        final S3DataGenerator generator = new S3DataGenerator(bucket, objectsPerSecond, numServers);

        System.out.println("Generating objects...");
        generator.start();

        System.out.println("Hit Enter to quit");
        SYSIN.readLine();

        System.out.println("Shutting down...");
        generator.stop();
        System.out.println("Shutdown complete.");
    }

    /**
     * Creates a generator; nothing is uploaded until it is started.
     *
     * @param bucket           name of the target S3 bucket
     * @param objectsPerSecond upload rate limit; must be positive
     * @param numServers       number of simulated servers; must be positive
     * @throws IllegalArgumentException if {@code objectsPerSecond} or
     *                                  {@code numServers} is not positive
     */
    public S3DataGenerator(String bucket, int objectsPerSecond, int numServers) {
        if (objectsPerSecond <= 0) {
            throw new IllegalArgumentException(
                    "objectsPerSecond must be positive: " + objectsPerSecond);
        }
        if (numServers <= 0) {
            // Zero servers would leave the orchestrator spinning with nothing to
            // upload; a negative count cannot size the server-id array.
            throw new IllegalArgumentException("numServers must be positive: " + numServers);
        }

        this.bucket = bucket;
        limiter = RateLimiter.create(objectsPerSecond);
        serverIds = generateServerIds(numServers);

        // Calculate number of customers such that each server will produce a data
        // object for approximately 10% of the customers each minute. Clamp to at
        // least one customer: a truncated value of 0 would make every simulated
        // minute empty and turn the orchestrator loop into a busy spin.
        this.numCustomers = Math.max(1, (int) (60. * objectsPerSecond / numServers / 0.1));

        // Integer interval for progress output; comparing against the limiter's
        // double-valued rate with == is fragile.
        this.progressInterval = Math.max(1L, 10L * objectsPerSecond);
    }

    /** Records the start time and launches the orchestrator thread. */
    private void start() {
        startTime = currentTimeMillis();
        orchestrator.start();
    }

    /**
     * Stops scheduling new uploads, then gives in-flight uploads up to five
     * seconds to finish before cancelling them.
     *
     * @throws InterruptedException if interrupted while waiting
     */
    private void stop() throws InterruptedException {
        orchestrator.interrupt();
        orchestrator.join();

        exec.shutdown();
        final boolean completed = exec.awaitTermination(5, SECONDS);
        if (!completed) {
            exec.shutdownNow();
        }
    }

    /**
     * Orchestrator loop: generates one simulated minute after another, starting
     * at the current wall-clock time, until the thread is interrupted.
     */
    private void generateData() {
        long ts = currentTimeMillis();
        while (!interrupted()) {
            try {
                generateMinute(ts);
            } catch (final InterruptedException e) {
                return;
            }
            // Advance simulated time by one minute regardless of real elapsed time.
            ts += 60 * 1000;
        }
    }

    /** Schedules the uploads of every server for the simulated minute {@code ts}. */
    private void generateMinute(final long ts) throws InterruptedException {
        for (final String serverId : serverIds) {
            generateServerData(serverId, ts);
        }
    }

    /**
     * Schedules one upload per selected customer for the given server, pacing
     * submissions through the rate limiter.
     *
     * @throws InterruptedException if the orchestrator thread was interrupted
     */
    private void generateServerData(String serverId, long ts) throws InterruptedException {
        for (final int customerId : selectCustomers()) {
            limiter.acquire();
            // acquire() does not surface interruption as an exception here, so
            // the flag is polled explicitly after each permit.
            if (interrupted()) {
                throw new InterruptedException();
            }
            exec.execute(() -> generateDataFile(serverId, customerId, ts));
        }
    }

    /** Builds one random object with its key and metadata, uploads it and counts it. */
    private void generateDataFile(String serverId, int customerId, long ts) {
        final byte[] content = generateContent();
        final String key = calcKey(serverId, customerId, ts);
        final boolean hasTransaction = RANDOM.nextDouble() < 0.1;
        final Map<String, String> userMetadata =
                singletonMap("hastransaction", Boolean.toString(hasTransaction));

        putObject(key, content, userMetadata);
        complete();
    }

    /** Uploads {@code content} under {@code key} with length, MD5 and user metadata set. */
    private void putObject(final String key, final byte[] content,
            Map<String, String> userMetadata) {
        final ObjectMetadata metadata = new ObjectMetadata();
        metadata.setContentLength(content.length);
        metadata.setContentMD5(md5b64(content));
        metadata.setUserMetadata(userMetadata);

        final PutObjectRequest request =
                new PutObjectRequest(bucket, key, new ByteArrayInputStream(content), metadata);
        S3.putObject(request);
    }

    /** Counts a finished upload and prints throughput every {@code progressInterval} objects. */
    private void complete() {
        final int totalObjects = objectCount.incrementAndGet();
        if (totalObjects % progressInterval == 0) {
            final long duration = currentTimeMillis() - startTime;
            System.out.printf("Generated %d objects in %d seconds (%.1f objects/sec)\n",
                    totalObjects, duration / 1000, 1000. * totalObjects / duration);
        }
    }

    /**
     * Builds the object key and prepends the first four hex characters of its
     * MD5 hash as a prefix.
     *
     * <p>The timestamp is rendered in the JVM's default time zone.
     * {@code Locale.ROOT} and UTF-8 keep digits and hashing independent of the
     * host's locale and platform charset.
     */
    private String calcKey(String serverId, int customerId, long ts) {
        final String key = String.format(Locale.ROOT,
                "%s/%tY-%<tm-%<td-%<tH-%<tM/%05d-%2$d.data", serverId, ts, customerId);
        final String prefix = md5hex(key.getBytes(StandardCharsets.UTF_8)).substring(0, 4);
        return prefix + "/" + key;
    }

    /**
     * Returns random bytes whose length is normally distributed around
     * {@link #AVG_OBJ_SIZE} (standard deviation of a third of it), at least 1.
     */
    private byte[] generateContent() {
        int size = (int) (RANDOM.nextGaussian() * AVG_OBJ_SIZE / 3 + AVG_OBJ_SIZE);
        if (size < 1) {
            size = 1;
        }
        final byte[] content = new byte[size];
        RANDOM.nextBytes(content);
        return content;
    }

    /** Picks distinct random customer ids in {@code [1, numCustomers]} until 10% are selected. */
    private Collection<Integer> selectCustomers() {
        final Set<Integer> customers = new HashSet<>();
        while (customers.size() < 0.1 * numCustomers) {
            customers.add(RANDOM.nextInt(numCustomers) + 1);
        }
        return customers;
    }

    /** Generates {@code numServers} ids of the form {@code i-<random hex>}. */
    private static String[] generateServerIds(int numServers) {
        final String[] serverIds = new String[numServers];
        for (int i = 0; i < numServers; i++) {
            // toHexString already yields lowercase digits, so no case conversion is needed.
            serverIds[i] = "i-" + toHexString(RANDOM.nextInt());
        }
        return serverIds;
    }

    /** Returns the Base64-encoded MD5 of {@code content}. */
    private static String md5b64(byte[] content) {
        return Base64.encodeAsString(md5(content));
    }

    /** Returns the Base16-encoded MD5 of {@code content}. */
    private static String md5hex(byte[] content) {
        return Base16.encodeAsString(md5(content));
    }

    /** Computes the MD5 of {@code content} with the calling thread's digest instance. */
    private static byte[] md5(byte[] content) {
        final MessageDigest digest = DIGEST.get();
        digest.reset();
        return digest.digest(content);
    }

    /** Creates an MD5 digest, wrapping the checked exception. */
    private static MessageDigest newMd5Digest() {
        try {
            return MessageDigest.getInstance("MD5");
        } catch (final NoSuchAlgorithmException e) {
            throw new RuntimeException(e);
        }
    }
}