package org.rhq.cassandra.schema; import java.util.LinkedList; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicReference; import com.google.common.util.concurrent.RateLimiter; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; /** * @author John Sanda */ public class RateMonitor implements Runnable { private static final Log log = LogFactory.getLog(RateMonitor.class); private static class RequestStats { public double requests; public double failedRequests; public RequestStats(double requests, double failedRequests) { this.requests = requests; this.failedRequests = failedRequests; } @Override public boolean equals(Object o) { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; RequestStats that = (RequestStats) o; if (Double.compare(that.failedRequests, failedRequests) != 0) return false; if (Double.compare(that.requests, requests) != 0) return false; return true; } @Override public int hashCode() { int result; long temp; temp = Double.doubleToLongBits(requests); result = (int) (temp ^ (temp >>> 32)); temp = Double.doubleToLongBits(failedRequests); result = 31 * result + (int) (temp ^ (temp >>> 32)); return result; } } private static class AggregateRequestStats { public boolean thresholdExceeded; public double failedRequests; public AggregateRequestStats(boolean thresholdExceeded, double failedRequests) { this.thresholdExceeded = thresholdExceeded; this.failedRequests = failedRequests; } @Override public boolean equals(Object o) { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; AggregateRequestStats that = (AggregateRequestStats) o; if (failedRequests != that.failedRequests) return false; if (thresholdExceeded != that.thresholdExceeded) return false; return true; } @Override public int hashCode() { int result; long temp; result = (thresholdExceeded ? 1 : 0); temp = Double.doubleToLongBits(failedRequests); result = 31 * result + (int) (temp ^ (temp >>> 32)); return result; } } private static final double FAILURE_THRESHOLD = 0.01; private static final double MIN_READ_RATE = 25.0; private static final double MIN_WRITE_RATE = 2500; private static final double DEFAULT_WRITE_RATE_STEP_INCREASE = 25; private static final double DEFAULT_READ_RATE_STEP_INCREASE = 10; private static final double RATE_DECREASE_FACTOR = 0.9; private static final int DEFAULT_RATE_INCREASE_CHECKPOINT = 60; private static final int FIVE_SECOND_WINDOW_SIZE = 60; private static final int STABLE_RATE_WINDOW = 90; private LinkedList<RequestStats> oneSecondStats = new LinkedList<RequestStats>(); private LinkedList<AggregateRequestStats> fiveSecondStats = new LinkedList<AggregateRequestStats>(); private int stableRateTick; private AtomicInteger requests = new AtomicInteger(); private AtomicInteger failRequests = new AtomicInteger(); private boolean shutdown; private int warmUp = MigrateAggregateMetrics.DEFAULT_WARM_UP; private AtomicReference<RateLimiter> readPermitsRef; private AtomicReference<RateLimiter> writePermitsRef; private double writeRateStepIncrease = DEFAULT_WRITE_RATE_STEP_INCREASE; private double readRateStepIncrease = DEFAULT_READ_RATE_STEP_INCREASE; private int rateIncreaseCheckpoint = DEFAULT_RATE_INCREASE_CHECKPOINT; public RateMonitor(AtomicReference<RateLimiter> readPermitsRef, AtomicReference<RateLimiter> writePermitsRef) { this.readPermitsRef = readPermitsRef; this.writePermitsRef = writePermitsRef; } public void requestSucceeded() { requests.incrementAndGet(); } public void requestFailed() { failRequests.incrementAndGet(); requests.incrementAndGet(); } public void shutdown() { shutdown = true; } @Override public void run() { while (!shutdown) { try { if (requests.get() == 0) { continue; } oneSecondStats.addFirst(new RequestStats(requests.getAndSet(0), failRequests.getAndSet(0))); if (oneSecondStats.size() > 4) { aggregateStats(); if (isRateDecreaseNeeded()) { decreaseRates(); clearStats(); stableRateTick = 0; writeRateStepIncrease = DEFAULT_WRITE_RATE_STEP_INCREASE; readRateStepIncrease = DEFAULT_READ_RATE_STEP_INCREASE; rateIncreaseCheckpoint = DEFAULT_RATE_INCREASE_CHECKPOINT; } else if (fiveSecondStats.peek().thresholdExceeded) { increaseWarmup(); oneSecondStats.clear(); stableRateTick = 0; writeRateStepIncrease = DEFAULT_WRITE_RATE_STEP_INCREASE; readRateStepIncrease = DEFAULT_READ_RATE_STEP_INCREASE; rateIncreaseCheckpoint = DEFAULT_RATE_INCREASE_CHECKPOINT; } else if (isLongTermRateStable()) { writeRateStepIncrease += DEFAULT_WRITE_RATE_STEP_INCREASE; readRateStepIncrease += DEFAULT_READ_RATE_STEP_INCREASE; rateIncreaseCheckpoint = Math.max(30, rateIncreaseCheckpoint - 15); stableRateTick = 0; log.info("Rates are stable. The read rate step increase is now " + readRateStepIncrease + " . The write rate step increase is now " + writeRateStepIncrease + ". The rate increase checkpoint is now " + rateIncreaseCheckpoint); increaseRates(); clearStats(); } else if (isShortTermRateStable()) { increaseRates(); clearStats(); } } Thread.sleep(1000); } catch (InterruptedException e) { log.info("Stopping request monitoring due to interrupt", e); } catch (Exception e) { log.warn("There was an unexpected error", e); } } } protected void clearStats() { oneSecondStats.clear(); fiveSecondStats.clear(); } private void aggregateStats() { double totalRequests = 0; double totalFailures = 0; stableRateTick++; for (RequestStats stats : oneSecondStats) { totalRequests += stats.requests; totalFailures += stats.failedRequests; } fiveSecondStats.addFirst(new AggregateRequestStats((totalFailures / totalRequests) > FAILURE_THRESHOLD, totalFailures)); oneSecondStats.removeLast(); if (fiveSecondStats.size() > FIVE_SECOND_WINDOW_SIZE) { fiveSecondStats.removeLast(); } } private boolean isRateDecreaseNeeded() { if (fiveSecondStats.size() < 30) { return false; } int i = 0; int failures = 0; for (AggregateRequestStats stats : fiveSecondStats) { // We are looking for 3 occurrences of the threshold being exceeded in the last // 30 samples. if (failures > 2 || i > 29) { break; } if (stats.thresholdExceeded) { ++failures; } ++i; } return failures > 2; } private boolean isShortTermRateStable() { if (fiveSecondStats.size() < rateIncreaseCheckpoint) { return false; } int i = 0; for (AggregateRequestStats stats : fiveSecondStats) { if (stats.failedRequests > 0) { return false; } if (i > rateIncreaseCheckpoint - 1) { break; } ++i; } return true; } private boolean isLongTermRateStable() { return stableRateTick == STABLE_RATE_WINDOW; } private void decreaseRates() { double readRate = readPermitsRef.get().getRate(); double writeRate = Math.max(writePermitsRef.get().getRate(), MIN_WRITE_RATE); double newWriteRate = writeRate * RATE_DECREASE_FACTOR; double newReadRate = Math.max(newWriteRate * 0.04, MIN_READ_RATE); log.info("Decreasing request rates:\n" + readRate + " reads/sec --> " + newReadRate + " reads/sec\n" + writeRate + " writes/sec --> " + newWriteRate + " writes/sec\n"); warmUp = MigrateAggregateMetrics.DEFAULT_WARM_UP; readPermitsRef.set(RateLimiter.create(newReadRate, warmUp, TimeUnit.SECONDS)); writePermitsRef.set(RateLimiter.create(newWriteRate, warmUp, TimeUnit.SECONDS)); } private void increaseRates() { double readRate = readPermitsRef.get().getRate(); double writeRate = writePermitsRef.get().getRate(); double newWriteRate = writeRate + writeRateStepIncrease; double newReadRate = newWriteRate * 0.04; log.info("Increasing request rates:\n" + readRate + " reads/sec --> " + newReadRate + " reads/sec\n" + writeRate + " writes/sec --> " + newWriteRate + " writes/sec\n"); warmUp = MigrateAggregateMetrics.DEFAULT_WARM_UP; readPermitsRef.set(RateLimiter.create(newReadRate, warmUp, TimeUnit.SECONDS)); writePermitsRef.set(RateLimiter.create(newWriteRate, warmUp, TimeUnit.SECONDS)); } private void increaseWarmup() { warmUp *= 2; double readRate = readPermitsRef.get().getRate(); double writeRate = writePermitsRef.get().getRate(); log.info("Resetting request rates with new warm up of " + warmUp + " sec"); readPermitsRef.set(RateLimiter.create(readRate, warmUp, TimeUnit.SECONDS)); writePermitsRef.set(RateLimiter.create(writeRate, warmUp, TimeUnit.SECONDS)); } }