/**
 * ****************************************************************************
 * Copyright (c) 2010-2016 by Min Cai (min.cai.china@gmail.com).
 * <p>
 * This file is part of the Archimulator multicore architectural simulator.
 * <p>
 * Archimulator is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * <p>
 * Archimulator is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * <p>
 * You should have received a copy of the GNU General Public License
 * along with Archimulator. If not, see <http://www.gnu.org/licenses/>.
 * ****************************************************************************
 */
package archimulator.uncore.cache.partitioning.mlpAware;

import archimulator.common.SimulationType;
import archimulator.common.report.ReportNode;
import archimulator.core.event.DynamicInstructionCommittedEvent;
import archimulator.uncore.MemoryHierarchyAccess;
import archimulator.uncore.cache.EvictableCache;
import archimulator.uncore.cache.partitioning.CachePartitioningHelper;
import archimulator.uncore.cache.partitioning.LRUStack;
import archimulator.uncore.cache.partitioning.MemoryLatencyMeter;
import archimulator.uncore.coherence.event.GeneralCacheControllerServiceNonblockingRequestEvent;
import archimulator.uncore.coherence.event.LastLevelCacheControllerLineInsertEvent;
import archimulator.uncore.mlp.PendingL2Hit;
import archimulator.uncore.mlp.PendingL2Miss;
import archimulator.util.Pair;

import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.function.Function;
import java.util.stream.Collectors;

/**
 * Memory level parallelism (MLP) aware cache partitioning helper.
 *
 * @author Min Cai
 */
public class MLPAwareCachePartitioningHelper extends CachePartitioningHelper {
    private Map<Integer, PendingL2Miss> pendingL2Misses;
    private Map<Integer, Map<Integer, PendingL2Hit>> pendingL2Hits;

    private L2AccessMLPCostProfile l2AccessMLPCostProfile;
    private MemoryLatencyMeter memoryLatencyMeter;

    private Map<Integer, MLPAwareStackDistanceProfile> mlpAwareStackDistanceProfiles;
    private Map<Integer, Map<Integer, LRUStack>> lruStacks;

    private Function<Double, Integer> mlpCostQuantizer;

    private List<List<Integer>> partitions;
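    /*
     * Overview: this helper tracks every in-flight L2 access, charges each pending access a share of
     * MLP-cost per cycle, quantizes the accumulated cost into per-thread stack distance counters, and
     * at each interval boundary chooses the way partition whose summed MLP-cost is minimal.
     *
     * As a concrete illustration of the quantizer configured in the constructor below: a raw MLP-cost
     * of 100.0 exceeds 85 but is at most 128, so it maps to bucket 2, and the matching hit or miss
     * counter in the thread's MLP-aware stack distance profile is incremented by 2.
     */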
    /**
     * Create an MLP aware cache partitioning helper.
     *
     * @param cache the cache
     */
    public MLPAwareCachePartitioningHelper(EvictableCache<?> cache) {
        super(cache);

        this.pendingL2Misses = new LinkedHashMap<>();
        this.pendingL2Hits = new LinkedHashMap<>();

        this.l2AccessMLPCostProfile = new L2AccessMLPCostProfile(cache.getAssociativity());
        this.memoryLatencyMeter = new MemoryLatencyMeter();

        this.mlpAwareStackDistanceProfiles = new LinkedHashMap<>();
        this.lruStacks = new LinkedHashMap<>();

        this.mlpCostQuantizer = rawValue -> {
            if (rawValue < 0) {
                throw new IllegalArgumentException();
            }

            if (rawValue <= 42) {
                return 0;
            } else if (rawValue <= 85) {
                return 1;
            } else if (rawValue <= 128) {
                return 2;
            } else if (rawValue <= 170) {
                return 3;
            } else if (rawValue <= 213) {
                return 4;
            } else if (rawValue <= 246) {
                return 5;
            } else if (rawValue <= 300) {
                return 6;
            } else {
                return 7;
            }
        };

        cache.getBlockingEventDispatcher().addListener(GeneralCacheControllerServiceNonblockingRequestEvent.class, event -> {
            if (event.getCacheController() == getL2Controller() && shouldInclude(event.getSet())) {
                if (!event.isHitInCache()) {
                    profileBeginServicingL2Miss(event.getAccess());
                } else {
                    profileBeginServicingL2Hit(event.getAccess());
                }
            }
        });

        cache.getBlockingEventDispatcher().addListener(LastLevelCacheControllerLineInsertEvent.class, event -> {
            if (event.getCacheController() == getL2Controller() && pendingL2Misses.containsKey(event.getAccess().getPhysicalTag())) {
                profileEndServicingL2Miss(event.getAccess());
            }
        });

        cache.getCycleAccurateEventQueue().getPerCycleEvents().add(() -> {
            if (cache.getSimulation().getType() != SimulationType.FAST_FORWARD) {
                updateL2AccessMlpCostsPerCycle();
                updateL2HitElapsedCyclesPerCycle();
                freeInvalidL2HitsPerCycle();
            }
        });

        cache.getBlockingEventDispatcher().addListener(DynamicInstructionCommittedEvent.class, event -> {
            if (pendingL2Hits.containsKey(getThreadIdentifier(event.getDynamicInstruction().getThread()))) {
                pendingL2Hits.get(getThreadIdentifier(event.getDynamicInstruction().getThread())).values()
                        .forEach(PendingL2Hit::incrementNumCommittedInstructionsSinceAccess);
            }
        });
    }

    /**
     * New interval.
     */
    @Override
    protected void newInterval() {
        this.setPartition(this.getOptimalMlpCostSumAndPartition().getSecond());

        this.mlpAwareStackDistanceProfiles.values().forEach(MLPAwareStackDistanceProfile::newInterval);
    }

    /**
     * To be invoked per cycle for updating MLP-costs for in-flight L2 cache accesses.
     */
    private void updateL2AccessMlpCostsPerCycle() {
        for (PendingL2Miss pendingL2Miss : this.pendingL2Misses.values()) {
            pendingL2Miss.setMlpCost(pendingL2Miss.getMlpCost() + (double) 1 / this.l2AccessMLPCostProfile.getN(pendingL2Miss.getStackDistance()));
        }

        for (Map<Integer, PendingL2Hit> pendingL2HitsPerThread : this.pendingL2Hits.values()) {
            for (PendingL2Hit pendingL2Hit : pendingL2HitsPerThread.values()) {
                pendingL2Hit.setMlpCost(pendingL2Hit.getMlpCost() + (double) 1 / this.l2AccessMLPCostProfile.getN(pendingL2Hit.getStackDistance()));
            }
        }
    }

    /**
     * To be invoked per cycle for updating elapsed cycles for in-flight L2 cache hits.
     */
    private void updateL2HitElapsedCyclesPerCycle() {
        for (Map<Integer, PendingL2Hit> pendingL2HitsPerThread : this.pendingL2Hits.values()) {
            pendingL2HitsPerThread.values().forEach(PendingL2Hit::incrementNumCyclesElapsedSinceAccess);
        }
    }
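    /*
     * Note on the per-cycle MLP-cost update above: each pending access accrues 1 / N per cycle,
     * where N is taken from the L2AccessMLPCostProfile for the access's stack distance. Assuming
     * N reports how many accesses are currently outstanding at that point (an interpretation of
     * getN(), not something this file states explicitly), the cost of a cycle is shared among the
     * accesses that overlap in it. For example, an access that stays outstanding for 200 cycles
     * while N is 4 in every cycle accumulates 200 * (1/4) = 50 MLP-cost rather than 200, so highly
     * parallel misses are charged less than isolated ones.
     */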
    /**
     * To be invoked per cycle for freeing invalid in-flight L2 cache hits.
     */
    private void freeInvalidL2HitsPerCycle() {
        for (Map<Integer, PendingL2Hit> pendingL2HitsPerThread : this.pendingL2Hits.values()) {
            List<Integer> tagsToFree = pendingL2HitsPerThread.values().stream()
                    .filter(pendingL2Hit -> pendingL2Hit.getNumCommittedInstructionsSinceAccess() >= this.getL2Controller().getExperiment().getConfig().getReorderBufferCapacity()
                            || pendingL2Hit.getNumCyclesElapsedSinceAccess() >= memoryLatencyMeter.getAverageLatency())
                    .map(pendingL2Hit -> pendingL2Hit.getAccess().getPhysicalTag())
                    .collect(Collectors.toList());

            for (int tag : tagsToFree) {
                profileEndServicingL2Hit(pendingL2HitsPerThread.get(tag).getAccess());
            }
        }
    }

    /**
     * Profile the beginning of servicing an L2 cache miss.
     *
     * @param access the memory hierarchy access
     */
    private void profileBeginServicingL2Miss(MemoryHierarchyAccess access) {
        int tag = access.getPhysicalTag();
        int set = this.getL2Controller().getCache().getSet(tag);

        LRUStack lruStack = getLruStack(getThreadIdentifier(access.getThread()), set);

        final int stackDistance = lruStack.access(tag);

        PendingL2Miss pendingL2Miss = new PendingL2Miss(access, getL2Controller().getCycleAccurateEventQueue().getCurrentCycle()) {
            {
                setStackDistance(stackDistance);
            }
        };
        this.pendingL2Misses.put(tag, pendingL2Miss);

        this.l2AccessMLPCostProfile.incrementCounter(stackDistance);
    }

    /**
     * Profile the end of servicing an L2 cache miss.
     *
     * @param access the memory hierarchy access
     */
    private void profileEndServicingL2Miss(MemoryHierarchyAccess access) {
        int tag = access.getPhysicalTag();
        int set = this.getL2Controller().getCache().getSet(tag);

        PendingL2Miss pendingL2Miss = this.pendingL2Misses.get(tag);
        pendingL2Miss.setEndCycle(this.getL2Controller().getCycleAccurateEventQueue().getCurrentCycle());

        this.l2AccessMLPCostProfile.decrementCounter(pendingL2Miss.getStackDistance());

        this.pendingL2Misses.remove(tag);

        this.memoryLatencyMeter.newSample(pendingL2Miss.getNumCycles());

        MLPAwareStackDistanceProfile mlpAwareStackDistanceProfile = this.getMlpAwareStackDistanceProfile(getThreadIdentifier(access.getThread()));

        if (pendingL2Miss.getStackDistance() == -1) {
            mlpAwareStackDistanceProfile.incrementMissCounter(this.mlpCostQuantizer.apply(pendingL2Miss.getMlpCost()));
        } else {
            mlpAwareStackDistanceProfile.incrementHitCounter(pendingL2Miss.getStackDistance(), this.mlpCostQuantizer.apply(pendingL2Miss.getMlpCost()));
        }
    }

    /**
     * Profile the beginning of servicing an L2 cache hit.
     *
     * @param access the memory hierarchy access
     */
    private void profileBeginServicingL2Hit(MemoryHierarchyAccess access) {
        int tag = access.getPhysicalTag();
        int set = this.getL2Controller().getCache().getSet(tag);

        LRUStack lruStack = getLruStack(getThreadIdentifier(access.getThread()), set);

        final int stackDistance = lruStack.access(tag);

        PendingL2Hit pendingL2Hit = new PendingL2Hit(access, getL2Controller().getCycleAccurateEventQueue().getCurrentCycle()) {
            {
                setStackDistance(stackDistance);
            }
        };

        if (!this.pendingL2Hits.containsKey(getThreadIdentifier(access.getThread()))) {
            this.pendingL2Hits.put(getThreadIdentifier(access.getThread()), new LinkedHashMap<>());
        }

        this.pendingL2Hits.get(getThreadIdentifier(access.getThread())).put(tag, pendingL2Hit);

        this.l2AccessMLPCostProfile.incrementCounter(stackDistance);
    }
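    /*
     * Lifecycle of a pending L2 hit: it is created when the service-nonblocking-request event reports
     * a hit, accrues MLP-cost and elapsed cycles every cycle, and counts committed instructions on
     * each DynamicInstructionCommittedEvent. It is retired, i.e. folded into the thread's MLP-aware
     * stack distance profile via profileEndServicingL2Hit below, once either the number of committed
     * instructions since the access reaches the reorder buffer capacity or the elapsed cycles reach
     * the average memory latency measured by the MemoryLatencyMeter (see freeInvalidL2HitsPerCycle).
     */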
    /**
     * Profile the end of servicing an L2 cache hit.
     *
     * @param access the memory hierarchy access
     */
    private void profileEndServicingL2Hit(MemoryHierarchyAccess access) {
        int tag = access.getPhysicalTag();

        PendingL2Hit pendingL2Hit = this.pendingL2Hits.get(getThreadIdentifier(access.getThread())).get(tag);
        pendingL2Hit.setEndCycle(this.getL2Controller().getCycleAccurateEventQueue().getCurrentCycle());

        this.l2AccessMLPCostProfile.decrementCounter(pendingL2Hit.getStackDistance());

        this.pendingL2Hits.get(getThreadIdentifier(access.getThread())).remove(tag);

        MLPAwareStackDistanceProfile mlpAwareStackDistanceProfile = this.getMlpAwareStackDistanceProfile(getThreadIdentifier(access.getThread()));

        if (pendingL2Hit.getStackDistance() == -1) {
            mlpAwareStackDistanceProfile.incrementMissCounter(this.mlpCostQuantizer.apply(pendingL2Hit.getMlpCost()));
        } else {
            mlpAwareStackDistanceProfile.incrementHitCounter(pendingL2Hit.getStackDistance(), this.mlpCostQuantizer.apply(pendingL2Hit.getMlpCost()));
        }
    }

    /**
     * Get the LRU stack for the specified thread ID and set index in the L2 cache.
     *
     * @param threadId the thread ID
     * @param set      the set index
     * @return the LRU stack for the specified thread ID and set index in the L2 cache
     */
    private LRUStack getLruStack(int threadId, int set) {
        if (!this.lruStacks.containsKey(threadId)) {
            this.lruStacks.put(threadId, new LinkedHashMap<>());
        }

        if (!this.lruStacks.get(threadId).containsKey(set)) {
            this.lruStacks.get(threadId).put(set, new LRUStack(threadId, set, this.getL2Controller().getCache().getAssociativity()));
        }

        return this.lruStacks.get(threadId).get(set);
    }

    /**
     * Get the total MLP-cost incurred by the specified thread ID when it is allocated the specified number of ways (associativity) in the L2 cache.
     *
     * @param threadId      the thread ID
     * @param associativity the number of ways allocated to the thread
     * @return the total MLP-cost incurred by the specified thread ID under the specified allocated associativity
     */
    public int getTotalMlpCost(int threadId, int associativity) {
        if (associativity > this.getL2Controller().getCache().getAssociativity()) {
            throw new IllegalArgumentException();
        }

        MLPAwareStackDistanceProfile mlpAwareStackDistanceProfile = this.getMlpAwareStackDistanceProfile(threadId);

        int totalMlpCost = 0;

        for (int i = associativity - 1; i < this.getL2Controller().getCache().getAssociativity(); i++) {
            totalMlpCost += mlpAwareStackDistanceProfile.getHitCounters().get(i);
        }

        totalMlpCost += mlpAwareStackDistanceProfile.getMissCounter();

        return totalMlpCost;
    }

    /**
     * Get the MLP-aware stack distance profile for the specified thread ID.
     *
     * @param threadId the thread ID
     * @return the MLP-aware stack distance profile for the specified thread ID
     */
    private MLPAwareStackDistanceProfile getMlpAwareStackDistanceProfile(int threadId) {
        if (!this.mlpAwareStackDistanceProfiles.containsKey(threadId)) {
            this.mlpAwareStackDistanceProfiles.put(threadId, new MLPAwareStackDistanceProfile(this.getL2Controller().getCache().getAssociativity()));
        }

        return this.mlpAwareStackDistanceProfiles.get(threadId);
    }
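    /*
     * Sketch of the partition search below, assuming partition(associativity, numThreads) inherited
     * from CachePartitioningHelper enumerates the ways-per-thread splits of the L2 associativity
     * (this file does not spell out its exact semantics). For an 8-way L2 shared by 2 threads the
     * candidates would look like [1, 7], [2, 6], ..., [7, 1]; for each candidate the helper computes
     *
     *     sum = getTotalMlpCost(0, ways0) + getTotalMlpCost(1, ways1);
     *
     * and newInterval() installs the candidate with the smallest sum, i.e. the allocation that is
     * expected to incur the least aggregate MLP-weighted miss cost in the next interval.
     */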
    /**
     * Get the minimal sum of MLP-cost and its associated optimal partition.
     *
     * @return the minimal sum of MLP-cost and its associated optimal partition
     */
    private Pair<Integer, List<Integer>> getOptimalMlpCostSumAndPartition() {
        if (this.partitions == null) {
            this.partitions = partition(this.getL2Controller().getCache().getAssociativity(), this.getNumThreads());
        }

        int minMlpCostSum = Integer.MAX_VALUE;
        List<Integer> minPartition = null;

        for (List<Integer> partition : this.partitions) {
            int sum = 0;

            for (int i = 0; i < partition.size(); i++) {
                sum += this.getTotalMlpCost(i, partition.get(i));
            }

            if (sum < minMlpCostSum) {
                minMlpCostSum = sum;
                minPartition = partition;
            }
        }

        return new Pair<>(minMlpCostSum, minPartition);
    }

    @Override
    public void dumpStats(ReportNode reportNode) {
        reportNode.getChildren().add(new ReportNode(reportNode, "mlpAwareCachePartitioningHelper") {{
            getChildren().add(new ReportNode(this, "partition", getPartition() + ""));
            getChildren().add(new ReportNode(this, "numIntervals", getNumIntervals() + ""));
            getChildren().add(new ReportNode(this, "l2AccessMLPCostProfile/hitCounters", getL2AccessMLPCostProfile().getHitCounters() + ""));
            getChildren().add(new ReportNode(this, "l2AccessMLPCostProfile/missCounter", getL2AccessMLPCostProfile().getMissCounter() + ""));
            getChildren().add(new ReportNode(this, "memoryLatencyMeter/averageLatency", getMemoryLatencyMeter().getAverageLatency() + ""));
        }});
    }

    /**
     * Get the L2 cache access MLP-cost profile.
     *
     * @return the L2 cache access MLP-cost profile
     */
    public L2AccessMLPCostProfile getL2AccessMLPCostProfile() {
        return l2AccessMLPCostProfile;
    }

    /**
     * Get the memory latency meter.
     *
     * @return the memory latency meter
     */
    public MemoryLatencyMeter getMemoryLatencyMeter() {
        return memoryLatencyMeter;
    }

    /**
     * Get the map of MLP aware stack distance profiles.
     *
     * @return the map of MLP aware stack distance profiles
     */
    public Map<Integer, MLPAwareStackDistanceProfile> getMlpAwareStackDistanceProfiles() {
        return mlpAwareStackDistanceProfiles;
    }

    /**
     * Get the map of LRU stacks.
     *
     * @return the map of LRU stacks
     */
    public Map<Integer, Map<Integer, LRUStack>> getLruStacks() {
        return lruStacks;
    }
}
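/*
 * Usage sketch (illustrative only; variable names such as l2Controller and reportNode are
 * placeholders, not identifiers defined by this class). The helper is constructed with the shared
 * L2 cache and is then driven entirely by the event listeners it registers in its constructor:
 *
 *     EvictableCache<?> l2Cache = l2Controller.getCache();
 *     MLPAwareCachePartitioningHelper helper = new MLPAwareCachePartitioningHelper(l2Cache);
 *     // ... run the simulation; the partition is recomputed at each interval boundary ...
 *     helper.dumpStats(reportNode); // report the chosen partition and profiling statistics
 */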