/**
* ****************************************************************************
* Copyright (c) 2010-2016 by Min Cai (min.cai.china@gmail.com).
* <p>
* This file is part of the Archimulator multicore architectural simulator.
* <p>
* Archimulator is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* <p>
* Archimulator is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
* <p>
* You should have received a copy of the GNU General Public License
* along with Archimulator. If not, see <http://www.gnu.org/licenses/>.
* ****************************************************************************
*/
package archimulator.core.speculativePrecomputation;
import archimulator.common.Logger;
import archimulator.common.Simulation;
import archimulator.core.Core;
import archimulator.core.Processor;
import archimulator.core.Thread;
import archimulator.core.event.DynamicInstructionCommittedEvent;
import archimulator.core.event.DynamicInstructionDecodedEvent;
import archimulator.isa.ArchitecturalRegisterFile;
import archimulator.isa.StaticInstruction;
import archimulator.isa.StaticInstructionType;
import archimulator.os.Context;
import archimulator.os.ContextKilledEvent;
import archimulator.uncore.MemoryHierarchyAccess;
import archimulator.uncore.MemoryHierarchyAccessType;
import archimulator.uncore.cache.*;
import archimulator.uncore.cache.replacement.CacheReplacementPolicyType;
import archimulator.uncore.coherence.event.GeneralCacheControllerServiceNonblockingRequestEvent;
import archimulator.uncore.coherence.msi.controller.DirectoryController;
import archimulator.uncore.delinquentLoad.DelinquentLoad;
import archimulator.uncore.delinquentLoad.DelinquentLoadIdentificationTable;
import archimulator.util.ValueProvider;
import org.apache.commons.collections.iterators.ReverseListIterator;
import java.util.*;
/**
* Dynamic speculative precomputation helper.
*
* @author Min Cai
*/
public class DynamicSpeculativePrecomputationHelper {
/** Cache whose lines record the slices currently held in slice storage; created once in the constructor. */
private final EvictableCache<Boolean> sliceCache;
/** Delinquent load identification table of each thread, populated in the constructor. */
private final Map<Thread, DelinquentLoadIdentificationTable> delinquentLoadIdentificationTables;
/** Retired instruction buffer of each thread, populated in the constructor. */
private final Map<Thread, RetiredInstructionBuffer> retiredInstructionBuffers;
/** Slice information table of each thread, populated in the constructor. */
private final Map<Thread, SliceInformationTable> sliceInformationTables;
/**
* Create a dynamic speculative precomputation helper.
*
* @param simulation the simulation
*/
public DynamicSpeculativePrecomputationHelper(Simulation simulation) {
Processor processor = simulation.getProcessor();
this.sliceCache = new BasicEvictableCache<>(
processor,
"sliceCache",
new CacheGeometry(SLICE_CACHE_CAPACITY, SLICE_CACHE_CAPACITY, 1),
CacheReplacementPolicyType.LRU,
args -> new BooleanValueProvider()
);
this.delinquentLoadIdentificationTables = new HashMap<>();
this.retiredInstructionBuffers = new HashMap<>();
this.sliceInformationTables = new HashMap<>();
for (Core core : processor.getCores()) {
for (Thread thread : core.getThreads()) {
this.getDelinquentLoadIdentificationTables().put(thread, new DelinquentLoadIdentificationTable(thread));
this.getRetiredInstructionBuffers().put(thread, new RetiredInstructionBuffer(this, thread));
this.getSliceInformationTables().put(thread, new SliceInformationTable(this, thread));
}
}
processor.getBlockingEventDispatcher().addListener(
DelinquentLoadIdentificationTable.DelinquentLoadIdentifiedEvent.class,
event -> getRetiredInstructionBuffers().get(event.getThread()).gatherInstructionsFor(event.getDelinquentLoad())
);
}
/**
 * Return the slice cache owned by this helper.
 *
 * @return the slice cache
 */
public EvictableCache<Boolean> getSliceCache() {
    return this.sliceCache;
}
/**
 * Return the delinquent load identification tables, keyed by thread.
 *
 * @return the map of delinquent load identification tables
 */
public Map<Thread, DelinquentLoadIdentificationTable> getDelinquentLoadIdentificationTables() {
    return this.delinquentLoadIdentificationTables;
}
/**
 * Return the retired instruction buffers, keyed by thread.
 *
 * @return the map of retired instruction buffers
 */
public Map<Thread, RetiredInstructionBuffer> getRetiredInstructionBuffers() {
    return this.retiredInstructionBuffers;
}
/**
 * Return the slice information tables, keyed by thread.
 *
 * @return the map of slice information tables
 */
public Map<Thread, SliceInformationTable> getSliceInformationTables() {
    return this.sliceInformationTables;
}
/**
* Boolean value provider.
*/
public class BooleanValueProvider implements ValueProvider<Boolean> {
private boolean state;
private Map<Integer, Integer> machineInstructions;
/**
* Create a boolean value provider.
*/
private BooleanValueProvider() {
this.state = false;
}
/**
* Get the value.
*
* @return the value
*/
@Override
public Boolean get() {
return state;
}
/**
* Get the initial value.
*
* @return the initial value
*/
@Override
public Boolean getInitialValue() {
return false;
}
public Map<Integer, Integer> getMachineInstructions() {
return machineInstructions;
}
}
/**
 * Find an invalid line and create a new miss for the specified address.
 * <p>
 * A line is considered invalid when its current state equals its initial state. The ways of the
 * given set are scanned in ascending order and the first invalid one is used.
 *
 * @param thread  the thread
 * @param address the address
 * @param set     the set index
 * @return a cache access describing the invalid line and the newly created miss for the specified
 * address, or {@code null} if every line in the set is in use
 */
private CacheAccess<Boolean> findInvalidLineAndNewMiss(Thread thread, int address, int set) {
    EvictableCache<Boolean> cache = this.getSliceCache();
    int tag = cache.getTag(address);

    for (int way = 0; way < cache.getAssociativity(); way++) {
        CacheLine<Boolean> line = cache.getLine(set, way);

        // The states are boxed Booleans: compare them by value, not by reference identity.
        if (Objects.equals(line.getState(), line.getInitialState())) {
            MemoryHierarchyAccess access = new MemoryHierarchyAccess(null, thread, MemoryHierarchyAccessType.UNKNOWN, -1, address, tag, null);
            return new CacheAccess<>(cache, access, set, way, tag);
        }
    }

    return null;
}
/**
* Retired instruction buffer.
*/
public static class RetiredInstructionBuffer {
/** Owning helper, used to reach the slice information table of the thread. */
private final DynamicSpeculativePrecomputationHelper dynamicSpeculativePrecomputationHelper;
/** Thread whose committed instructions are recorded. */
private final Thread thread;
/** Retired instructions gathered so far, oldest first. */
private final Stack<RetiredInstruction> retiredInstructions;
/** Entries for marked stack-pointer-based loads, maintained while a slice is being built. */
private final List<StackAddressTableEntry> stackAddressTable;
/** Current state of the buffer. */
private RetiredInstructionBufferState state;
/** Delinquent load for which instructions are gathered; {@code null} when none is pending. */
private DelinquentLoad delinquentLoad;
/**
 * Create a retired instruction buffer and subscribe it to committed instruction events.
 *
 * @param dynamicSpeculativePrecomputationHelper
 *               the dynamic speculative precomputation helper
 * @param thread the thread
 */
private RetiredInstructionBuffer(DynamicSpeculativePrecomputationHelper dynamicSpeculativePrecomputationHelper, Thread thread) {
    this.dynamicSpeculativePrecomputationHelper = dynamicSpeculativePrecomputationHelper;
    this.thread = thread;
    this.retiredInstructions = new Stack<>();
    this.stackAddressTable = new ArrayList<>();
    this.state = RetiredInstructionBufferState.IDLE;

    this.thread.getBlockingEventDispatcher().addListener(DynamicInstructionCommittedEvent.class, event -> this.onInstructionCommitted(event));
}

/**
 * React to a committed instruction.
 * <p>
 * Events of other threads are ignored. While idle with a pending delinquent load, the first
 * commit of that load starts the gathering. While gathering, every commit is appended (dropping
 * the oldest entry once the capacity is reached) and the next commit of the delinquent load
 * triggers the slice construction.
 *
 * @param event the committed instruction event
 */
private void onInstructionCommitted(DynamicInstructionCommittedEvent event) {
    if (event.getDynamicInstruction().getThread() != this.thread) {
        return;
    }

    if (this.state == RetiredInstructionBufferState.IDLE) {
        if (this.delinquentLoad != null && this.isCommitOfDelinquentLoad(event)) {
            this.state = RetiredInstructionBufferState.INSTRUCTION_GATHERING;
            this.retiredInstructions.add(this.newRetiredInstruction(event));
        }
    } else if (this.state == RetiredInstructionBufferState.INSTRUCTION_GATHERING) {
        if (this.retiredInstructions.size() >= CAPACITY) {
            // Make room by discarding the oldest gathered instruction.
            this.retiredInstructions.remove(0);
        }

        this.retiredInstructions.add(this.newRetiredInstruction(event));

        if (this.isCommitOfDelinquentLoad(event)) {
            this.buildSlice();
        }
    }
}

/**
 * Tell whether the committed instruction is the pending delinquent load, i.e. its pc matches and
 * the pc on top of the non-empty function call context stack matches the load's function call pc.
 *
 * @param event the committed instruction event
 * @return a value indicating whether the committed instruction is the pending delinquent load
 */
private boolean isCommitOfDelinquentLoad(DynamicInstructionCommittedEvent event) {
    if (event.getDynamicInstruction().getPc() != this.delinquentLoad.getPc()) {
        return false;
    }

    Context context = event.getDynamicInstruction().getThread().getContext();
    return !context.getFunctionCallContextStack().isEmpty()
            && context.getFunctionCallContextStack().peek().getPc() == this.delinquentLoad.getFunctionCallPc();
}

/**
 * Create the retired instruction record of the committed instruction.
 *
 * @param event the committed instruction event
 * @return the newly created retired instruction
 */
private RetiredInstruction newRetiredInstruction(DynamicInstructionCommittedEvent event) {
    return new RetiredInstruction(
            event.getDynamicInstruction().getPc(),
            event.getDynamicInstruction().isUseStackPointerAsEffectiveAddressBase(),
            event.getDynamicInstruction().getEffectiveAddressDisplacement(),
            event.getDynamicInstruction().getStaticInstruction()
    );
}
/**
 * Remember the specified delinquent load as the one to gather instructions for.
 * <p>
 * The request is ignored unless the buffer is idle.
 *
 * @param delinquentLoad the delinquent load
 */
private void gatherInstructionsFor(DelinquentLoad delinquentLoad) {
    if (this.state != RetiredInstructionBufferState.IDLE) {
        return;
    }

    this.delinquentLoad = delinquentLoad;
}
/**
 * Build the slice for the pending delinquent load from the gathered retired instructions.
 * <p>
 * The most recently gathered instruction seeds the live-in set with its input dependencies and
 * is marked. The buffer is then walked from newest to oldest: an instruction that passes the
 * store-load dependence check and writes a live-in register is marked, its outputs are removed
 * from the live-ins and its inputs are added. Instructions still referenced by the stack address
 * table afterwards are unmarked. The pc of the oldest instruction that passed the check becomes
 * the trigger pc, the pcs of all marked instructions become the slice body, and the slice is
 * handed to the slice information table of the thread. Finally the buffer returns to idle.
 *
 * @throws IllegalArgumentException if no gathered instruction passed the store-load dependence check
 */
private void buildSlice() {
    this.state = RetiredInstructionBufferState.SLICE_BUILDING;
    this.stackAddressTable.clear();

    Slice slice = new Slice(this.delinquentLoad);

    RetiredInstruction newestRetiredInstruction = this.retiredInstructions.lastElement();
    slice.getLiveIns().addAll(newestRetiredInstruction.staticInstruction.getInputDependencies());
    this.mark(newestRetiredInstruction);

    RetiredInstruction lastAnalyzedRetiredInstruction = null;

    // Walk the buffer backwards (newest first) with a typed list iterator.
    for (ListIterator<RetiredInstruction> it = this.retiredInstructions.listIterator(this.retiredInstructions.size()); it.hasPrevious(); ) {
        RetiredInstruction retiredInstruction = it.previous();

        if (this.checkIfNoIgnoredStoreLoadDependence(retiredInstruction)) {
            for (int outputDependency : retiredInstruction.staticInstruction.getOutputDependencies()) {
                if (slice.getLiveIns().contains(outputDependency)) {
                    this.mark(retiredInstruction);
                    slice.getLiveIns().removeAll(retiredInstruction.staticInstruction.getOutputDependencies());
                    slice.getLiveIns().addAll(retiredInstruction.staticInstruction.getInputDependencies());
                    break;
                }
            }

            lastAnalyzedRetiredInstruction = retiredInstruction;
        }
    }

    for (StackAddressTableEntry stackAddressTableEntry : this.stackAddressTable) {
        this.unmark(stackAddressTableEntry.retiredInstruction);
    }

    if (lastAnalyzedRetiredInstruction == null) {
        throw new IllegalArgumentException("No retired instruction passed the store-load dependence check");
    }

    slice.setTriggerPc(lastAnalyzedRetiredInstruction.pc);

    for (RetiredInstruction retiredInstruction : this.retiredInstructions) {
        if (retiredInstruction.marked) {
            slice.getPcs().add(retiredInstruction.pc);
        }
    }

    this.dynamicSpeculativePrecomputationHelper.getSliceInformationTables().get(this.thread).storeSlice(slice);

    this.retiredInstructions.clear();
    this.state = RetiredInstructionBufferState.IDLE;
    this.delinquentLoad = null;
}
/**
 * Mark the specified retired instruction as part of the slice.
 * <p>
 * A marked load that uses the stack pointer as its effective address base is additionally
 * recorded in the stack address table.
 *
 * @param retiredInstruction the retired instruction to be marked
 */
private void mark(RetiredInstruction retiredInstruction) {
    retiredInstruction.marked = true;

    boolean isLoad = retiredInstruction.staticInstruction.getMnemonic().getType() == StaticInstructionType.LOAD;
    if (!isLoad || !retiredInstruction.useStackPointerAsEffectiveAddressBase) {
        return;
    }

    this.stackAddressTable.add(new StackAddressTableEntry(
            retiredInstruction,
            retiredInstruction.effectiveAddressDisplacement,
            retiredInstruction.staticInstruction.getNonEffectiveAddressBaseDependency()
    ));
}
/**
 * Check whether the retired instruction has no ignored store-load dependence.
 * <p>
 * First, every stack address table entry whose non-effective address base dependency is written
 * by the instruction is dropped. Then a stack-pointer-based store is rejected if any remaining
 * entry has the same displacement.
 *
 * @param retiredInstruction the retired instruction
 * @return a value indicating whether the specified retired instruction has no ignored store-load dependence
 */
private boolean checkIfNoIgnoredStoreLoadDependence(RetiredInstruction retiredInstruction) {
    this.stackAddressTable.removeIf(entry ->
            retiredInstruction.staticInstruction.getOutputDependencies().contains(entry.nonEffectiveAddressBaseDependency));

    boolean isStackStore = retiredInstruction.staticInstruction.getMnemonic().getType() == StaticInstructionType.STORE
            && retiredInstruction.useStackPointerAsEffectiveAddressBase;
    if (!isStackStore) {
        return true;
    }

    return this.stackAddressTable.stream()
            .noneMatch(entry -> entry.displacement == retiredInstruction.effectiveAddressDisplacement);
}
/**
 * Clear the mark of the specified retired instruction so that it is left out of the slice.
 *
 * @param retiredInstruction the retired instruction
 */
private void unmark(RetiredInstruction retiredInstruction) {
    retiredInstruction.marked = false;
}
/**
* Stack address table entry.
*/
private class StackAddressTableEntry {
private RetiredInstruction retiredInstruction;
private int displacement;
private int nonEffectiveAddressBaseDependency;
/**
* Create a stack address table entry.
*
* @param retiredInstruction the retired instruction
* @param displacement the displacement
* @param nonEffectiveAddressBaseDependency
* the non-effective address base dependency
*/
private StackAddressTableEntry(RetiredInstruction retiredInstruction, int displacement, int nonEffectiveAddressBaseDependency) {
this.retiredInstruction = retiredInstruction;
this.displacement = displacement;
this.nonEffectiveAddressBaseDependency = nonEffectiveAddressBaseDependency;
}
}
/**
* Retired instruction.
*/
private class RetiredInstruction {
private boolean marked;
private int pc;
private boolean useStackPointerAsEffectiveAddressBase;
private int effectiveAddressDisplacement;
private StaticInstruction staticInstruction;
/**
* Create a retired instruction.
*
* @param pc the value of the program counter (PC)
* @param useStackPointerAsEffectiveAddressBase
* a value indicating whether using the stack pointer as the effective address base or not
* @param effectiveAddressDisplacement the effective address displacement
* @param staticInstruction the static instruction
*/
private RetiredInstruction(int pc, boolean useStackPointerAsEffectiveAddressBase, int effectiveAddressDisplacement, StaticInstruction staticInstruction) {
this.marked = false;
this.pc = pc;
this.useStackPointerAsEffectiveAddressBase = useStackPointerAsEffectiveAddressBase;
this.effectiveAddressDisplacement = effectiveAddressDisplacement;
this.staticInstruction = staticInstruction;
}
@Override
public String toString() {
return String.format("RetiredInstruction{marked=%s, pc=%d, useStackPointerAsEffectiveAddressBase=%s, effectiveAddressDisplacement=%d, staticInstruction=%s}", marked, pc, useStackPointerAsEffectiveAddressBase, effectiveAddressDisplacement, staticInstruction);
}
}
/**
* Maximum number of retired instructions kept while gathering; once this size is reached,
* the oldest entry is removed before a newly committed instruction is appended.
*/
private static final int CAPACITY = 512;
}
/**
* Slice information table.
*/
public static class SliceInformationTable {
/** Owning helper, used to reach the slice cache and the delinquent load identification tables. */
private final DynamicSpeculativePrecomputationHelper dynamicSpeculativePrecomputationHelper;
/** Thread this table belongs to. */
private final Thread thread;
/** Slices currently stored for the thread. */
private final List<Slice> slices;
/**
 * Create a slice information table for the specified thread and subscribe it to the decoded
 * instruction, committed instruction, context killed and cache request events it reacts to.
 *
 * @param dynamicSpeculativePrecomputationHelper
 *               the dynamic speculative precomputation helper
 * @param thread the thread
 */
private SliceInformationTable(DynamicSpeculativePrecomputationHelper dynamicSpeculativePrecomputationHelper, Thread thread) {
    this.dynamicSpeculativePrecomputationHelper = dynamicSpeculativePrecomputationHelper;
    this.thread = thread;
    this.slices = new ArrayList<>();

    this.thread.getBlockingEventDispatcher().addListener(DynamicInstructionDecodedEvent.class, event -> this.onInstructionDecoded(event));
    this.thread.getBlockingEventDispatcher().addListener(DynamicInstructionCommittedEvent.class, event -> this.onInstructionCommitted(event));
    this.thread.getBlockingEventDispatcher().addListener(ContextKilledEvent.class, event -> this.onContextKilled(event));
    this.thread.getBlockingEventDispatcher().addListener(GeneralCacheControllerServiceNonblockingRequestEvent.class, event -> this.onNonblockingRequestServiced(event));
}

/**
 * Spawn a precomputation thread for the first not-yet-running slice whose trigger pc equals the
 * decoded pc, then count the decoded instruction for the first slice whose spawned context
 * decoded it.
 *
 * @param event the decoded instruction event
 */
private void onInstructionDecoded(DynamicInstructionDecodedEvent event) {
    for (Slice candidate : this.slices) {
        boolean triggered = event.getDynamicInstruction().getPc() == candidate.getTriggerPc();
        if (triggered && candidate.getSpawnedThreadContext() == null) {
            this.spawnPrecomputationThread(candidate, event.getDynamicInstruction().getThread().getContext());
            break;
        }
    }

    for (Slice running : this.slices) {
        if (running.getSpawnedThreadContext() != null
                && event.getDynamicInstruction().getThread().getContext() == running.getSpawnedThreadContext()) {
            running.setNumDecodedInstructions(running.getNumDecodedInstructions() + 1);
            break;
        }
    }
}

/**
 * Whenever the thread's instruction count is a multiple of the phase length, evaluate every
 * slice that has been spawned at least once and drop the ones reported as removable.
 *
 * @param event the committed instruction event
 */
private void onInstructionCommitted(DynamicInstructionCommittedEvent event) {
    if (this.thread.getNumInstructions() % INSTRUCTIONS_PER_PHASE != 0) {
        return;
    }

    Iterator<Slice> it = this.slices.iterator();
    while (it.hasNext()) {
        Slice slice = it.next();
        if (slice.getNumSpawnings() > 0 && !this.evaluateEffectiveness(slice)) {
            it.remove();
        }
    }
}

/**
 * Detach the killed context from the first slice that spawned it and remove that slice if it
 * has been flagged as ineffective.
 *
 * @param event the context killed event
 */
private void onContextKilled(ContextKilledEvent event) {
    for (Iterator<Slice> it = this.slices.iterator(); it.hasNext(); ) {
        Slice slice = it.next();
        if (event.getContext() != slice.getSpawnedThreadContext()) {
            continue;
        }

        slice.setSpawnedThreadContext(null);
        if (slice.isIneffective()) {
            this.prepareRemoveSlice(slice);
            it.remove();
        }
        break;
    }
}

/**
 * Credit a saved L2 miss to the first running slice whose spawned context issued a read that
 * missed in a directory controller.
 *
 * @param event the serviced nonblocking request event
 */
private void onNonblockingRequestServiced(GeneralCacheControllerServiceNonblockingRequestEvent event) {
    boolean directoryReadMiss = !event.isHitInCache()
            && event.getCacheController() instanceof DirectoryController
            && event.getAccess().getType().isRead();
    if (!directoryReadMiss) {
        return;
    }

    for (Slice slice : this.slices) {
        if (slice.getSpawnedThreadContext() != null
                && event.getAccess().getThread().getContext() == slice.getSpawnedThreadContext()) {
            slice.setNumSavedL2Misses(slice.getNumSavedL2Misses() + 1);
            break;
        }
    }
}
/**
 * Store the specified slice in the slice cache and register it in this table.
 * <p>
 * If the slice cache has no free line in the target set, the slice is dropped and a message is
 * logged instead.
 *
 * @param slice the slice to be stored
 * @throws IllegalArgumentException if the slice contains more than 64 pcs
 */
private void storeSlice(Slice slice) {
    //TODO: how to support longer pcs? e.g., slice chains?
    int numPcs = slice.getPcs().size();
    if (numPcs > 64) {
        throw new IllegalArgumentException(String.valueOf(numPcs));
    }

    int triggerPc = slice.getTriggerPc();
    EvictableCache<Boolean> sliceCache = this.dynamicSpeculativePrecomputationHelper.getSliceCache();
    int set = sliceCache.getSet(triggerPc);

    CacheAccess<Boolean> cacheAccess = this.dynamicSpeculativePrecomputationHelper.findInvalidLineAndNewMiss(this.thread, triggerPc, set);
    if (cacheAccess == null) {
        Logger.infof(Logger.THREAD, "%s: There is no sufficient hardware resource for storing slice, kindly ignored.", this.thread.getCycleAccurateEventQueue().getCurrentCycle(), this.thread.getName());
        return;
    }

    // Occupy the free line: flag it as valid and tag it with the trigger pc.
    CacheLine<Boolean> line = cacheAccess.getLine();
    BooleanValueProvider stateProvider = (BooleanValueProvider) line.getStateProvider();
    stateProvider.state = true;
    line.setTag(triggerPc);
    sliceCache.getReplacementPolicy().handleInsertionOnMiss(null, set, cacheAccess.getWay());

    // Lay the slice out at consecutive 4-byte slots starting from the first instruction pc.
    Map<Integer, Integer> machineInstructions = new TreeMap<>();
    int slicePc = FIRST_INSTRUCTION_PC;
    for (int pc : slice.getPcs()) {
        machineInstructions.put(slicePc, pc); //TODO: trick for the moment: store mappedPc instead of machInst to use predecoding
        slicePc += 4;
    }
    stateProvider.machineInstructions = machineInstructions;

    this.slices.add(slice);
}
/**
 * Spawn a precomputation thread for the specified slice and context.
 * <p>
 * The new context gets a fresh register file holding copies of the slice's live-in registers
 * and is mapped only onto a hardware thread of a core other than the parent's. If no such
 * thread can be mapped, a message is logged and nothing else happens.
 *
 * @param slice   the slice
 * @param context the context
 */
private void spawnPrecomputationThread(Slice slice, Context context) {
    ArchitecturalRegisterFile newRegs = new ArchitecturalRegisterFile(context.getProcess().isLittleEndian());
    for (int liveIn : slice.getLiveIns()) {
        newRegs.copyRegisterFrom(context.getRegisterFile(), liveIn);
    }

    final PrecomputationContext newContext = new PrecomputationContext(this.dynamicSpeculativePrecomputationHelper, context, newRegs, slice.getTriggerPc());

    boolean mapped = this.thread.getCore().getProcessor().getKernel().map(newContext, candidateThreadId -> {
        int candidateCoreNum = candidateThreadId / thread.getCore().getProcessor().getExperiment().getConfig().getNumThreadsPerCore();
        int parentCoreNum = newContext.getParent().getThreadId() / thread.getCore().getProcessor().getExperiment().getConfig().getNumThreadsPerCore();
        return candidateCoreNum != parentCoreNum;
    });

    if (!mapped) {
        Logger.infof(Logger.THREAD, "%s: There is no sufficient hardware resource for spawning precomputation thread, kindly ignored.", this.thread.getCycleAccurateEventQueue().getCurrentCycle(), this.thread.getName());
        return;
    }

    context.getKernel().getContexts().add(newContext);

    // Start fetching at the first slice instruction.
    newContext.getRegisterFile().setNpc(FIRST_INSTRUCTION_PC);
    newContext.getRegisterFile().setNnpc(newContext.getRegisterFile().getNpc() + 4);

    slice.setNumSpawnings(slice.getNumSpawnings() + 1);
    slice.setSpawnedThreadContext(newContext);
}
/**
 * Evaluate the effectiveness of the specified slice.
 * <p>
 * A slice is effective when its saved L2 misses, weighted by the average latency saving, exceed
 * the number of instructions decoded on its behalf. An ineffective slice that still has a
 * spawned context is only flagged as ineffective; otherwise its removal is prepared immediately.
 *
 * @param slice the slice to be evaluated
 * @return {@code true} if the slice is to stay in the table for now, {@code false} if its
 * removal has been prepared and the caller should drop it
 */
private boolean evaluateEffectiveness(Slice slice) {
    boolean effective = slice.getNumSavedL2Misses() * AVERAGE_L2_MISS_LATENCY_SAVING > slice.getNumDecodedInstructions();
    if (effective) {
        return true;
    }

    if (slice.getSpawnedThreadContext() != null) {
        slice.setIneffective(true);
        return true;
    }

    this.prepareRemoveSlice(slice);
    return false;
}
/**
 * Prepare to remove the specified slice.
 * <p>
 * Removes the slice's delinquent load from the thread's identification table and invalidates
 * the slice cache line found for the slice's trigger pc. The caller is responsible for taking
 * the slice out of the list of slices.
 *
 * @param slice the slice to be removed
 */
private void prepareRemoveSlice(Slice slice) {
    DelinquentLoadIdentificationTable identificationTable = this.dynamicSpeculativePrecomputationHelper.getDelinquentLoadIdentificationTables().get(this.thread);
    identificationTable.removeDelinquentLoad(slice.getDelinquentLoad());

    CacheLine<Boolean> lineFound = this.dynamicSpeculativePrecomputationHelper.getSliceCache().findLine(slice.getTriggerPc());
    BooleanValueProvider stateProvider = (BooleanValueProvider) lineFound.getStateProvider();

    stateProvider.state = false;
    lineFound.setTag(CacheLine.INVALID_TAG);
    stateProvider.machineInstructions = null;
}
/**
* Phase length: slice effectiveness is evaluated when the thread's instruction count is a multiple of this value.
*/
private static final int INSTRUCTIONS_PER_PHASE = 128000;
/**
* Weight applied to a slice's saved L2 misses before comparing them with its number of decoded instructions.
*/
private static final int AVERAGE_L2_MISS_LATENCY_SAVING = 110;
/**
* Pc given to the first instruction of a stored slice (the following ones are placed 4 apart),
* and the NPC a newly spawned precomputation context starts from.
*/
private static final int FIRST_INSTRUCTION_PC = 0x1000;
}
/**
* Passed as both the first and the second argument of the slice cache's CacheGeometry, with 1 as the third
* (presumably size, associativity and line size, i.e. a single fully associative set -- TODO confirm against CacheGeometry).
*/
private static final int SLICE_CACHE_CAPACITY = 32;
}