/* * This file is part of JOP, the Java Optimized Processor * see <http://www.jopdesign.com/> * * Copyright (C) 2010, Benedikt Huber (benedikt.huber@gmail.com) * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. */ package com.jopdesign.wcet; import com.jopdesign.common.MethodInfo; import com.jopdesign.common.code.CallGraph.ContextEdge; import com.jopdesign.common.code.CallString; import com.jopdesign.common.code.ExecutionContext; import com.jopdesign.common.processormodel.JOPConfig; import com.jopdesign.dfa.analyses.SymbolicAddress; import com.jopdesign.wcet.analysis.InvalidFlowFactException; import com.jopdesign.wcet.analysis.cache.ObjectCacheAnalysisDemo; import com.jopdesign.wcet.analysis.cache.ObjectCacheEvaluationResult; import com.jopdesign.wcet.analysis.cache.ObjectCacheEvaluationResult.OCacheAnalysisResult; import com.jopdesign.wcet.analysis.cache.ObjectCacheEvaluationResult.OCacheMode; import com.jopdesign.wcet.analysis.cache.ObjectCacheAnalysis; import com.jopdesign.wcet.jop.ObjectCache; import com.jopdesign.wcet.jop.ObjectCache.ObjectCacheCost; import org.jgrapht.traverse.TopologicalOrderIterator; import java.io.FileNotFoundException; import java.io.PrintStream; import java.util.ArrayList; import java.util.List; import java.util.Set; import lpsolve.LpSolveException; public class ObjectCacheEvaluation { /* generator for object cache timings */ private interface ObjectCacheTiming { int loadTime(int words); void setObjectCacheTiming(ObjectCache objectCache, int lineSize); } private static class OCTimingUni implements ObjectCacheTiming { private int accessCycles; private int delay; private int cyclesPerWord; public OCTimingUni(int accessCycles, int delay, int cyclesPerWord) { this.accessCycles = accessCycles; this.delay = delay; this.cyclesPerWord = cyclesPerWord; } public int loadTime(int words) { return delay + words * cyclesPerWord; } public void setObjectCacheTiming(ObjectCache objectCache, int blockSize) { objectCache.setHitCycles(accessCycles); objectCache.setLoadFieldCycles(accessCycles + loadTime(1)); objectCache.setLoadBlockCycles(accessCycles + loadTime(blockSize)); } public String toString() { return String.format("S(D)RAM [access=%d, delay=%d, cycles-per-word=%d]", accessCycles,delay,cyclesPerWord); } } private static class OCTimingCmp implements ObjectCacheTiming { private int cores; private int wordsPerSlot; private int accessCycles; private int cyclesPerWord; private int delay; /* FIXME: may too conservative for wordsPerSlot > 1; do we have to wait (n-1) s + s-1 cycles before * loading the first word even for large slot length?? */ public OCTimingCmp(int cores, int wordsPerSlot, int accessCycles, int delay, int cyclesPerWord) { this.cores = cores; this.wordsPerSlot = wordsPerSlot; this.accessCycles = accessCycles; this.delay = delay; this.cyclesPerWord = cyclesPerWord; } // slot length = time to transmit wordsPerSlot words public int getSlotLength() { return delay + cyclesPerWord*wordsPerSlot; } // tmax (conservative, as I do not know CMP well enough for a more precise formula) // W .. words to transmit // S .. words per slot // tmax = s-1 + n * s * ceil(W/S) public int loadTime(int words) { int s = getSlotLength(); int maxRounds = ((words+wordsPerSlot-1)/wordsPerSlot); return (s-1) + cores * s * maxRounds; } public void setObjectCacheTiming(ObjectCache objectCache, int blockSize) { objectCache.setHitCycles(accessCycles); objectCache.setLoadFieldCycles(accessCycles + loadTime(1)); objectCache.setLoadBlockCycles(accessCycles + loadTime(blockSize)); } public String toString() { return String.format("SRAM [cores=%d,slotlength(l=4)=%d,access=%d,delay=%d,cycles-per-word=%d]", cores,getSlotLength(),accessCycles,delay,cyclesPerWord); } } private WCETTool project; public ObjectCacheEvaluation(WCETTool project) { this.project = project; } public boolean run(MethodInfo targetMethod) { try { evaluateObjectCache(targetMethod); return true; } catch (InvalidFlowFactException e) { e.printStackTrace(); return false; } catch (LpSolveException e) { e.printStackTrace(); return false; } } private void evaluateObjectCache(MethodInfo targetMethod) throws InvalidFlowFactException, LpSolveException { long start,stop; // Method Cache //testExactAllFit(); // Object Cache (debugging) ObjectCache objectCache = project.getWCETProcessorModel().getObjectCache(); if(objectCache == null) { throw new AssertionError("Cannot evaluate object cache on a processor without object cache"); } ObjectCacheAnalysis ocAnalysis = new ObjectCacheAnalysis(project, objectCache); // ocAnalysis.false, 1, 65536, ObjectCacheAnalysisDemo.DEFAULT_SET_SIZE); TopologicalOrderIterator<ExecutionContext, ContextEdge> cgIter = this.project.getCallGraph().topDownIterator(); while(cgIter.hasNext()) { ExecutionContext scope = cgIter.next(); Set<SymbolicAddress> addresses = ocAnalysis.getAddressSet(scope); String entryString = String.format("%-50s ==> |%d|%s ; Saturated Types: (%s)", scope, addresses.size(), ocAnalysis.getAddressSet(scope), ocAnalysis.getSaturatedTypes(scope)); System.out.println(" "+entryString); } // Object cache, evaluation PrintStream pStream; ExecHelper.TeePrintStream oStream; try { pStream = new PrintStream(project.getProjectConfig().getOutFile("ocache","eval.txt")); oStream = new ExecHelper.TeePrintStream(System.out, pStream); } catch (FileNotFoundException e) { oStream = new ExecHelper.TeePrintStream(System.out, null); } ObjectCacheAnalysisDemo oca; ObjectCacheTiming configs[] = { new OCTimingUni(0,0,2), // sram,uni: 2 cycles field cost, 2*w cycles line cost new OCTimingUni(0,10,2), // sdram,uni: 10+2*w cycles for each w-word access new OCTimingCmp(8, 1, 0, 0, 2), // SRAM, cmp, 2 cycles word load cost, s=2 new OCTimingCmp(8, 4, 0, 10, 2) // SDRAM, cmp, 18 cycles quadword load cost, s=18 }; OCacheMode[] modes = { OCacheMode.BLOCK_FILL, OCacheMode.SINGLE_FIELD }; List<OCacheAnalysisResult> samples = new ArrayList<OCacheAnalysisResult>(); int[] cacheWays = { 0, 2, 4, 8, 16, 32, 64, 512 }; // need to be in ascending order int[] lineSizesObjCache = { 4, 8, 16, 32}; int[] lineSizesFieldCache = { 1 }; int[] blockSizesObjCache = { 1, 2, 4, 8, 16 }; int[] lineSizes; for(int configId=0; configId < configs.length; configId++) { ObjectCacheTiming ocConfig = configs[configId]; oStream.println("---------------------------------------------------------"); oStream.println("Object Cache Configuration: "+ocConfig); oStream.println("---------------------------------------------------------"); for(OCacheMode mode : modes) { long maxCost = 0; // long cacheMisses = Long.MAX_VALUE; String modeString; lineSizes = lineSizesObjCache; if(mode == OCacheMode.BLOCK_FILL) modeString = "fill-block"; else { modeString = "field-as-tag"; lineSizes = lineSizesFieldCache; } boolean first = true; for(int lineSize : lineSizes) { for(int blockSize : blockSizesObjCache) { if(blockSize > lineSize) continue; if(mode == OCacheMode.BLOCK_FILL) { } else { if(blockSize > 1) continue; } /* Configure object cache timing */ /* We have to take field access count of cache size = 0; our analysis otherwise does not assign * sensible field access counts (thats the fault of the IPET method) */ long totalFieldAccesses = -1, cachedFieldAccesses = -1; double bestCyclesPerAccessForConfig = Double.POSITIVE_INFINITY; double bestHitRate = 0.0; long bestCostPerConfig = Long.MAX_VALUE; // assume cacheSizes are in ascending order for(int ways : cacheWays) { ocConfig.setObjectCacheTiming(objectCache, blockSize); if(mode == OCacheMode.SINGLE_FIELD) { objectCache = ObjectCache.createFieldCache(project, ways, 0, 0, 0); } else { objectCache = new ObjectCache(project, ways, blockSize, lineSize, 0, 0, 0); } ocConfig.setObjectCacheTiming(objectCache, blockSize); oca = new ObjectCacheAnalysisDemo(project, objectCache); double cyclesPerAccess, hitRate; ObjectCache.ObjectCacheCost ocCost = oca.computeCost(); long cost = ocCost.getCost(); if(cost < bestCostPerConfig) bestCostPerConfig = cost; double bestRatio,ratio; if(ways == 0) { maxCost = cost; totalFieldAccesses = ocCost.getTotalFieldAccesses(); cachedFieldAccesses = ocCost.getFieldAccessesWithoutBypass(); bestRatio = 1.0; ratio = 1.0; } else { bestRatio = (double)bestCostPerConfig/(double)maxCost; ratio = (double)cost/(double)maxCost; } cyclesPerAccess = (double)cost / (double)totalFieldAccesses ; if(cyclesPerAccess < bestCyclesPerAccessForConfig || ways <= 1) { bestCyclesPerAccessForConfig = cyclesPerAccess; } /* hit rate is defined as: 1 - ((cache misses+accesses to bypassed fields) / total field accesses (with n=0) */ long missAccesses = ocCost.getCacheMissCount() + ocCost.getBypassCount(); hitRate = (1 - ((double)missAccesses / (double)totalFieldAccesses)); if(hitRate > bestHitRate || ways <= 1) { bestHitRate = hitRate; } if(first) { oStream.println(String.format("***** ***** MODE = %s ***** *****\n",modeString)); oStream.println(String.format(" - max tags accessed (upper bound) = %d, max fields accesses = %d", oca.getMaxAccessedTags(targetMethod, CallString.EMPTY), totalFieldAccesses) ); first = false; } String report = String.format(" + Cycles Per Access [N=%3d,l=%2d,b=%2d]: %.2f (%d total cost, %.2f %% cost of no cache, %d bypass cost)", //, %.2f %% 'hitrate')", ways, lineSize, blockSize, bestCyclesPerAccessForConfig, cost, bestRatio*100, ocCost.getBypassCost()); if(bestCostPerConfig > cost) { report += String.format(" # (analysis cost increased by %.2f %% for this associativity)",ratio*100); } oStream.println(report); if(mode != OCacheMode.SINGLE_FIELD) { OCacheAnalysisResult sample = new ObjectCacheEvaluationResult.OCacheAnalysisResult(ways, lineSize, blockSize, configId, bestHitRate, bestCyclesPerAccessForConfig, ocCost); samples.add(sample); } } } } } } OCacheAnalysisResult.dumpBarPlot(samples, oStream); OCacheAnalysisResult.dumpPlot(samples, oStream); OCacheAnalysisResult.dumpLatex(samples, oStream); } }