/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.sysml.utils;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.AtomicLong;
/**
 * Collects performance numbers (timers and counters) when GPU mode is enabled.
 * Printed as part of {@link Statistics}.
 *
 * The global cuda timers/counters are exposed as public static fields that callers
 * update directly. The per-instruction miscellaneous timers are kept in private maps
 * and every access to those maps goes through methods synchronized on this class.
 */
public class GPUStatistics {
	//TODO fix formatting
	//TODO replace AtomicLong with LongAdder

	// Whether or not extra per-instruction statistics will be recorded and shown for the GPU
	public static boolean DISPLAY_STATISTICS = false;

	// Number of executed GPU instructions; only accessed through the synchronized accessors below
	private static int iNoOfExecutedGPUInst = 0;

	public static long cudaInitTime = 0;
	public static long cudaLibrariesInitTime = 0;

	public static AtomicLong cudaSparseToDenseTime = new AtomicLong(0);     // time spent in converting sparse matrix block to dense
	public static AtomicLong cudaDenseToSparseTime = new AtomicLong(0);     // time spent in converting dense matrix block to sparse
	public static AtomicLong cudaSparseConversionTime = new AtomicLong(0);  // time spent in converting between sparse block types
	public static AtomicLong cudaSparseToDenseCount = new AtomicLong(0);
	public static AtomicLong cudaDenseToSparseCount = new AtomicLong(0);
	public static AtomicLong cudaSparseConversionCount = new AtomicLong(0);

	public static AtomicLong cudaAllocTime = new AtomicLong(0);    // time spent in allocating memory on the GPU
	public static AtomicLong cudaDeAllocTime = new AtomicLong(0);  // time spent in deallocating memory on the GPU
	public static AtomicLong cudaMemSet0Time = new AtomicLong(0);  // time spent in setting memory to 0 on the GPU (part of reusing and for new allocates)
	public static AtomicLong cudaToDevTime = new AtomicLong(0);    // time spent in copying data from host (CPU) to device (GPU) memory
	public static AtomicLong cudaFromDevTime = new AtomicLong(0);  // time spent in copying data from device to host
	public static AtomicLong cudaAllocCount = new AtomicLong(0);
	public static AtomicLong cudaDeAllocCount = new AtomicLong(0);
	public static AtomicLong cudaMemSet0Count = new AtomicLong(0);
	public static AtomicLong cudaToDevCount = new AtomicLong(0);
	public static AtomicLong cudaFromDevCount = new AtomicLong(0);
	public static AtomicLong cudaEvictionCount = new AtomicLong(0);

	// Per instruction miscellaneous timers.
	// Used to record events in a CP Heavy Hitter instruction and
	// provide a breakdown of how time was spent in that instruction.
	// Both maps are keyed by instruction name, then by misc timer name, and are
	// always updated together, so a timer present in one is present in the other.
	private static HashMap<String, HashMap<String, Long>> _cpInstMiscTime = new HashMap<String, HashMap<String, Long>>();
	private static HashMap<String, HashMap<String, Long>> _cpInstMiscCount = new HashMap<String, HashMap<String, Long>>();

	/**
	 * Resets the miscellaneous timers and counters.
	 * Synchronized so that it cannot interleave with {@link #maintainCPMiscTimes(String, String, long, long)}.
	 */
	public static synchronized void resetMiscTimers(){
		_cpInstMiscTime.clear();
		_cpInstMiscCount.clear();
	}

	/**
	 * Resets all the cuda counters and timers, including the sparse/dense conversion
	 * timers and counters and the misc timers and counters.
	 * The number of executed GPU instructions is not touched here; use
	 * {@link #setNoOfExecutedGPUInst(int)} for that.
	 */
	public static void reset(){
		cudaInitTime = 0;
		cudaLibrariesInitTime = 0;

		cudaAllocTime.set(0);
		cudaDeAllocTime.set(0);
		cudaMemSet0Time.set(0);
		cudaMemSet0Count.set(0);
		cudaToDevTime.set(0);
		cudaFromDevTime.set(0);
		cudaAllocCount.set(0);
		cudaDeAllocCount.set(0);
		cudaToDevCount.set(0);
		cudaFromDevCount.set(0);
		cudaEvictionCount.set(0);

		// conversion statistics are reported by getStringForCudaTimers() and
		// therefore have to be cleared along with everything else
		cudaSparseToDenseTime.set(0);
		cudaDenseToSparseTime.set(0);
		cudaSparseConversionTime.set(0);
		cudaSparseToDenseCount.set(0);
		cudaDenseToSparseCount.set(0);
		cudaSparseConversionCount.set(0);

		resetMiscTimers();
	}

	/**
	 * Overwrites the number of executed GPU instructions.
	 * @param numJobs new value of the counter
	 */
	public static synchronized void setNoOfExecutedGPUInst(int numJobs) {
		iNoOfExecutedGPUInst = numJobs;
	}

	/**
	 * Increments the number of executed GPU instructions by one.
	 */
	public static synchronized void incrementNoOfExecutedGPUInst() {
		iNoOfExecutedGPUInst ++;
	}

	/**
	 * @return the number of executed GPU instructions recorded so far
	 */
	public static synchronized int getNoOfExecutedGPUInst() {
		return iNoOfExecutedGPUInst;
	}

	/**
	 * "Maintains" or adds time to miscellaneous timers per instruction/op, also increments associated count.
	 * Does nothing unless {@link #DISPLAY_STATISTICS} is set.
	 * @param instructionName name of the instruction/op
	 * @param miscTimer name of the miscellaneous timer
	 * @param timeNanos time in nano seconds
	 * @param incrementCount how much to increment the count of the miscTimer by
	 */
	public synchronized static void maintainCPMiscTimes( String instructionName, String miscTimer, long timeNanos, long incrementCount)
	{
		if (!DISPLAY_STATISTICS)
			return;
		accumulate(_cpInstMiscTime, instructionName, miscTimer, timeNanos);
		accumulate(_cpInstMiscCount, instructionName, miscTimer, incrementCount);
	}

	/**
	 * "Maintains" or adds time to miscellaneous timers per instruction/op, also increments associated count by 1
	 * @param instructionName name of the instruction/op
	 * @param miscTimer name of the miscellaneous timer
	 * @param timeNanos time in nano seconds
	 */
	public synchronized static void maintainCPMiscTimes( String instructionName, String miscTimer, long timeNanos){
		maintainCPMiscTimes(instructionName, miscTimer, timeNanos, 1);
	}

	/**
	 * Adds delta to table[instructionName][miscTimer], creating missing entries on the way.
	 * Callers must hold the class lock.
	 */
	private static void accumulate(HashMap<String, HashMap<String, Long>> table, String instructionName, String miscTimer, long delta) {
		HashMap<String, Long> perInstruction = table.get(instructionName);
		if (perInstruction == null) {
			perInstruction = new HashMap<String, Long>();
			table.put(instructionName, perInstruction);
		}
		Long previous = perInstruction.get(miscTimer);
		long updated = delta + ((previous != null) ? previous.longValue() : 0L);
		perInstruction.put(miscTimer, Long.valueOf(updated));
	}

	/**
	 * Used to print misc timers (and their counts) for a given instruction/op.
	 * Entries are formatted as <code>name[seconds,count]</code>, separated by ", " and
	 * ordered by accumulated time, most expensive first.
	 * @param instructionName name of the instruction/op
	 * @return a formatted string of misc timers for a given instruction/op,
	 *         empty if nothing was recorded for that instruction/op
	 */
	public static synchronized String getStringForCPMiscTimesPerInstruction(String instructionName) {
		StringBuilder sb = new StringBuilder();
		HashMap<String, Long> miscTimerMap = _cpInstMiscTime.get(instructionName);
		if (miscTimerMap == null)
			return sb.toString();

		List<Map.Entry<String, Long>> sortedList = new ArrayList<Map.Entry<String, Long>>(miscTimerMap.entrySet());
		// Sort the times to display by the most expensive first.
		// compareTo is used instead of subtracting and casting to int, because the
		// difference of two nanosecond values easily exceeds the int range.
		Collections.sort(sortedList, new Comparator<Map.Entry<String, Long>>() {
			@Override
			public int compare(Map.Entry<String, Long> o1, Map.Entry<String, Long> o2) {
				return o2.getValue().compareTo(o1.getValue());
			}
		});

		// The count map is populated together with the time map (see maintainCPMiscTimes)
		HashMap<String, Long> miscCountMap = _cpInstMiscCount.get(instructionName);
		Iterator<Map.Entry<String, Long>> miscTimeIter = sortedList.iterator();
		while (miscTimeIter.hasNext()) {
			Map.Entry<String, Long> e = miscTimeIter.next();
			String miscTimerName = e.getKey();
			Long miscTimerTime = e.getValue();
			Long miscCount = miscCountMap.get(miscTimerName);
			sb.append(miscTimerName + "[" + String.format("%.3f", (double) miscTimerTime / 1000000000.0) + "s," + miscCount + "]");
			if (miscTimeIter.hasNext())
				sb.append(", ");
		}
		return sb.toString();
	}

	/**
	 * Used to print out cuda timers and counters
	 * @return a formatted string of cuda timers and counters
	 */
	public static String getStringForCudaTimers() {
		StringBuilder sb = new StringBuilder();
		sb.append("CUDA/CuLibraries init time:\t" + String.format("%.3f", cudaInitTime*1e-9) + "/"
				+ String.format("%.3f", cudaLibrariesInitTime*1e-9) + " sec.\n");
		sb.append("Number of executed GPU inst:\t" + getNoOfExecutedGPUInst() + ".\n");
		sb.append("GPU mem tx time  (alloc/dealloc/set0/toDev/fromDev):\t"
				+ String.format("%.3f", cudaAllocTime.get()*1e-9) + "/"
				+ String.format("%.3f", cudaDeAllocTime.get()*1e-9) + "/"
				+ String.format("%.3f", cudaMemSet0Time.get()*1e-9) + "/"
				+ String.format("%.3f", cudaToDevTime.get()*1e-9) + "/"
				+ String.format("%.3f", cudaFromDevTime.get()*1e-9) + " sec.\n");
		// Exactly one value per label in the header; the sparse conversion count
		// belongs to the conversion count line further down.
		sb.append("GPU mem tx count (alloc/dealloc/set0/toDev/fromDev/evict):\t"
				+ cudaAllocCount.get() + "/"
				+ cudaDeAllocCount.get() + "/"
				+ cudaMemSet0Count.get() + "/"
				+ cudaToDevCount.get() + "/"
				+ cudaFromDevCount.get() + "/"
				+ cudaEvictionCount.get() + ".\n");
		sb.append("GPU conversion time  (sparseConv/sp2dense/dense2sp):\t"
				+ String.format("%.3f", cudaSparseConversionTime.get()*1e-9) + "/"
				+ String.format("%.3f", cudaSparseToDenseTime.get()*1e-9) + "/"
				+ String.format("%.3f", cudaDenseToSparseTime.get()*1e-9) + " sec.\n");
		sb.append("GPU conversion count (sparseConv/sp2dense/dense2sp):\t"
				+ cudaSparseConversionCount.get() + "/"
				+ cudaSparseToDenseCount.get() + "/"
				+ cudaDenseToSparseCount.get() + ".\n");
		return sb.toString();
	}
}