package edu.brown.hstore; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.LinkedHashMap; import java.util.Map; import java.util.Set; import java.util.TreeMap; import java.util.TreeSet; import java.util.Map.Entry; import java.util.concurrent.CountDownLatch; import java.util.concurrent.ScheduledFuture; import java.util.concurrent.ScheduledThreadPoolExecutor; import java.util.concurrent.TimeUnit; import java.util.regex.Pattern; import org.apache.log4j.Logger; import org.voltdb.catalog.Host; import org.voltdb.catalog.Partition; import org.voltdb.utils.ThreadUtils; import edu.brown.catalog.CatalogUtil; import edu.brown.hstore.conf.HStoreConf; import edu.brown.interfaces.DebugContext; import edu.brown.logging.LoggerUtil; import edu.brown.logging.LoggerUtil.LoggerBoolean; import edu.brown.utils.StringUtil; import edu.brown.utils.ThreadUtil; /** * The thread manager is used to schedule periodic work and assign threads to * individual CPU cores down in the EE. * @author pavlo */ public class HStoreThreadManager { private static final Logger LOG = Logger.getLogger(HStoreThreadManager.class); private static final LoggerBoolean debug = new LoggerBoolean(); private static final LoggerBoolean trace = new LoggerBoolean(); static { LoggerUtil.attachObserver(LOG, debug, trace); } public enum ThreadGroupType { PROCESSING, EXECUTION, NETWORK, AUXILIARY, CLEANER }; private static final Pattern THREAD_NAME_SLITTER = Pattern.compile("\\-"); // ---------------------------------------------------------------------------- // DATA MEMBERS // ---------------------------------------------------------------------------- @SuppressWarnings("unused") private final HStoreSite hstore_site; private final HStoreConf hstore_conf; private boolean disable; private final ScheduledThreadPoolExecutor periodicWorkExecutor; private final int num_cores = ThreadUtil.getMaxGlobalThreads(); private final boolean defaultAffinity[]; private final Set<Thread> all_threads = new HashSet<Thread>(); /** * Set of CPU ids that the PartitionExecutor threads will not be * allowed to execute on. * @see HStoreConf.site.cpu_partition_blacklist */ private final Set<Integer> partitionBlacklist = new HashSet<Integer>(); /** * Set of CPU ids that the utility threads will not be * allowed to execute on. * @see HStoreConf.site.cpu_partition_blacklist */ private final Set<Integer> utilityBlacklist = new HashSet<Integer>(); /** * Mapping from Partition to individual CPU id * Note that this will contain all of the Partitions at this host */ private final Map<Partition, Integer> partitionCPUs = new HashMap<Partition, Integer>(); /** * Mapping from the CPU Id# to the Threads that pinned to it. * This is just for debugging purposes. If you modify this map, the threads * will not automatically be pinned to those CPUs. The real assignment is performed down * in the EE. */ private final Map<Integer, Set<Thread>> cpu_threads = new TreeMap<Integer, Set<Thread>>(); /** * Internal mapping from ThreadGroupType to the ThreadGroup handle */ private final Map<ThreadGroupType, ThreadGroup> threadGroups = new HashMap<ThreadGroupType, ThreadGroup>(); private final Map<String, boolean[]> utilityAffinities = new HashMap<String, boolean[]>(); private final String utility_suffixes[] = { HStoreConstants.THREAD_NAME_COMMANDLOGGER, HStoreConstants.THREAD_NAME_PERIODIC, HStoreConstants.THREAD_NAME_COORDINATOR, HStoreConstants.THREAD_NAME_QUEUE_MGR, HStoreConstants.THREAD_NAME_QUEUE_MGR, HStoreConstants.THREAD_NAME_QUEUE_RESTART, HStoreConstants.THREAD_NAME_TXNCLEANER, HStoreConstants.THREAD_NAME_POSTPROCESSOR, }; // ---------------------------------------------------------------------------- // CONSTRUCTOR // ---------------------------------------------------------------------------- public HStoreThreadManager(HStoreSite hstore_site) { this.hstore_site = hstore_site; this.hstore_conf = hstore_site.getHStoreConf(); // Partition Blacklist // Note that we assume that all sites on the same node have the same blacklist if (hstore_conf.site.cpu_partition_blacklist != null) { for (String part : hstore_conf.site.cpu_partition_blacklist.split(",")) { part = part.trim(); if (part.isEmpty()) continue; int cpuId = -1; try { cpuId = Integer.parseInt(part); assert(cpuId >= 0); } catch (Throwable ex) { LOG.error("Invalid CPU Id for partition blacklist '" + part + "'", ex); break; } this.partitionBlacklist.add(cpuId); } // FOR if (debug.val) LOG.debug("Partition CPU Blacklist: " + this.partitionBlacklist); } // Partition Blacklist if (hstore_conf.site.cpu_utility_blacklist != null) { for (String part : hstore_conf.site.cpu_utility_blacklist.split(",")) { part = part.trim(); if (part.isEmpty()) continue; int cpuId = -1; try { cpuId = Integer.parseInt(part); assert(cpuId >= 0); } catch (Throwable ex) { LOG.error("Invalid CPU Id for utility blacklist '" + part + "'", ex); break; } this.utilityBlacklist.add(cpuId); } // FOR if (debug.val) LOG.debug("Utility CPU Blacklist: " + this.utilityBlacklist); } // Periodic Work Thread String threadName = getThreadName(hstore_site, HStoreConstants.THREAD_NAME_PERIODIC); this.periodicWorkExecutor = ThreadUtil.getScheduledThreadPoolExecutor(threadName, hstore_site.getExceptionHandler(), 1, 1024 * 128); this.defaultAffinity = new boolean[this.num_cores]; Arrays.fill(this.defaultAffinity, true); for (int cpu : this.utilityBlacklist) { this.defaultAffinity[cpu] = false; } // FOR Host host = hstore_site.getHost(); Collection<Partition> host_partitions = CatalogUtil.getPartitionsForHost(host); if (hstore_conf.site.cpu_affinity == false) { this.disable = true; } else if (this.num_cores <= host_partitions.size()) { LOG.warn(String.format("Unable to set CPU affinity on %s because there are %d partitions " + "but only %d available CPU cores", host.getIpaddr(), host_partitions.size(), this.num_cores)); this.disable = true; } // Calculate what cores the partitions + utility threads are allowed // to execute on at this HStoreSite. We have to be careful about considering // other sites that may be on the same host (for testing). else { // Now figure out where the partition threads are allowed to execute // Note that we are doing this for all of the partitions at this host int cpuId = 0; for (Partition partition : host_partitions) { while (this.partitionBlacklist.contains(cpuId)) { cpuId++; } // WHILE this.partitionCPUs.put(partition, cpuId); this.defaultAffinity[cpuId] = false; cpuId++; } // FOR // Reserve the highest cores for the various utility threads // We want to pin these threads to a single core to make it easier to identify // what when one of them eats too much of it. if ((this.num_cores - host_partitions.size()) > this.utility_suffixes.length+2) { for (int i = 0; i < this.utility_suffixes.length; i++) { boolean affinity[] = this.utilityAffinities.get(this.utility_suffixes[i]); if (affinity == null) { affinity = new boolean[this.num_cores]; Arrays.fill(affinity, false); } int core = this.num_cores - (i+1); affinity[core] = true; this.defaultAffinity[core] = false; this.utilityAffinities.put(this.utility_suffixes[i], affinity); } // FOR } if (debug.val) LOG.debug("Default CPU Affinity: " + Arrays.toString(this.defaultAffinity)); } org.voltdb.EELibraryLoader.loadExecutionEngineLibrary(true); } public synchronized ThreadGroup getThreadGroup(ThreadGroupType type) { ThreadGroup group = this.threadGroups.get(type); if (group == null) { String name = StringUtil.title(type.name()) + " Threads"; group = new ThreadGroup(name); this.threadGroups.put(type, group); } return (group); } // ---------------------------------------------------------------------------- // PERIODIC WORK EXECUTOR // ---------------------------------------------------------------------------- /** * Internal method to register our single periodic thread with ourself. * This is a blocking call that will wait until the initialization * thread is succesfully executed. */ protected void initPerioidicThread() { final CountDownLatch latch = new CountDownLatch(1); Runnable r = new Runnable() { @Override public void run() { HStoreThreadManager.this.registerProcessingThread(); latch.countDown(); } }; this.scheduleWork(r); // Wait until it's finished boolean ret = false; try { ret = latch.await(1000, TimeUnit.MILLISECONDS); } catch (InterruptedException ex) { // Ignore... } assert(ret) : "Failed to initialize perioidic thread"; } public ScheduledThreadPoolExecutor getPeriodicWorkExecutor() { return (this.periodicWorkExecutor); } /** * From VoltDB * @param work * @param initialDelay * @param delay * @param unit * @return */ public ScheduledFuture<?> schedulePeriodicWork(Runnable work, long initialDelay, long delay, TimeUnit unit) { assert(delay > 0); return this.periodicWorkExecutor.scheduleWithFixedDelay(work, initialDelay, delay, unit); } /** * Schedule a Runnable to be run once and only once with no initial dealy * @param work * @return */ public void scheduleWork(Runnable work) { //LOG.info("We've scheduled that!"); this.periodicWorkExecutor.execute(work); } /** * Schedule a Runnable to be run once and only once. The initialDelay specifies * how long the scheduler should wait before invoking the Runnable * @param work * @param initialDelay * @param unit * @return */ public ScheduledFuture<?> scheduleWork(Runnable work, long initialDelay, TimeUnit unit) { return this.periodicWorkExecutor.schedule(work, initialDelay, unit); } // ---------------------------------------------------------------------------- // THREAD-TO-CPU AFFINITY METHODS // ---------------------------------------------------------------------------- /** * Set the CPU affinity for the EE thread executing for the given partition * @param partition */ public synchronized boolean registerEEThread(Partition partition) { if (this.disable) return (false); Thread t = Thread.currentThread(); boolean affinity[] = null; try { affinity = ThreadUtils.getThreadAffinity(); // This doesn't seem to work on newer versions of OSX, so we'll just disable it } catch (UnsatisfiedLinkError ex) { LOG.warn("Unable to set CPU affinity for the ExecutionEngine thread for partition" + partition + ". " + "Disabling feature in ExecutionEngine", ex); this.disable = true; return (false); } assert(affinity != null); Arrays.fill(affinity, false); // Only allow this EE to execute on a single core if (hstore_conf.site.cpu_affinity_one_partition_per_core) { int core = this.partitionCPUs.get(partition); affinity[core] = true; } // Allow this EE to run on any of the lower cores allocated for this Site else { for (Partition p : this.partitionCPUs.keySet()) { if (p.getParent().equals(partition.getParent())) { int core = this.partitionCPUs.get(p); affinity[core] = true; } } // FOR } if (debug.val) LOG.debug(String.format("Registering EE Thread %s to execute on CPUs %s", t.getName(), this.getCPUIds(affinity))); if (this.registerThread(t, affinity) == false) { return (false); } // final boolean endingAffinity[] = ThreadUtils.getThreadAffinity(); // for (int ii = 0; ii < endingAffinity.length; ii++) { // if (trace.val && endingAffinity[ii]) // LOG.trace(String.format("NEW AFFINITY %s -> CPU[%d]", partition, ii)); // affinity[ii] = false; // } // FOR if (debug.val) LOG.debug(String.format("Successfully set affinity for thread '%s' on CPUs %s\n%s", t.getName(), this.getCPUIds(affinity), this.debug())); return (true); } /** * Set the CPU affinity for the current non-EE thread * This thread cannot run on the EE's cores */ public synchronized boolean registerProcessingThread() { if (this.disable) return (false); boolean affinity[] = this.defaultAffinity; Thread t = Thread.currentThread(); // Check whether this is as utility thread that we want to pin // to a certain number of cores String nameParts[] = THREAD_NAME_SLITTER.split(t.getName()); String suffix = (nameParts.length > 1 ? nameParts[1] : nameParts[0]); if (this.utilityAffinities.containsKey(suffix)) { affinity = this.utilityAffinities.get(suffix); if (trace.val) LOG.trace("Using utility affinity for '" + suffix + "'"); } if (debug.val) LOG.debug(String.format("Registering Processing Thread %s to execute on CPUs %s", t.getName(), this.getCPUIds(affinity))); if (this.registerThread(t, affinity) == false) { return (false); } if (debug.val) LOG.debug(String.format("Successfully set affinity for thread '%s' on CPUs %s\n%s", t.getName(), this.getCPUIds(affinity), this.debug())); return (true); } private boolean registerThread(Thread t, boolean affinity[]) { // If this fails (such as on OS X for some weird reason), we'll // just print a warning rather than crash try { this.disable = (ThreadUtils.setThreadAffinity(affinity) == false); } catch (UnsatisfiedLinkError ex) { LOG.warn("Unable to set CPU affinity for thread '" + t.getName() + "'. Disabling feature", ex); this.disable = true; return (false); } if (this.disable) { LOG.warn("Unable to set CPU affinity for thread '" + t.getName() + "'. Disabling feature"); return (false); } for (int i = 0; i < affinity.length; i++) { if (affinity[i]) { Set<Thread> s = this.cpu_threads.get(i); if (s == null) { s = new HashSet<Thread>(); this.cpu_threads.put(i, s); } s.add(t); } } // FOR this.all_threads.add(t); return (true); } /** * For the given affinity mapping, return the corresponding CPU Ids. * This is for debugging * @param affinity * @return */ private Collection<Integer> getCPUIds(boolean affinity[]) { Collection<Integer> cpus = new ArrayList<Integer>(); for (int i = 0; i < affinity.length; i++) { if (affinity[i]) cpus.add(i); } return (cpus); } /** * Return the total number of cores at this host * Note that this does not take into consideration other sites that may * be running at the same host * @return */ public int getNumCores() { return (this.num_cores); } /** * Returns true if cpu affinity pinning is enabled * @return */ public boolean isEnabled() { return (this.disable == false); } // ---------------------------------------------------------------------------- // THREAD NAME FORMATTERS // ---------------------------------------------------------------------------- public static final String formatSiteName(Integer site_id) { if (site_id == null) return (null); return (getThreadName(site_id, null)); } public static final String formatPartitionName(int site_id, int partition) { return (getThreadName(site_id, partition)); } public static final String getThreadName(HStoreSite hstore_site, Integer partition) { return (getThreadName(hstore_site.getSiteId(), partition)); } public static final String getThreadName(HStoreSite hstore_site, String...suffixes) { return (getThreadName(hstore_site.getSiteId(), null, suffixes)); } public static final String getThreadName(HStoreSite hstore_site, Integer partition, String...suffixes) { return (getThreadName(hstore_site.getSiteId(), partition, suffixes)); } /** * Formatted site name * @param site_id * @param partition - Can be null * @param suffix - Can be null * @return */ public static final String getThreadName(int site_id, Integer partition, String...suffixes) { String suffix = null; if (suffixes != null && suffixes.length > 0) suffix = StringUtil.join("-", suffixes); if (suffix == null) suffix = ""; if (suffix.isEmpty() == false) { suffix = "-" + suffix; if (partition != null) suffix = String.format("-%03d%s", partition.intValue(), suffix); } else if (partition != null) { suffix = String.format("-%03d", partition.intValue()); } return (String.format("H%02d%s", site_id, suffix)); } // ---------------------------------------------------------------------------- // DEBUG METHODS // ---------------------------------------------------------------------------- public synchronized String debug() { Map<String, Object> m = new LinkedHashMap<String, Object>(); for (Entry<Integer, Set<Thread>> e : this.cpu_threads.entrySet()) { TreeSet<String> names = new TreeSet<String>(); for (Thread t : e.getValue()) { names.add(t.getName()); } // FOR m.put("CPU #" + e.getKey(), names.toString()); } // FOR return (StringUtil.formatMaps(m)); } public class Debug implements DebugContext { public Map<Integer, Set<Thread>> getCPUThreads() { return Collections.unmodifiableMap(cpu_threads); } public boolean isRegistered(Thread t) { return (all_threads.contains(t)); } /** * Returns all the CPU ids that the given Thread is allowed to execute on * @param t * @return */ public Collection<Integer> getCPUIds(Thread t) { Collection<Integer> cpus = new HashSet<Integer>(); for (Integer cpu : cpu_threads.keySet()) { if (cpu_threads.get(cpu).contains(t)) { cpus.add(cpu); } } // FOR return (cpus); } } private Debug cachedDebugContext; public Debug getDebugContext() { if (this.cachedDebugContext == null) { this.cachedDebugContext = new Debug(); } return (this.cachedDebugContext); } }