package water;

import java.lang.management.*;
import java.util.Arrays;
import java.util.concurrent.atomic.AtomicLong;
import javax.management.Notification;
import javax.management.NotificationEmitter;

import jsr166y.ForkJoinPool.ManagedBlocker;
import jsr166y.ForkJoinPool;
import water.util.Log;
import water.util.PrettyPrint;

/**
 * Manages memory assigned to key/value pairs.  All byte arrays used in
 * keys/values should be allocated through this class - otherwise we risk
 * running out of java memory, and throw unexpected OutOfMemory errors.  The
 * theory here is that *most* allocated bytes are allocated in large chunks by
 * allocating new Values - with large backing arrays.  If we intercept these
 * allocation points, we cover most Java allocations.  If such an allocation
 * might trigger an OOM error we first free up some other memory.
 *
 * MemoryManager monitors memory used by the K/V store (by walking through the
 * store, see Cleaner) and overall heap usage by hooking into gc.
 *
 * Memory is freed if either the cached memory is above the limit or if the
 * overall heap usage is too high (in which case we want to use less mem for
 * cache).  There is also a lower limit on the amount of cache so that we never
 * delete all the cache and therefore some computation should always be able to
 * progress.
 *
 * The amount of memory to be freed is determined as the max of cached mem above
 * the limit and heap usage above the limit.
 *
 * One of the primary control inputs is FullGC cycles: we check heap usage and
 * set guidance for cache levels.  We assume after a FullGC that the heap only
 * has POJOs (Plain Old Java Objects, unknown size) and K/V Cached stuff
 * (counted by us).  We compute the free heap as MEM_MAX-heapUsage (after GC),
 * and we compute POJO size as (heapUsage - K/V cache usage).
 *
 * @author tomas
 * @author cliffc
 */
abstract public class MemoryManager {

  // Track timestamp of last oom log to avoid spamming the logs with junk.
  private static volatile long oomLastLogTimestamp = 0;
  private static final long SIXTY_SECONDS_IN_MILLIS = 60 * 1000;

  // max heap memory
  public static final long MEM_MAX = Runtime.getRuntime().maxMemory();

  // Callbacks from GC.  NOTE: must stay declared after MEM_MAX - the
  // HeapUsageMonitor constructor reads MEM_MAX during static initialization.
  static final HeapUsageMonitor HEAP_USAGE_MONITOR = new HeapUsageMonitor();

  // Keep the K/V store below this threshold AND this is the FullGC call-back
  // threshold - which is limited in size to the old-gen pool size.
  // Assigned by the HeapUsageMonitor constructor.
  static long MEM_CRITICAL;

  // Block allocations?
  static volatile boolean CAN_ALLOC = true;
  // Set by the GC callback when heap used after the last GC exceeds 3/4 MEM_MAX
  private static volatile boolean MEM_LOW_CRITICAL = false;

  // Lock for blocking on allocations
  private static final Object _lock = new Object();

  /** Re-enable allocations and wake every thread parked in {@link #malloc}.
   *  No-op if allocations are already allowed. */
  static void setMemGood() {
    if( CAN_ALLOC ) return;
    synchronized(_lock) {
      CAN_ALLOC = true;
      _lock.notifyAll();
    }
    // NO LOGGING UNDER LOCK!
    Log.warn("Continuing after swapping");
  }

  /** Disable allocations (threads calling {@link #malloc} will park).
   *  No-op if the Cleaner is turned off or allocations are already blocked. */
  static void setMemLow() {
    if( !H2O.ARGS.cleaner ) return; // Cleaner turned off
    if( !CAN_ALLOC ) return;
    synchronized(_lock) { CAN_ALLOC = false; }
    // NO LOGGING UNDER LOCK!
    Log.warn("Pausing to swap to disk; more memory may help");
  }

  /** @return true if allocations are currently allowed */
  static boolean canAlloc() { return CAN_ALLOC; }

  /** Same as {@link #set_goals(String, boolean, long)} with no pending allocation (0 bytes). */
  static void set_goals( String msg, boolean oom){
    set_goals(msg, oom, 0);
  }

  /** Set K/V cache goals.  Allow (or disallow) allocations.
   *  Called from the Cleaner, when "cacheUsed" has changed significantly.
   *  Called from any FullGC notification, and HEAP/POJO_USED changed.
   *  Called on any OOM allocation.
   *
   *  @param msg   caller tag, embedded in the log message
   *  @param oom   true if the caller hit (or considers itself in) an out-of-memory situation
   *  @param bytes size of the allocation that triggered this call, or 0 if none
   */
  static void set_goals( String msg, boolean oom , long bytes) {
    // Our best guess of free memory, as of the last GC cycle
    final long heapUsedGC = Cleaner.HEAP_USED_AT_LAST_GC;
    final long timeGC = Cleaner.TIME_AT_LAST_GC;
    final long freeHeap = MEM_MAX - heapUsedGC;
    assert freeHeap >= 0 : "I am really confused about the heap usage; MEM_MAX="+MEM_MAX+" heapUsedGC="+heapUsedGC;
    // Current memory held in the K/V store.
    final long cacheUsageGC = Cleaner.KV_USED_AT_LAST_GC;
    // Our best guess of POJO object usage: Heap_used minus cache used
    final long pojoUsedGC = Math.max(heapUsedGC - cacheUsageGC,0);

    // Block allocations if:
    // the cache is > 7/8 MEM_MAX, OR
    // we cannot allocate an equal amount of POJOs, pojoUsedGC > freeHeap.
    // Decay POJOS_USED by 1/8th every 5 sec: assume we got hit with a single
    // large allocation which is not repeating - so we do not need to have
    // double the POJO amount.
    // Keep at least 1/8th heap for caching.
    // Emergency-clean the cache down to the blocking level.
    long d = MEM_CRITICAL;      // Block-allocation level; cache can grow till this
    // Decay POJO amount
    long p = pojoUsedGC;
    long age = (System.currentTimeMillis() - timeGC); // Age since last FullGC
    age = Math.min(age,10*60*1000 ); // Clip at 10mins
    while( (age-=5000) > 0 ) p = p-(p>>3); // Decay effective POJO by 1/8th every 5sec
    // Allow for the effective POJO, and again to throttle GC rate (and allow for this allocation).
    // NOTE(review): as written this *adds* `bytes` to the desired cache level
    // (d = d - 2*p + bytes); confirm that sign is intended.
    d -= 2*p - bytes;
    d = Math.max(d,MEM_MAX>>3); // Keep at least 1/8th heap
    if( Cleaner.DESIRED != -1 ) // Set to -1 only for OOM/Cleaner testing.  Never negative normally
      Cleaner.DESIRED = d;      // Desired caching level
    final long cacheUsageNow = Cleaner.Histo.cached();

    boolean skipThisLogMessageToAvoidSpammingTheLogs = false;
    String m="";
    if( cacheUsageNow > Cleaner.DESIRED ) {
      m = (CAN_ALLOC?"Swapping! ":"blocked: ");
      if( oom ) setMemLow();    // Stop allocations; trigger emergency clean
      Cleaner.kick_store_cleaner();
    } else {                    // Else we are not *emergency* cleaning, but may be lazily cleaning.
      setMemGood();             // Cache is below desired level; unblock allocations
      if( oom ) {               // But still have an OOM?
        m = "Unblock allocations; cache below desired, but also OOM: ";
        // Means the heap is full of uncached POJO's - which cannot be spilled.
        // Here we enter the zone of possibly dieing for OOM.  There's no point
        // in blocking allocations, as no more memory can be freed by more
        // cache-flushing.  Might as well proceed on a "best effort" basis.
        long now = System.currentTimeMillis();
        if ((now - oomLastLogTimestamp) >= SIXTY_SECONDS_IN_MILLIS) {
          oomLastLogTimestamp = now;
        } else {
          skipThisLogMessageToAvoidSpammingTheLogs = true;
        }
      } else {
        m = "MemGood: ";        // Cache is low enough, room for POJO allocation - full steam ahead!
      }
    }

    if (skipThisLogMessageToAvoidSpammingTheLogs) {
      return;
    }

    // No logging if under memory pressure: can deadlock the cleaner thread
    String s = m+msg+", (K/V:"+PrettyPrint.bytes(cacheUsageGC)+" + POJO:"+PrettyPrint.bytes(pojoUsedGC)+" + FREE:"+PrettyPrint.bytes(freeHeap)+" == MEM_MAX:"+PrettyPrint.bytes(MEM_MAX)+"), desiredKV="+PrettyPrint.bytes(Cleaner.DESIRED)+(oom?" OOM!":" NO-OOM");
    if( CAN_ALLOC ) {
      if( oom ) Log.warn(s);
      else      Log.debug(s);
    } else {
      System.err.println(s);
    }
  }

  /** Monitors the heap usage after full gc run and tells Cleaner to free memory
   *  if mem usage is too high.  Stops new allocation if mem usage is critical.
   *  @author tomas
   */
  private static class HeapUsageMonitor implements javax.management.NotificationListener {
    MemoryMXBean _allMemBean = ManagementFactory.getMemoryMXBean(); // general

    // Determine the OldGen GC pool size - which is saved in MEM_CRITICAL as
    // the max desirable K/V store size.
    HeapUsageMonitor() {
      int c = 0;
      for( MemoryPoolMXBean m : ManagementFactory.getMemoryPoolMXBeans() ) {
        if( m.getType() != MemoryType.HEAP ) // only interested in HEAP
          continue;
        if( m.isCollectionUsageThresholdSupported()
            && m.isUsageThresholdSupported()) {
          // Really idiotic API: no idea what the usageThreshold is, so I have
          // to guess.  Start high, catch IAE & lower by 1/8th and try again.
          long gc_callback = MEM_MAX;
          while( true ) {
            try {
              m.setCollectionUsageThreshold(gc_callback);
              break;
            } catch( IllegalArgumentException iae ) {
              // Expected IAE: means we used too high a callback level
              gc_callback -= (gc_callback>>3);
            }
          }
          m.setCollectionUsageThreshold(1); // Call back for every fullgc
          NotificationEmitter emitter = (NotificationEmitter) _allMemBean;
          emitter.addNotificationListener(this, null, m);
          ++c;
          MEM_CRITICAL = gc_callback; // Set old-gen heap level
        }
      }
      assert c == 1;
    }

    /** Callback routine called by JVM after full gc run.  Has two functions:
     *  1) sets the amount of memory to be cleaned from the cache by the Cleaner
     *  2) sets the CAN_ALLOC flag to false if memory level is critical
     */
    @Override public void handleNotification(Notification notification, Object handback) {
      String notifType = notification.getType();
      if( !notifType.equals(MemoryNotificationInfo.MEMORY_COLLECTION_THRESHOLD_EXCEEDED)) return;
      // Memory used after this FullGC
      Cleaner.TIME_AT_LAST_GC = System.currentTimeMillis();
      Cleaner.HEAP_USED_AT_LAST_GC = _allMemBean.getHeapMemoryUsage().getUsed();
      Cleaner.KV_USED_AT_LAST_GC = Cleaner.Histo.cached();
      MEM_LOW_CRITICAL = Cleaner.HEAP_USED_AT_LAST_GC > 0.75*MEM_MAX;
      Log.debug("GC CALLBACK: "+Cleaner.TIME_AT_LAST_GC+", USED:"+PrettyPrint.bytes(Cleaner.HEAP_USED_AT_LAST_GC)+", CRIT: "+MEM_LOW_CRITICAL);
      set_goals("GC CALLBACK",MEM_LOW_CRITICAL);
      //if( MEM_LOW_CRITICAL ) { // emergency measure - really low on memory, stop allocations right now!
      //  setMemLow();           // In-use memory is > 3/4 heap; block allocations
      //} else if( Cleaner.HEAP_USED_AT_LAST_GC < (MEM_MAX - (MEM_MAX >> 1)) )
      //  setMemGood();          // In use memory is < 1/2 heap; allow allocations even if Cleaner is still running
    }
  }

  // Allocates memory with cache management
  // Will block until there is enough available memory.
  // Catches OutOfMemory, clears cache & retries.
  static Object malloc(int elems, long bytes, int type, Object orig, int from ) {
    return malloc(elems,bytes,type,orig,from,false);
  }

  /** Allocate (or copy) a primitive/Object array, parking while allocations
   *  are blocked and retrying after an OutOfMemoryError.
   *
   *  @param elems element count for a fresh array; for the copy types
   *               (negative {@code type}) it is the exclusive end index passed
   *               to {@code Arrays.copyOfRange}
   *  @param bytes size of the request in bytes; requests of 256 bytes or less
   *               never park, and the value is forwarded to {@code set_goals} on OOM
   *  @param type  1=byte, 2=short, 4=int, 8=long, 5=float, 9=double, 0=boolean,
   *               10=Object; -1/-4/-8/-9 = copy range of byte/int/long/double array
   *  @param orig  source array for the copy types, otherwise unused
   *  @param from  start index for the copy types, otherwise unused
   *  @param force if true, never park waiting for allocations to be re-enabled
   *  @return the newly allocated array
   */
  static Object malloc(int elems, long bytes, int type, Object orig, int from , boolean force) {
    assert elems >= 0 : "Bad size " + elems; // is 0 okay?!
    // Do not assert on large-size here.  RF's temp internal datastructures are
    // single very large arrays.
    //assert bytes < Value.MAX : "malloc size=0x"+Long.toHexString(bytes);
    while( true ) {
      if( (!MEM_LOW_CRITICAL && !force) && !CAN_ALLOC && // Not allowing allocations?
          bytes > 256 &&        // Allow tiny ones in any case
          // To prevent deadlock, we cannot block the cleaner thread in any
          // case.  This is probably an allocation for logging (ouch! shades of
          // logging-induced deadlock!) which will probably be recycled quickly.
          !(Thread.currentThread() instanceof Cleaner) ) {
        synchronized(_lock) {
          // Re-check under the lock: setMemGood() flips CAN_ALLOC and calls
          // notifyAll() while holding _lock, so if it ran between the unlocked
          // test above and here we would otherwise miss the wakeup and sit in
          // wait() for the full timeout.
          if( !CAN_ALLOC ) {
            // Interrupts are deliberately ignored: we just fall through and retry.
            try { _lock.wait(300*1000); } catch (InterruptedException ex) { }
          }
        }
      }
      try {
        switch( type ) {
        case   1: return new byte   [elems];
        case   2: return new short  [elems];
        case   4: return new int    [elems];
        case   8: return new long   [elems];
        case   5: return new float  [elems];
        case   9: return new double [elems];
        case   0: return new boolean[elems];
        case  10: return new Object [elems];
        case  -1: return Arrays.copyOfRange((byte  [])orig,from,elems);
        case  -4: return Arrays.copyOfRange((int   [])orig,from,elems);
        case  -8: return Arrays.copyOfRange((long  [])orig,from,elems);
        case  -9: return Arrays.copyOfRange((double[])orig,from,elems);
        default: throw H2O.fail();
        }
      }
      catch( OutOfMemoryError e ) {
        // Do NOT log OutOfMemory, it is expected and unavoidable and handled
        // in most cases by spilling to disk.
        if( Cleaner.isDiskFull() ) {
          Log.err("Disk full, space left = " + Cleaner.availableDiskSpace());
          UDPRebooted.suicide(UDPRebooted.T.oom, H2O.SELF);
        }
      }
      set_goals("OOM",true, bytes); // Low memory; block for swapping
    }
  }

  // Allocates memory with cache management
  public static byte   [] malloc1 (int size) { return malloc1(size,false); }
  public static byte   [] malloc1 (int size, boolean force)
                                           { return (byte   [])malloc(size,size*1, 1,null,0,force); }
  public static short  [] malloc2 (int size) { return (short  [])malloc(size,size*2L, 2,null,0); }
  public static int    [] malloc4 (int size) { return (int    [])malloc(size,size*4L, 4,null,0); }
  public static long   [] malloc8 (int size) { return (long   [])malloc(size,size*8L, 8,null,0); }
  public static float  [] malloc4f(int size) { return (float  [])malloc(size,size*4L, 5,null,0); }
  public static double [] malloc8d(int size) {
    if(size < 32) try { // fast path for small arrays (e.g. histograms in gbm)
      return new double [size];
    } catch (OutOfMemoryError oom){/* fall through */}
    return (double [])malloc(size,size*8L, 9,null,0);
  }
  /** Allocates an {@code m} x {@code n} matrix, one {@link #malloc8d(int)} call per row. */
  public static double [][] malloc8d(int m, int n) {
    double [][] res = new double[m][];
    for(int i = 0; i < m; ++i)
      res[i] = malloc8d(n);
    return res;
  }
  public static boolean[] mallocZ (int size) { return (boolean[])malloc(size,size  , 0,null,0); }
  public static Object [] mallocObj(int size){ return (Object [])malloc(size,size*8L,10,null,0,false); }

  // Managed equivalents of Arrays.copyOfRange(orig,from,sz).  The byte count
  // is computed in 64-bit arithmetic (4L/8L), matching malloc4/malloc8 above:
  // an int product would overflow for copies of 2^29 (int) or 2^28
  // (long/double) elements and more.
  public static byte   [] arrayCopyOfRange(byte  [] orig, int from, int sz) { return (byte  []) malloc(sz,(sz-from)   ,-1,orig,from); }
  public static int    [] arrayCopyOfRange(int   [] orig, int from, int sz) { return (int   []) malloc(sz,(sz-from)*4L,-4,orig,from); }
  public static long   [] arrayCopyOfRange(long  [] orig, int from, int sz) { return (long  []) malloc(sz,(sz-from)*8L,-8,orig,from); }
  public static double [] arrayCopyOfRange(double[] orig, int from, int sz) { return (double[]) malloc(sz,(sz-from)*8L,-9,orig,from); }
  public static byte   [] arrayCopyOf( byte  [] orig, int sz) { return arrayCopyOfRange(orig,0,sz); }
  public static int    [] arrayCopyOf( int   [] orig, int sz) { return arrayCopyOfRange(orig,0,sz); }
  public static long   [] arrayCopyOf( long  [] orig, int sz) { return arrayCopyOfRange(orig,0,sz); }
  public static double [] arrayCopyOf( double[] orig, int sz) { return arrayCopyOfRange(orig,0,sz); }

  // Memory available for tasks (we assume 3/4 of the heap is available for tasks)
  static final AtomicLong _taskMem = new AtomicLong(MEM_MAX-(MEM_MAX>>2));

  /**
   * Try to reserve memory needed for task execution and return true if
   * succeeded.  Tasks have a shared pool of memory which they should ask for
   * in advance before they even try to allocate it.
   *
   * This method is another backpressure mechanism to make sure we do not
   * exhaust system's resources by running too many tasks at the same time.
   * Tasks are expected to reserve memory before proceeding with their
   * execution and making sure they release it when done.
   *
   * @param m - requested number of bytes
   * @return true if there is enough free memory; always false while
   *         allocations are blocked (CAN_ALLOC is false)
   */
  static boolean tryReserveTaskMem(long m){
    if(!CAN_ALLOC)return false;
    if( m == 0 ) return true;
    assert m >= 0:"m < 0: " + m;
    long current = _taskMem.addAndGet(-m);
    if(current < 0){
      _taskMem.addAndGet(m);    // Overdrawn: undo the reservation
      return false;
    }
    return true;
  }

  // Monitor used to park threads in reserveTaskMem until freeTaskMem runs
  private static final Object _taskMemLock = new Object();

  /**
   * Reserve task memory, blocking (via ForkJoinPool.managedBlock) until
   * {@link #tryReserveTaskMem(long)} succeeds.
   *
   * @param m - requested number of bytes
   */
  static void reserveTaskMem(long m){
    final long bytes = m;
    while(!tryReserveTaskMem(bytes)){
      try {
        ForkJoinPool.managedBlock(new ManagedBlocker() {
          @Override
          public boolean isReleasable() {return _taskMem.get() >= bytes;}
          @Override
          public boolean block() throws InterruptedException {
            synchronized(_taskMemLock){
              // Re-check under the lock before parking: freeTaskMem() updates
              // the counter and then notifies while holding _taskMemLock, so
              // a release that happened since the caller's isReleasable()
              // test is seen here instead of being lost to an untimed wait().
              if( !isReleasable() ) {
                // Interrupts are deliberately ignored; the caller loops and retries.
                try {_taskMemLock.wait();} catch( InterruptedException e ) {}
              }
            }
            return isReleasable();
          }
        });
      } catch (InterruptedException e){
        Log.throwErr(e);
      }
    }
  }

  /**
   * Free the memory successfully reserved by task, and wake any threads
   * waiting in {@link #reserveTaskMem(long)}.
   * @param m - number of bytes to return to the task pool
   */
  static void freeTaskMem(long m){
    if(m == 0)return;
    _taskMem.addAndGet(m);
    synchronized(_taskMemLock){
      _taskMemLock.notifyAll();
    }
  }
}