/** * (C) Copyright IBM Corp. 2010, 2015 * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. *  */ package com.ibm.bi.dml.runtime.controlprogram.caching; import java.io.File; import java.io.IOException; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import com.ibm.bi.dml.api.DMLScript; import com.ibm.bi.dml.parser.Expression.DataType; import com.ibm.bi.dml.parser.Expression.ValueType; import com.ibm.bi.dml.runtime.DMLRuntimeException; import com.ibm.bi.dml.runtime.controlprogram.caching.LazyWriteBuffer.RPolicy; import com.ibm.bi.dml.runtime.controlprogram.parfor.util.IDSequence; import com.ibm.bi.dml.runtime.instructions.cp.Data; import com.ibm.bi.dml.runtime.matrix.data.MatrixBlock; import com.ibm.bi.dml.runtime.util.LocalFileUtils; /** * Each object of this class is a cache envelope for some large piece of data * called "data blob". (I prefer "blob" to "block" to avoid ambiguity.) For * example, the body of a matrix can be the data blob. The term "data blob" * refers strictly to the cacheable portion of the data object, often excluding * metadata and auxiliary parameters, as defined in the subclasses. * Under the protection of the envelope, the data blob may be evicted to * the file system; then the subclass must set its reference to <code>null</code> * to allow Java garbage collection. If other parts of the system continue * keep references to the data blob, its eviction will not release any memory. * To make the eviction meaningful, the rest of the system * must dispose of all references prior to giving the permission for eviction. * */ public abstract class CacheableData extends Data { private static final long serialVersionUID = -413810592207212835L; protected static final Log LOG = LogFactory.getLog(CacheableData.class.getName()); public static final long CACHING_THRESHOLD = 4*1024; //obj not s.t. caching if below threshold [in bytes] public static final double CACHING_BUFFER_SIZE = 0.15; public static final RPolicy CACHING_BUFFER_POLICY = RPolicy.FIFO; public static final boolean CACHING_BUFFER_PAGECACHE = false; public static final boolean CACHING_WRITE_CACHE_ON_READ = false; public static final String CACHING_COUNTER_GROUP_NAME = "SystemML Caching Counters"; //flag indicating if caching is turned on (eviction writes only happen if activeFlag is true) private static boolean _activeFlag = false; public static String cacheEvictionLocalFilePath = null; //set during init public static String cacheEvictionLocalFilePrefix = "cache"; public static final String cacheEvictionLocalFileExtension = ".dat"; /** * Defines all possible cache status types for a data blob. * An object of class {@link CacheableData} can be in one of the following * five status types: * * <code>EMPTY</code>: Either there is no data blob at all, or the data blob * resides in a specified import file and has never been downloaded yet. * <code>READ</code>: The data blob is in main memory; one or more threads are * referencing and reading it (shared "read-only" lock). This status uses a * counter. Eviction is NOT allowed. * <code>MODIFY</code>: The data blob is in main memory; exactly one thread is * referencing and modifying it (exclusive "write" lock). Eviction is NOT allowed. * <code>CACHED</code>: The data blob is in main memory, and nobody is using nor referencing it. * There is always an persistent recovery object for it **/ protected enum CacheStatus { EMPTY, READ, MODIFY, CACHED, CACHED_NOWRITE, }; private static IDSequence _seq = null; static { _seq = new IDSequence(); } /** * The unique (JVM-wide) ID of a cacheable data object; to ensure unique IDs across JVMs, we * concatenate filenames with a unique prefix (map task ID). */ private final int _uniqueID; /** * The cache status of the data blob (whether it can be or is evicted, etc.) */ private CacheStatus _cacheStatus = null; private int _numReadThreads = 0; protected CacheableData (DataType dt, ValueType vt) { super (dt, vt); _uniqueID = (int)_seq.getNextID(); _cacheStatus = CacheStatus.EMPTY; _numReadThreads = 0; } // --------- ABSTRACT LOW-LEVEL CACHE I/O OPERATIONS ---------- /** * Checks if the data blob reference points to some in-memory object. * This method is called when releasing the (last) lock. Do not call * this method for a blob that has been evicted. * * @return <code>true</code> if the blob is in main memory and the * reference points to it; * <code>false</code> if the blob reference is <code>null</code>. */ protected abstract boolean isBlobPresent(); /** * Low-level cache I/O method that physically evicts the data blob from * main memory. Must be defined by a subclass, never called by users. * @param mb * * @throws CacheIOException if the eviction fails, the data blob * remains as it was at the start. */ protected abstract void evictBlobFromMemory(MatrixBlock mb) throws CacheIOException; /** * Low-level cache I/O method that physically restores the data blob to * main memory. Must be defined by a subclass, never called by users. * * @throws CacheIOException if the restore fails, the data blob * remains as it was at the start. * @throws CacheAssignmentException if the restored blob cannot be assigned * to this envelope. */ protected abstract void restoreBlobIntoMemory() throws CacheIOException; /** * Low-level cache I/O method that deletes the file containing the * evicted data blob, without reading it. * Must be defined by a subclass, never called by users. */ protected abstract void freeEvictedBlob(); /** * */ protected abstract boolean isBelowCachingThreshold(); // ------------- IMPLEMENTED CACHE LOGIC METHODS -------------- protected int getUniqueCacheID() { return _uniqueID; } /** * This method "acquires the lock" to ensure that the data blob is in main memory * (not evicted) while it is being accessed. When called, the method will try to * restore the blob if it has been evicted. There are two kinds of locks it may * acquire: a shared "read" lock (if the argument is <code>false</code>) or the * exclusive "modify" lock (if the argument is <code>true</code>). * The method can fail in three ways: * (1) if there is lock status conflict; * (2) if there is not enough cache memory to restore the blob; * (3) if the restore method returns an error. * The method locks the data blob in memory (which disables eviction) and updates * its last-access timestamp. For the shared "read" lock, acquiring a new lock * increments the associated count. The "read" count has to be decremented once * the blob is no longer used, which may re-enable eviction. This method has to * be called only once per matrix operation and coupled with {@link #release()}, * because it increments the lock count and the other method decrements this count. * * @param isModify : <code>true</code> for the exclusive "modify" lock, * <code>false</code> for a shared "read" lock. * @throws CacheException */ protected void acquire (boolean isModify, boolean restore) throws CacheException { switch ( _cacheStatus ) { case CACHED: if(restore) restoreBlobIntoMemory(); case CACHED_NOWRITE: case EMPTY: if (isModify) setModify(); else addOneRead(); break; case READ: if (isModify) throw new CacheStatusException ("READ-MODIFY not allowed."); else addOneRead(); break; case MODIFY: throw new CacheStatusException ("MODIFY-MODIFY not allowed."); } if( LOG.isTraceEnabled() ) LOG.trace("Acquired lock on " + this.getDebugName() + ", status: " + this.getStatusAsString() ); } /** * Call this method to permit eviction for the stored data blob, or to * decrement its "read" count if it is "read"-locked by other threads. * It is expected that you eliminate all external references to the blob * prior to calling this method, because otherwise eviction will * duplicate the blob, but not release memory. This method has to be * called only once per process and coupled with {@link #acquire(boolean)}, * because it decrements the lock count and the other method increments * the lock count. * * @throws CacheException */ protected void release(boolean cacheNoWrite) throws CacheException { switch ( _cacheStatus ) { case EMPTY: case CACHED: case CACHED_NOWRITE: throw new CacheStatusException("Redundant release."); case READ: removeOneRead( isBlobPresent(), cacheNoWrite ); break; case MODIFY: if ( isBlobPresent() ) setCached(); else setEmpty(); break; } if( LOG.isTraceEnabled() ) LOG.trace("Released lock on " + this.getDebugName() + ", status: " + this.getStatusAsString()); } // ************************************************** // *** *** // *** CACHE STATUS FIELD - CLASSES AND METHODS *** // *** *** // ************************************************** public String getStatusAsString() { return _cacheStatus.toString(); } //TODO isCached is only public for access from SparkExectionContext, once we can assume //the existence of spark libraries, we can move the related code to MatrixObject and //make this method protected again public boolean isCached(boolean inclCachedNoWrite) { if( inclCachedNoWrite ) return (_cacheStatus == CacheStatus.CACHED || _cacheStatus == CacheStatus.CACHED_NOWRITE); else return (_cacheStatus == CacheStatus.CACHED); } protected boolean isEmpty(boolean inclCachedNoWrite) { if( inclCachedNoWrite ) return (_cacheStatus == CacheStatus.EMPTY || _cacheStatus == CacheStatus.CACHED_NOWRITE); else return (_cacheStatus == CacheStatus.EMPTY); } protected boolean isModify() { return (_cacheStatus == CacheStatus.MODIFY); } protected void setEmpty() { _cacheStatus = CacheStatus.EMPTY; } protected void setModify() { _cacheStatus = CacheStatus.MODIFY; } protected void setCached() { _cacheStatus = CacheStatus.CACHED; } protected void addOneRead() { _numReadThreads ++; _cacheStatus = CacheStatus.READ; } protected void removeOneRead(boolean doesBlobExist, boolean cacheNoWrite) { _numReadThreads --; if (_numReadThreads == 0) { if( cacheNoWrite ) _cacheStatus = (doesBlobExist ? CacheStatus.CACHED_NOWRITE : CacheStatus.EMPTY); else _cacheStatus = (doesBlobExist ? CacheStatus.CACHED : CacheStatus.EMPTY); } } protected boolean isAvailableToRead() { return ( _cacheStatus == CacheStatus.EMPTY || _cacheStatus == CacheStatus.CACHED || _cacheStatus == CacheStatus.CACHED_NOWRITE || _cacheStatus == CacheStatus.READ); } protected boolean isAvailableToModify() { return ( _cacheStatus == CacheStatus.EMPTY || _cacheStatus == CacheStatus.CACHED || _cacheStatus == CacheStatus.CACHED_NOWRITE); } // --------- STATIC CACHE INIT/CLEANUP OPERATIONS ---------- /** * */ public synchronized static void cleanupCacheDir() { //cleanup remaining cached writes LazyWriteBuffer.cleanup(); //delete cache dir and files cleanupCacheDir(true); } /** * Deletes the DML-script-specific caching working dir. * * @param withDir */ public synchronized static void cleanupCacheDir(boolean withDir) { //get directory name String dir = cacheEvictionLocalFilePath; //clean files with cache prefix if( dir != null ) //if previous init cache { File fdir = new File(dir); if( fdir.exists()){ //just for robustness File[] files = fdir.listFiles(); for( File f : files ) if( f.getName().startsWith(cacheEvictionLocalFilePrefix) ) f.delete(); if( withDir ) fdir.delete(); //deletes dir only if empty } } _activeFlag = false; } /** * Inits caching with the default uuid of DMLScript * @throws IOException */ public synchronized static void initCaching() throws IOException { initCaching(DMLScript.getUUID()); } /** * Creates the DML-script-specific caching working dir. * * Takes the UUID in order to allow for custom uuid, e.g., for remote parfor caching * * @throws IOException */ public synchronized static void initCaching( String uuid ) throws IOException { try { String dir = LocalFileUtils.getWorkingDir( LocalFileUtils.CATEGORY_CACHE ); LocalFileUtils.createLocalFileIfNotExist(dir); cacheEvictionLocalFilePath = dir; } catch(DMLRuntimeException e) { throw new IOException(e); } //init write-ahead buffer LazyWriteBuffer.init(); _activeFlag = true; //turn on caching } public static synchronized boolean isCachingActive() { return _activeFlag; } public static synchronized void disableCaching() { _activeFlag = false; } public static synchronized void enableCaching() { _activeFlag = true; } }