/**
* Copyright 2013, Landz and its contributors. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package z.offheap.zmalloc;
import z.channel.MPMCQueue;
import z.util.SystemProperty;
import z.util.primitives.Longs;
import java.util.Optional;
import static z.util.Contracts.contract;
import static z.util.Unsafes.*;
import static z.util.Unsafes.systemAllocateMemory;
/**
* "zmalloc" - Landz's off-heap direct memory allocator.
* <p>
* It is designed to have high performance with scalable hardware support,
* zero garbage generation and built-in statistics in the pure Java.
*
* <p>
* zmalloc concepts:
* <p> GlobalPool: a memory area which the allocator can use.
* <p> ThreadLocalPool: a thread local memory area for efficient chunk
* allocation.
* It will request/release memories from/to GlobalPool
* when necessary.
* <p> Page: a up-level internal allocation unit which is larger than a Chunk,
* as the chunk cache for some specific SizeType.
* it is always in 3 states: free, partial, full.
* <p> Chunk: just a continuous memory block, basic allocation unit in zmalloc,
* which is in a specific SizeType
* <p> SizeClass: Chunks are only allocated in one size of SizeClass
*
* <p>
*
* <b>Designer Note:</b>
*
* <p>
* Landz's zmalloc is designed from scratch to avoid any license problem.
* There are three public reference for the inspirations:
* <ul>
* <li> one article about Memcached's slab allocator
* (http://nosql.mypopescu.com/post/13506116892/memcached-internals-memory-
* allocation-eviction);
* <li> one book chapter about linux kernel slab allocator(detailed link);
* <li> one paper about lock-free memory allocator by Oracle's Dave Dice
* (detailed link);
* </ul>
* <p>
* Generally, the zmalloc is more like as slab allocator.
* It has the possible to meet "pathological case"
* (There are workaround suggestions). But as my investigate to public bug
* reports, ptmalloc and tcmalloc also has this problem.
*
* <p>
* The main structure of current impl is two-level: ZMPage, and ZMChunck.
* Because the page and chunk is common, so I ZM- prefix to make them
* distinguished with others. And more additions, the naming in Allocator class
* is C-style, in that: <br/>
* <ul>
* <li> there are many new concepts in this allocator field,
* then full camel-case naming is hard to read, short camel-case naming is
* talking nothing; <br/>
* <li> to warning the developer we are in dangerous area.
* </ul>
* <p>
* ZMPage: is the container of the ZMChuncks, which is 2MB now.
* 2MB is not random chosen size, which is the default large Page size in
* Linux. This limits current max size of chunk can be allocated by zmalloc
* <br/>(NOTE: There are several ways for solving this. I may push a version to
* support a larger size in a near future.)
*
* <p>
* Because zmalloc makes the page to the same size. The page can be shared
* between threadlocal pool and global pool.
* This is different to the size of slab of Memcached as my understanding to
* that reference. (I don't comment on which is better in that the references
* can not drive us to any conclusion.)
*
* <p>
* ZMChunck is the final allocated memory area for use. Once a ZMPage is used
* to allocate ZMChuncks, then ZMPage is assigned to a sizeClass which indicated
* this ZMPage is for which size of ZMChunck. A sizeClassIndex(or sci for short
* in sources) is for indexing one size of the pre-allocate table(or said a
* continuous memory area in the offheap area). This is just for speeding and
* improving memory locality(all this can be done by on-heap objects).
*
* <p>
* ZMPage contains the metadata of ZMChunk. This is different to that the
* jemalloc uses RBTree. (I don't comment on which is better in that the
* references can not drive us to any conclusion.)
*
* One design tip with the ZMPage is that, the ZMPage is 2MB aligned in design,
* it make to reason one ZMChunk belongs to which ZMPage in two of fastest
* instructions in all of CPUs):
*
* <pre>
* long addressPage = addressChunk-(addressChunk&(SIZE_ZMPAGE-1));
* </pre>
*
* see this statement in {@link Allocator#free}
*
* <p>
* This gives big benefits: ZMChunk doesn't need to store any metadata. This
* shortens the critical path and decide how the way landz manage the metadata.
*
* <p>
* ZMPages are from a global pages pool, then a ZMPage should be in a thread
* pool to use. When ZMPage in the free state, it is can be freed to global pool
* when some conditions.
*
* <p>
* The operations inside of threadLocal pool, as the name indicated, are thread safe.
*
* <p>
* The Operations from threadLocal pool to global pool use the landz's
* {@link MPMCQueue}, which provide a high-throughput/low-latency lock-free
* bounded queue.
*
* <p>
* One important side of thread-safe design to allocator is the cross-thread
* free invocation. ZMalloc solves this like this:
* <ul>
* <li> metadata records the thread's tid when allocated, call ownedTid;
* <li> if free thread's tid found not match with ownedTid, then zmalloc just
* put the return that chunk to ownedTid's remote freed queue.
* <li> when some conditions happens, the remote freed queue will be drained for
* use;
* <li> remote freed queue is a careful design lock-free/ABA-free MPSC queue
* by Landz itself;
* </ul>
*
* <p>
* more detail comes here...
*
* <p>
* Namings in the methods:
* <ul>
* <li> pg_ is for page operations;
* <li> gp_ is for global pool operations;
* <li> tlp_ is for threadlocal pool operations;
*
* </ul>
*
*
*/
public class Allocator {
/**
* Note: most of variables are stored in the long type for future's extensions
* and/or 8B alignment, but we may only use the int/byte-size slot when
* long-size is not needed.
*/
private static final long NULL_ADDRESS = 0L;
// private static final int SIZE_LONG_TYPE = 8;
// private static final int SIZE_INT_TYPE = 4;
/**
* addressZM: zmalloc metadata area + global pool area
* arrange 8MB VM metadata area for zmalloc globals, may be
* reallocated in the future;
*/
private static final long addressRaw;
private static final long addressZM;
private static final long SIZE_METADATA_ZMALLOC = 1<<23;
/**
* 35 kinds of SizeClass now
*/
private static final long addressSizeClassInfo;
private static final int TLPHEAD_TO_SIZECLASSINFO = 4*1024;
/**
* off-heap ThreadLocalPools area desgin is an optimization for
* java ThreadLocal. It provide a better TLS implementation for zmalloc
* than ThreadLocal in long-running thread usage, like thread pool.
*
* But this design can not support that applications like to spawn huge mounts
* of short-living thread. Becuase threads can be generated infinitely, but
* off-heap is finally limited.
*
* By default, the first 3k-1 thread (id<3072) can have a TLP slot in the
* managed off heap. For the later spawning threads, it will go back to use
* slower {@link ThreadLocal}.
*
* TODO: j.l.ThreadLocal backed TLP schema has not implemented now...
*
* addressTLPs: address for ThreadLocalPools
* structure:
* (.. CacheLine aligned)
* long - number of TLPs
* [..to nextCacheline start]
* [..to nextCacheline start]
* TLPStates
* (note: this area has one-shot false sharing for long-running threads)
* byte - 0..
* byte - 1..
* byte - 2..
* byte - ..
* byte - 3071..
* [..to next 4096 offset start]
* (addressTLP#0)
* address[36] - AvailablePages
* - AvailablePage[0] - 3*8B: head, tail, num
* ..
* - AvailablePage[35]
* address - freePagesHead
* address - freePagesTail
* long - NumFreePages
* [..to 1024 offset start]
* address - remoteFreedChunksHead
* [..to nextCacheline start]
* [..to nextCacheline start]
* address - remoteFreedChunksTail
* [..to nextCacheline start]
* [..to nextCacheline start]
* address - remoteFreedChunksDummy
* [..to nextCacheline start]
* [..to nextCacheline start]
*
* address[] - batchRequestedPages (1-?) TODO: not used
* the TLP can request pages from GP in batch for performance.
* here, batchRequestedPages is for storing the returned batch
* requested pages. Now we support to 1 to ? varied number of
* pages in one TLP. It is possible to support larger number of
* pages after enlarging the TLP metadata area.
* attention: this area is *transient*, it is the related APIs'
* clients' responsibility to use this at its immediate
* availability.
*
* [..padding to 2048]
* (addressTLP#1)
* ..
*/
private static final long addressTLPHEAD;
private static final int TLPHEAD_NUMTLPS_OFFSET = 0;
private static final int TLPSTATES_TO_TLPHEAD_OFFSET = SIZE_CACHE_LINE_PADDING;
private static final long addressTLPStates;
private static final int SIZE_TLPStates_ARRAY_LENGTH = 3*1024;
private static final int TLPS_TO_TLPHEAD_OFFSET = 4*1024;
private static final long addressTLPs;
private static final int TLP_ITEM_SIZE = 2*1024;
private static final int TLP_AVAILABLEPAGES_OFFSET = 0;
private static final int TLP_AVAILABLEPAGES_ITEM_SIZE = 3*SIZE_LONG_TYPE;
private static final int TLP_AVAILABLEPAGES_ARRAYLENGTH = 36;
private static final int TLP_FREEPAGESHEAD_OFFSET =
TLP_AVAILABLEPAGES_ITEM_SIZE * TLP_AVAILABLEPAGES_ARRAYLENGTH;
private static final int TLP_FREEPAGESTAIL_OFFSET =
TLP_FREEPAGESHEAD_OFFSET + SIZE_LONG_TYPE;
private static final int TLP_NUMFREEPAGES_OFFSET =
TLP_FREEPAGESTAIL_OFFSET + SIZE_LONG_TYPE;
private static final int TLP_REMOTEFREEDCHUNKS_HEAD_OFFSET = 1024;
private static final int TLP_REMOTEFREEDCHUNKS_TAIL_OFFSET =
TLP_REMOTEFREEDCHUNKS_HEAD_OFFSET + SIZE_CACHE_LINE_PADDING;
private static final int TLP_REMOTEFREEDCHUNKS_DUMMY_OFFSET =
TLP_REMOTEFREEDCHUNKS_TAIL_OFFSET + SIZE_CACHE_LINE_PADDING;
private static final int TLP_BATCHREQUESTEDPAGES_OFFSET =
TLP_REMOTEFREEDCHUNKS_DUMMY_OFFSET + SIZE_CACHE_LINE_PADDING;
/**
* change this threshold value larger if you want more aggressive cache
*/
private static int freePagesNumThreshold;
/**
* change this return value smaller if you want more aggressive cache
*/
private static int freePagesNumToReturn;
private static final int FREEPAGES_NUM_THRESHOLD_DEFAULT = 64;
private static final int FREEPAGES_NUM_TORETURN_DEFAULT = 32;
//TODO: only use this optimization after enabling an configuration option
//Note: this can reduce the request cost more, but cause much initial
// memory footprint
private static final int NUM_BATCHREQUESTEDPAGES_DEFAULT = 4;//or 8? 16?
/**
* (..assumed Cacheline aligned for fist page)
* structure:
* address - availableChunks
* long - numAvailableChunks
* address - nextPage
* address - prevPage
* address - nextFreePage
* address - prevFreePage
* long - numMaxChunks
* int - sizeClass(int)
* int - tid(long)
* (...padding to next cache line)
*/
private static final long SHIFT_SIZE_ZMPAGE = 21;
private static final long SIZE_ZMPAGE = 1<<SHIFT_SIZE_ZMPAGE;
//FIXME: some in this head should be in cache line padding
private static final int ZMPAGE_AVAILABLECHUNKS_OFFSET = 0;
private static final int ZMPAGE_NUMAVAILABLECHUNKS_OFFSET = 1*SIZE_LONG_TYPE;
private static final int ZMPAGE_NEXTPAGE_OFFSET = 2*SIZE_LONG_TYPE;
private static final int ZMPAGE_PREVPAGE_OFFSET = 3*SIZE_LONG_TYPE;
private static final int ZMPAGE_NEXTFREEPAGE_OFFSET = 4*SIZE_LONG_TYPE;
private static final int ZMPAGE_PREVFREEPAGE_OFFSET = 5*SIZE_LONG_TYPE;
private static final int ZMPAGE_NUMMAXCHUNKS_OFFSET = 6*SIZE_LONG_TYPE;
private static final int ZMPAGE_SIZECLASSINDEX_OFFSET = 7*SIZE_LONG_TYPE;
private static final int ZMPAGE_TID_OFFSET = 7*SIZE_LONG_TYPE
+SIZE_INT_TYPE;
//FIXME: this offset should be in cache line padding
private static final int ZMPAGE_RAWCHUNK_OFFSET = SIZE_CACHE_LINE;
/**
* TODO: now > {@link #ZMPAGE_MAX_CHUNK_SIZE} is not supported
*/
private static final int ZMPAGE_MAX_CHUNK_SIZE =
(int) SIZE_ZMPAGE-ZMPAGE_RAWCHUNK_OFFSET;
/**
* 1GB VM or System for initial off heap pool, can grow into
* maximum 50% of total memory
* <p>
*
* This adjusting could be avoided by fixing the #ZMALLOC_INITIAL_POOLSIZE
* same to the #ZMALLOC_MAX_POOLSIZE
*
* <p>
*
* NOTE:
* 1. 1GB is just for VM, not physical memory, but set your System
* Property for avoiding kinds of problems for small servers.
* 2. we make the "effective" address of Global Pool(not including
* the metadata) page-aligned for kinds of goods.
* 3. only to access your requested memory area, otherwise you may
* crash JVM!
*
* structure:
* long - addressAvailablePages
* long - NumAvailablePages
*
*
*/
private static final long addressGP;
//TODO: add a dynamic change method?(note this may cost too long time or fail)
private static final long sizeGP;
private static final long totalAvailablepages;
//we use the last 1-cacheline-padded slots before GP for GP's meta head
private static final int GPHEAD_OFFSET = -1*SIZE_CACHE_LINE_PADDING;
// private static final long addressGPHead;
private static final long addressGPHead_NumAvailablePages;
private static final MPMCQueue globalPool;
static {
//config kinds of options
freePagesNumThreshold = Integer.parseInt(
Optional
.ofNullable(SystemProperty.ZMALLOC_FREEPAGES_NUM_THRESHOLD.value())
.orElse(String.valueOf(FREEPAGES_NUM_THRESHOLD_DEFAULT)));
freePagesNumToReturn = Integer.parseInt(
Optional
.ofNullable(SystemProperty.ZMALLOC_FREEPAGES_NUM_TORETURN.value())
.orElse(String.valueOf(FREEPAGES_NUM_TORETURN_DEFAULT)));
long initialGPSize = Long.parseLong(
Optional
.ofNullable(SystemProperty.ZMALLOC_INITIAL_POOLSIZE.value())
.orElse("1024"));
contract(
() -> Longs.isPowerOfTwo(initialGPSize),
() -> new IllegalArgumentException("Now the size of global pool " +
"is supported to be the power of 2 only."));//FIXME
sizeGP = initialGPSize * 1024 * 1024;//initialGPSize is in MB
totalAvailablepages = sizeGP/SIZE_ZMPAGE;
/* TECH NOTE:
* the malloc-ed VM will be included a metadata head (at least for linux
* glibc's). For mmap-ed block(so-called large chunk),
* this head's size is 2*SIZE_SZ, 16 in x86-64, or 8 in x86.
* So, the glibc's malloc allocated memory is not page aligned in default.
*/
addressRaw = systemAllocateMemory(
SIZE_METADATA_ZMALLOC
+ sizeGP
+ SIZE_ZMPAGE); //should align to zmalloc page
addressZM = nextZMPageAlignedAddress(addressRaw);
contract(()-> isZMPageAligned(addressZM));
/**
* NOTE:
* 1. 8B aligned;
* 2. read-only
*/
addressSizeClassInfo = addressZM;
onAddress(addressSizeClassInfo)
.put(8) // 0
.followBy(16) // 1
.followBy(24) // 2
.followBy(32) // 3
.followBy(48) // 4
.followBy(64) // 5
.followBy(96) // 6
.followBy(128) // 7
.followBy(192) // 8
.followBy(256) // 9
.followBy(384) // 10
.followBy(512) // 11
.followBy(768) // 12
.followBy(1024) // 13
.followBy(1536) // 14
.followBy(2048) // 15
.followBy(3072) // 16
.followBy(4096) // 17
.followBy(6144) // 18
.followBy(8192) // 19
.followBy(12288) // 20
.followBy(16384) // 21
.followBy(24576) // 22
.followBy(32768) // 23
.followBy(49152) // 24
.followBy(65536) // 25
.followBy(98304) // 26
.followBy(131072) // 27
.followBy(196608) // 28
.followBy(262144) // 29
.followBy(393216) // 30
.followBy(524288) // 31
.followBy(786432) // 32
.followBy(1048576) // 33
.followBy(1572864) // 34
.followBy(ZMPAGE_MAX_CHUNK_SIZE); // 35
addressTLPHEAD = addressSizeClassInfo + TLPHEAD_TO_SIZECLASSINFO;
// addressTLPHEAD+TLPHEAD_NUMTLPS_OFFSET->addressTLPHEAD,
// in that TLPHEAD_NUMTLPS_OFFSET=0
onAddress(addressTLPHEAD).put(0);//initial num counter for TLPs
addressTLPStates = addressTLPHEAD + TLPSTATES_TO_TLPHEAD_OFFSET;
//initialize TLPStates
for (int i = 0; i < SIZE_TLPStates_ARRAY_LENGTH; i++) {
UNSAFE.putByte(addressTLPStates+i,(byte)0);
}
//only initialize the main thread's ThreadLocalPools
addressTLPs = addressTLPHEAD + TLPS_TO_TLPHEAD_OFFSET;
contract(()-> is4096Aligned(addressTLPs));
addressGP = addressZM + SIZE_METADATA_ZMALLOC;
contract(()-> isPageAligned(addressGP));
addressGPHead_NumAvailablePages = addressGP + GPHEAD_OFFSET;
globalPool = new MPMCQueue((int)(initialGPSize/2));
//initialize the GP
gp_ini();
//NOTE: this hook will cause buffer finalization panic.
// Although it is paintless, but we can just rely on OS for cleaning.
// Runtime.getRuntime().addShutdownHook(
// new Thread(()->systemFreeMemory(addressRaw)));
//one-shot fence?
UNSAFE.storeFence();
}
//============================================================================
//zmalloc main APIs
/**
* Note: the allocated memory is not guarantee to be zero-ed. That is.
* it may contain any garbage.
*
* <p>
* contract: 0 < sizeOfBytes <= 1536k (may change in the future)
* <p> @param sizeOfBytes - the size which you want to request, in bytes
* <p> @return - the address of your requested chunk, or NULL_ADDRESS(0L) if
* the allocator can not fulfil your request.
*/
public static final long allocate(long sizeOfBytes) {
int sci = sizeClassIndex((int) sizeOfBytes);
//TODO: add contract here
long tid = currentThreadId();
//ensure the tlp area has been initialized
if (UNSAFE.getByte(addressTLPStates + tid) ==0)
tlp_ini(tid);
//prepare TLP variables
long addressTLP = addressTLPs + TLP_ITEM_SIZE *tid;
long addrAvailablePages = addressTLP + TLP_AVAILABLEPAGES_OFFSET;
long addrAvailablePageHead = addrAvailablePages +
sci* TLP_AVAILABLEPAGES_ITEM_SIZE;
long addrAvailablePageTail = addrAvailablePageHead + SIZE_LONG_TYPE;
long addrNumAvailablePage = addrAvailablePageTail + SIZE_LONG_TYPE;
long numAvailablePage = UNSAFE.getLong(addrNumAvailablePage);
long page;
if (numAvailablePage!=0) {
page = UNSAFE.getAddress(addrAvailablePageHead);
boolean isFreePage =
UNSAFE.getInt(page+ZMPAGE_NUMAVAILABLECHUNKS_OFFSET)==
UNSAFE.getInt(page+ZMPAGE_NUMMAXCHUNKS_OFFSET);
// FIXME: merged with next branch's pop?
// chunk = pg_AvailableChunks_pop(page);
if(isFreePage) {
//meets free page, but now it is not
long addrFreePagesHead = addressTLP + TLP_FREEPAGESHEAD_OFFSET;
long addrFreePagesTail = addressTLP + TLP_FREEPAGESTAIL_OFFSET;
long addrNumFreePages = addressTLP + TLP_NUMFREEPAGES_OFFSET;
tlp_FreePages_remove(
addrFreePagesHead, addrFreePagesTail, addrNumFreePages, page);
}
} else {
page = gp_Page_poll();
//check with RemoteFreedChunks
//TODO: do we have/need more better flush points?
long addrRemoteFreedChunksHead =
addressTLP + TLP_REMOTEFREEDCHUNKS_HEAD_OFFSET;
long addrRemoteFreedChunksTail =
addressTLP + TLP_REMOTEFREEDCHUNKS_TAIL_OFFSET;
long addrRemoteFreedChunksDummy =
addressTLP + TLP_REMOTEFREEDCHUNKS_DUMMY_OFFSET;
if (page == NULL_ADDRESS) {
//find in RemoteFreedChunks
long c;
while (NULL_ADDRESS != (c=tlp_RemoteFreedChunksHead_remove(
addrRemoteFreedChunksHead,
addrRemoteFreedChunksTail,
addrRemoteFreedChunksDummy)) ) {
long p = c-(c&(SIZE_ZMPAGE-1));
if (sci==UNSAFE.getInt(p + ZMPAGE_SIZECLASSINDEX_OFFSET)) {
return c;
} else {
free(c);
}
}
} else {
//flush whole RemoteFreedChunks queue
long c;
while (NULL_ADDRESS != (c=tlp_RemoteFreedChunksHead_remove(
addrRemoteFreedChunksHead,
addrRemoteFreedChunksTail,
addrRemoteFreedChunksDummy)) ) {
free(c);
}
}
pg_setupPage(page, sci, tid);
tlp_AvailablePages_addToHead(addrAvailablePageHead, page);
//new page should be a freePage as well
// tlp_FreePages_addToHead(addrFreePagesHead, page, addrNumFreePages);
}
//contract: page != NULL_ADDRESS
long chunk = pg_AvailableChunks_pop(page);
//we guarantee that we have chunks iff we have free pages
//so we should guarantee the invariant: chunk != NULL_ADDRESS
if (UNSAFE.getAddress(page) == NULL_ADDRESS) {
//meets full page(in head), just remove it
tlp_AvailablePages_remove(addrAvailablePageHead,
UNSAFE.getAddress(addrAvailablePageHead));
}
return chunk;
}
/**
* contract:
* the want-to-be-freed addressChunk should be allocated by
* {@link z.offheap.zmalloc.Allocator#allocate(long)}.
*/
public static final void free(long addressChunk) {
if (addressChunk<=0)
throw new IllegalArgumentException(
"addressChunk argument can't be less than zero.");
long tid = currentThreadId();
long addressPage = addressChunk-(addressChunk&(SIZE_ZMPAGE-1));
int sci = UNSAFE.getInt(addressPage + ZMPAGE_SIZECLASSINDEX_OFFSET);
int ownedTid = UNSAFE.getInt(addressPage + ZMPAGE_TID_OFFSET);
//the chunk is remote freed
if (tid!=ownedTid) {
long addrRemoteFreedChunksTail = addressTLPs + TLP_ITEM_SIZE *ownedTid
+ TLP_REMOTEFREEDCHUNKS_TAIL_OFFSET;
tlp_RemoteFreedChunksHead_add(addrRemoteFreedChunksTail,addressChunk);
return;
}
boolean isFullPage =
UNSAFE.getInt(addressPage+ZMPAGE_NUMAVAILABLECHUNKS_OFFSET)==0;
pg_AvailableChunks_push(addressPage,addressChunk);
if (isFullPage) {
tlp_AvailablePages_addToTail(tid, sci, addressPage);
}
//is free page
if (UNSAFE.getInt(addressPage+ZMPAGE_NUMAVAILABLECHUNKS_OFFSET)==
UNSAFE.getInt(addressPage+ZMPAGE_NUMMAXCHUNKS_OFFSET)) {
//TODO: add an option to configure whether to return freePages to GP?
long addressTLP = addressTLPs + TLP_ITEM_SIZE *tid;
long addrFreePagesHead = addressTLP + TLP_FREEPAGESHEAD_OFFSET;
long addrNumFreePages = addressTLP + TLP_NUMFREEPAGES_OFFSET;
tlp_FreePages_addToHead(addrFreePagesHead, addressPage, addrNumFreePages);
if (UNSAFE.getLong(addrNumFreePages) > freePagesNumThreshold) {
//TODO: add batch return method?
//NOTE: freePages include different sizeClass's pages
//TODO: separate the freePages to different sizeClasses?
long addrFreePagesTail = addrFreePagesHead + SIZE_LONG_TYPE;
for (int i=0;i< freePagesNumToReturn;i++) {
//remove from head
long head = UNSAFE.getAddress(addrFreePagesHead);
//here contract: fpHead!=NULL_ADDRESS
tlp_FreePages_remove(
addrFreePagesHead, addrFreePagesTail, addrNumFreePages, head);
long addrAvailablePageHead = addressTLP + TLP_AVAILABLEPAGES_OFFSET +
UNSAFE.getInt(head + ZMPAGE_SIZECLASSINDEX_OFFSET)
* TLP_AVAILABLEPAGES_ITEM_SIZE;
//FIXME: addrAvailablePageHead is sci-based,
// and then tlp_AvailablePages_remove is error-prone
tlp_AvailablePages_remove(addrAvailablePageHead,head);
gp_Page_offer(head);
}
}
}
}
//============================================================================
//internal operations
//============================================================================
//internal utils
private static final long nextZMPageAlignedAddress(long address) {
return address-(address&(SIZE_ZMPAGE-1))+SIZE_ZMPAGE;
}
private static final boolean isZMPageAligned(long address) {
return (address&(SIZE_ZMPAGE-1))==0;
}
private static final long next4096AlignedAddress(long address) {
return address-(address&4095)+4096;
}
private static final boolean is4096Aligned(long address) {
return (address&4095)==0;
}
// private static int[] sizeClassTable0;
// private static int[] sizeClassTable1;
// private static int[] sizeClassTable2;
//
// /**
// *
// * XXX: although this table based way beats sizeClassIndex0 in micro-bench,
// * it should be tested against real world usage.
// *
// */
// public static final int sizeClassIndex(int sizeOfBytes) {
// if (sizeOfBytes<=0)
// return 0;
//
// sizeOfBytes--;
// int level0 = sizeOfBytes >>> 9;
// int level1 = sizeOfBytes >>> 15;
// int level2 = sizeOfBytes >>> 21;
//
// if (level0==0) {
// return sizeClassTable0[sizeOfBytes>>>2];
// } else if(level1==0) {
// return sizeClassTable1[sizeOfBytes>>>8];
// } else if(level2==0) {
// return sizeClassTable2[sizeOfBytes>>>14];
// } else {
// return 34;
// }
// }
//
// private static final void createSizeClassLookupTable() {
// sizeClassTable0 = new int[128];//0 - 511
// sizeClassTable1 = new int[128];//512 - 32k-1
// sizeClassTable2 = new int[128];//32k - 2M-1
//
// for (int i = 4; i <= 512; i+=4) {
// sizeClassTable0[(i-1)>>>2] = sizeClassIndex0(i);
// }
//
// for (int i = 256; i <= 32*1024; i+=256) {
// sizeClassTable1[(i-1)>>>8] = sizeClassIndex0(i);
// }
//
// for (int i = 16*1024; i <= 2*1024*1024; i+=(16*1024)) {
// sizeClassTable2[(i-1)>>>14] = sizeClassIndex0(i);
// }
// }
/**
* NOTE: now sizeOfBytes > {@link #ZMPAGE_MAX_CHUNK_SIZE} is not supported
*/
private static final int sizeClassIndex(int sizeOfBytes) {
//contract(()->sizeOfBytes!=0);
if (sizeOfBytes>1572864) {
//TODO: do we support {@link #ZMPAGE_MAX_CHUNK_SIZE} to public?
return 35;
}else if (sizeOfBytes>16) {
int clg = (63-Long.numberOfLeadingZeros(sizeOfBytes));
int upSizeClass1 = 1<<clg;
int upSizeClass2 = (1<<clg)+(1<<(clg-1));
if (sizeOfBytes == upSizeClass1) {
return (clg-3)*2-1;
} else if (sizeOfBytes > upSizeClass2) {
return (clg-3)*2+1;
} else {
return (clg-3)*2;
}
} else if (sizeOfBytes>8) {
return 1;
} else {
return 0;
}
}
//============================================================================
//zmalloc Page operations
private static final void pg_setupPage(long addressPage,
int sizeClassIndex,
long tid) {
int sizeClass = UNSAFE.getInt(addressSizeClassInfo+
sizeClassIndex*SIZE_INT_TYPE);
int numMaxAvailableChunks = ZMPAGE_MAX_CHUNK_SIZE/sizeClass;
/*
*reset the availableChunks to NULL ? do not need to reset this field here
* if reset done in tlp_ini
*/
// UNSAFE.putAddress(addressPage+ZMPAGE_NEXTPAGE_OFFSET,0);
UNSAFE.putAddress(addressPage, NULL_ADDRESS);
long addressRawChunks = addressPage+ZMPAGE_RAWCHUNK_OFFSET;
for (int i = 0; i < numMaxAvailableChunks; i++) {
long chunk = addressRawChunks+i*sizeClass;
pg_AvailableChunks_push(addressPage, chunk);
}
UNSAFE.putInt(addressPage + ZMPAGE_NUMAVAILABLECHUNKS_OFFSET,
numMaxAvailableChunks);
UNSAFE.putInt(addressPage + ZMPAGE_NUMMAXCHUNKS_OFFSET,
numMaxAvailableChunks);
UNSAFE.putInt(addressPage + ZMPAGE_SIZECLASSINDEX_OFFSET,
sizeClassIndex);
//FIXME: tid is long type, but now we only support integer number tid
UNSAFE.putInt(addressPage + ZMPAGE_TID_OFFSET, (int) tid);
}
private static final void pg_AvailableChunks_push(long addressPage,
long addressChunk) {
//availableChunks = addressPage
long head = UNSAFE.getAddress(addressPage);
UNSAFE.putAddress(addressPage, addressChunk);
UNSAFE.putAddress(addressChunk, head);
long numAvailableChunks = addressPage+ZMPAGE_NUMAVAILABLECHUNKS_OFFSET;
UNSAFE.putInt(numAvailableChunks, UNSAFE.getInt(numAvailableChunks) + 1);
}
private static final long pg_AvailableChunks_pop(long addressPage) {
//long availableChunks = addressPage+ZMPAGE_AVAILABLECHUNKS_OFFSET;
long head = UNSAFE.getAddress(addressPage);
if (head != NULL_ADDRESS) {
UNSAFE.putAddress(addressPage, UNSAFE.getAddress(head));
long numAvailableChunks = addressPage+ZMPAGE_NUMAVAILABLECHUNKS_OFFSET;
UNSAFE.putInt(numAvailableChunks, UNSAFE.getInt(numAvailableChunks) - 1);
}//meet a full zmalloc page when head ==0L
return head;
}
//============================================================================
//GP operations
private static final void gp_ini() {
for (long i = 0; i < totalAvailablepages; i++) {
long availableGPZMPage = addressGP + i*SIZE_ZMPAGE;
gp_Page_offer(availableGPZMPage);
}
}
/**
* NOTE:<p>
* the return zmalloc page is raw, it is the responsibility of TLP to
* setup the returned page for its use.
*
* @return the address of raw zmalloc page
*/
private static final long gp_Page_poll() {
long page = globalPool.poll();
if (page!= MPMCQueue.NULL) {
UNSAFE.getAndAddLong(null, addressGPHead_NumAvailablePages, -1L);
return page;
} else {
return NULL_ADDRESS;
}
}
private static final void gp_Page_offer(long addressFreePage) {
// int nRetries = 3;//TODO
// for (int i = 0; i < nRetries; i++) {
if (globalPool.offer(addressFreePage)) {
UNSAFE.getAndAddLong(null, addressGPHead_NumAvailablePages, 1L);
return;
// }
}
throw new RuntimeException("can not offer the page to the global pool, " +
"but this should not happen...");
}
// /**
// * TODO: not used
// * NOTE:<p>
// * in batch, the requested pages will be put into the corresponding
// * batchRequestedPages area.
// *
// */
// private static final void gp_Page_pop_batch(long tid,
// int numOfRequestedPages) {
// long tlpBatchRequestedPage = addressTLPs+ TLP_ITEM_SIZE *tid
// + TLP_BATCHREQUESTEDPAGES_OFFSET;
//
// long page = UNSAFE.getAddress(addressGPHead_AvailablePagesHead);
// for (int i = 0; i < numOfRequestedPages; i++) {
// if (page==NULL_ADDRESS)
// break;
// UNSAFE.putAddress(tlpBatchRequestedPage + i * SIZE_LONG_TYPE, page);
// page = UNSAFE.getAddress(page);
// }
// UNSAFE.putAddress(addressGPHead_AvailablePagesHead,page);
//
// long newNum = UNSAFE.getLong(addressGPHead_NumAvailablePages)-
// numOfRequestedPages;
// UNSAFE.putLong(addressGPHead_NumAvailablePages, (newNum>0)?newNum:0);
// }
//============================================================================
//TLP operations
private static final void tlp_ini(long tid) {
long addressTLP = addressTLPs + TLP_ITEM_SIZE *tid;
long availablePages = addressTLP;
for (int i = 0; i < TLP_AVAILABLEPAGES_ARRAYLENGTH; i++) {
long availablePageHead = availablePages + i* TLP_AVAILABLEPAGES_ITEM_SIZE;
long availablePageTail = availablePageHead + SIZE_LONG_TYPE;
//reset the available pages array
UNSAFE.putAddress(availablePageHead,availablePageTail);
UNSAFE.putAddress(availablePageTail,availablePageHead);//TODO
UNSAFE.putLong(availablePageTail+SIZE_LONG_TYPE, 0);
//XXX: not necessary for numAvailablePage
}
long addrFreePagesHead = addressTLP + TLP_FREEPAGESHEAD_OFFSET;
long addrFreePagesTail = addressTLP + TLP_FREEPAGESTAIL_OFFSET;
UNSAFE.putAddress(addrFreePagesHead,addrFreePagesTail);
UNSAFE.putAddress(addrFreePagesTail,addrFreePagesHead);
//XXX: not necessary for numFreePages
long addrRemoteFreedChunksHead =
addressTLP + TLP_REMOTEFREEDCHUNKS_HEAD_OFFSET;
long addrRemoteFreedChunksTail =
addressTLP + TLP_REMOTEFREEDCHUNKS_TAIL_OFFSET;
long addrRemoteFreedChunksDummy =
addressTLP + TLP_REMOTEFREEDCHUNKS_DUMMY_OFFSET;
UNSAFE.putAddress(addrRemoteFreedChunksDummy,NULL_ADDRESS);//not necessary
UNSAFE.putAddress(addrRemoteFreedChunksHead,addrRemoteFreedChunksDummy);
UNSAFE.putAddress(addrRemoteFreedChunksTail,addrRemoteFreedChunksDummy);
UNSAFE.putByte(addressTLPStates + tid, (byte) 1);//set TLPStates
UNSAFE.getAndAddInt(null, addressTLPHEAD, 1);
}
private static final void tlp_AvailablePages_addToHead(
long addrAvailablePageHead,
long newAvailablePage) {
long addrNumAvailablePage = addrAvailablePageHead+2*SIZE_LONG_TYPE;
long numAvailablePage = UNSAFE.getLong(addrNumAvailablePage);
long oldHead = UNSAFE.getAddress(addrAvailablePageHead);
if (numAvailablePage != 0) {//oldHead!=availablePageTail
UNSAFE.putAddress(oldHead + ZMPAGE_PREVPAGE_OFFSET,
newAvailablePage);
}else {//oldHead is tail
UNSAFE.putAddress(oldHead, newAvailablePage);
}
UNSAFE.putAddress(newAvailablePage + ZMPAGE_NEXTPAGE_OFFSET, oldHead);
UNSAFE.putAddress(newAvailablePage + ZMPAGE_PREVPAGE_OFFSET,
addrAvailablePageHead);
UNSAFE.putAddress(addrAvailablePageHead, newAvailablePage);
UNSAFE.putLong(addrNumAvailablePage, numAvailablePage + 1);
}
private static final void tlp_AvailablePages_addToTail(
long tid,
int sizeClassIndex,
long newAvailablePage) {
long addrAvailablePages = addressTLPs + TLP_ITEM_SIZE *tid;
// long availablePageHead = availablePages +
// sizeClassIndex*TLP_AVAILABLEPAGES_ITEM_SIZE;
long addrAvailablePageTail = addrAvailablePages +
sizeClassIndex* TLP_AVAILABLEPAGES_ITEM_SIZE +
SIZE_LONG_TYPE;
long addrNumAvailablePage = addrAvailablePageTail+SIZE_LONG_TYPE;
long numAvailablePage = UNSAFE.getLong(addrNumAvailablePage);
long oldTail = UNSAFE.getAddress(addrAvailablePageTail);
if (numAvailablePage != 0) {//oldTail is not addrAvailablePageHead
UNSAFE.putAddress(oldTail + ZMPAGE_NEXTPAGE_OFFSET,
newAvailablePage);
} else {//oldTail is head
UNSAFE.putAddress(oldTail,newAvailablePage);
}
UNSAFE.putAddress(newAvailablePage+ZMPAGE_PREVPAGE_OFFSET,oldTail);
UNSAFE.putAddress(newAvailablePage+ZMPAGE_NEXTPAGE_OFFSET,
addrAvailablePageTail);
UNSAFE.putAddress(addrAvailablePageTail,newAvailablePage);
UNSAFE.putLong(addrNumAvailablePage, numAvailablePage+1);
}
//TODO: some state changes may be not necessary?
/**
* contract:
* <p> 1. numAvailablePage > 0
* <p> 2. removedPage in AvailablePages[sizeClassIndex]
*/
private static final void tlp_AvailablePages_remove(
long addrAvailablePageHead,
long removedPage) {
long availablePageTail = addrAvailablePageHead + SIZE_LONG_TYPE;
// long head = UNSAFE.getAddress();
// long tail = UNSAFE.getAddress();
long addrPrevPage = removedPage+ZMPAGE_PREVPAGE_OFFSET;
long addrNextPage = removedPage+ZMPAGE_NEXTPAGE_OFFSET;
long prev = UNSAFE.getAddress(addrPrevPage);
long next = UNSAFE.getAddress(addrNextPage);
if (prev!=addrAvailablePageHead) {
UNSAFE.putAddress(prev+ZMPAGE_NEXTPAGE_OFFSET,next);
} else {//prev is head
UNSAFE.putAddress(prev,next);
}
if (next!=availablePageTail) {
UNSAFE.putAddress(next+ZMPAGE_PREVPAGE_OFFSET,prev);
} else {//next is tail
UNSAFE.putAddress(next,prev);
}
long addrNumAvailablePage = availablePageTail+SIZE_LONG_TYPE;
UNSAFE.putLong(addrNumAvailablePage,
UNSAFE.getLong(addrNumAvailablePage)-1);
}
//============================
private static final void tlp_FreePages_addToHead(long addrFreePagesHead,
long newFreePage,
long addrNumFreePages) {
long numFreePages = UNSAFE.getLong(addrNumFreePages);
long oldHead = UNSAFE.getAddress(addrFreePagesHead);
if (numFreePages != 0) {//oldHead!=availablePageTail
UNSAFE.putAddress(oldHead + ZMPAGE_PREVFREEPAGE_OFFSET, newFreePage);
}else {//oldHead is tail
UNSAFE.putAddress(oldHead, newFreePage);
}
UNSAFE.putAddress(newFreePage + ZMPAGE_NEXTFREEPAGE_OFFSET, oldHead);
UNSAFE.putAddress(newFreePage + ZMPAGE_PREVFREEPAGE_OFFSET,
addrFreePagesHead);
UNSAFE.putAddress(addrFreePagesHead, newFreePage);
UNSAFE.putLong(addrNumFreePages, numFreePages + 1);
}
private static final void tlp_FreePages_remove(long addrFreePagesHead,
long addrFreePagesTail,
long addrNumFreePages,
long removedFreePage) {
long addrPrevFreePage = removedFreePage+ZMPAGE_PREVFREEPAGE_OFFSET;
long addrNextFreePage = removedFreePage+ZMPAGE_NEXTFREEPAGE_OFFSET;
long prev = UNSAFE.getAddress(addrPrevFreePage);
long next = UNSAFE.getAddress(addrNextFreePage);
if (prev!=addrFreePagesHead) {
UNSAFE.putAddress(prev+ZMPAGE_NEXTFREEPAGE_OFFSET,next);
} else {//prev is head
UNSAFE.putAddress(prev,next);
}
if (next!=addrFreePagesTail) {
UNSAFE.putAddress(next+ZMPAGE_PREVFREEPAGE_OFFSET,prev);
} else {//next is tail
UNSAFE.putAddress(next,prev);
}
UNSAFE.putLong(addrNumFreePages, UNSAFE.getLong(addrNumFreePages)-1);
}
//============================================================================
private static final void tlp_RemoteFreedChunksHead_add(
long addrRemoteFreedChunksTail,
long addrRemoteFreedChunk) {
UNSAFE.putAddress(addrRemoteFreedChunk,NULL_ADDRESS);
// UNSAFE.fullFence();
long oldTail = UNSAFE.getAndSetLong(null,
addrRemoteFreedChunksTail, addrRemoteFreedChunk);
UNSAFE.putAddress(oldTail, addrRemoteFreedChunk);
UNSAFE.fullFence();
}
/*
* TODO: here we have no ABA problem: because we have a single consumer here,
* it is not possible to change the head without this method itself.
*/
private static final long tlp_RemoteFreedChunksHead_remove(
long addrRemoteFreedChunksHead,
long addrRemoteFreedChunksTail,
long addrRemoteFreedChunksDummy) {
UNSAFE.loadFence();
long head = UNSAFE.getAddress(addrRemoteFreedChunksHead);
long v = UNSAFE.getAddress(head);
if (v!=NULL_ADDRESS) {
long next = UNSAFE.getAddress(v);
if (next==NULL_ADDRESS) {//v==tail(==getAddr(addrRemoteFreedChunksTail))
if ( UNSAFE.compareAndSwapLong(null,
addrRemoteFreedChunksTail,
v,
addrRemoteFreedChunksDummy) ) {
return v;
} else {
return NULL_ADDRESS;
}
} else {
UNSAFE.putAddress(head, next);
}
UNSAFE.fullFence();//
return v;
} else {
return NULL_ADDRESS;
}
}
//============================================================================
//Stats API
/**
* ManagedPoolStats provides statistics for the managed off-heap pool
*
*/
public static class ManagedPoolStats {
public static long totalManagedPoolSize() {
return sizeGP;
}
public static long currentNumOfGPAvaiablePages() {
return UNSAFE.getLongVolatile(null, addressGPHead_NumAvailablePages);
}
public static long currentNumOfTLPAvaiablePages(int sizeClassIndex) {
return currentNumOfTLPAvaiablePages(currentThreadId(), sizeClassIndex);
}
public static long currentNumOfTLPAvaiablePages(long tid,
int sci) {
long addressTLP = addressTLPs + TLP_ITEM_SIZE *tid;
long addrAvailablePages = addressTLP + TLP_AVAILABLEPAGES_OFFSET;
long addrAvailablePageHead = addrAvailablePages +
sci* TLP_AVAILABLEPAGES_ITEM_SIZE;
long addrNumAvailablePage = addrAvailablePageHead+2*SIZE_LONG_TYPE;
return UNSAFE.getLongVolatile(null, addrNumAvailablePage);
}
public static long currentNumOfTLPFreePages() {
return currentNumOfTLPFreePages(currentThreadId());
}
public static long currentNumOfTLPFreePages(long tid) {
long numFreePages = addressTLPs + TLP_ITEM_SIZE *tid + TLP_NUMFREEPAGES_OFFSET;
return UNSAFE.getLong(numFreePages);
}
public static int currentNumOfPageAvailableChunks(long addressPage) {
return UNSAFE.getInt(addressPage+ZMPAGE_NUMAVAILABLECHUNKS_OFFSET);
}
public static long currentNumOfTLPs() {
return UNSAFE.getIntVolatile(null, addressTLPHEAD);//XXX: TLPHEAD_NUMTLPS_OFFSET=0
}
public static final long queryAllocationSize(int sizeOfBytes) {
int sci = sizeClassIndex((int) sizeOfBytes);
return UNSAFE.getInt(addressSizeClassInfo+sci*SIZE_INT_TYPE);
}
}
}