/*
* This file is part of the Jikes RVM project (http://jikesrvm.org).
*
* This file is licensed to You under the Eclipse Public License (EPL);
* You may not use this file except in compliance with the License. You
* may obtain a copy of the License at
*
* http://www.opensource.org/licenses/eclipse-1.0.php
*
* See the COPYRIGHT.txt file distributed with this work for information
* regarding copyright ownership.
*/
package org.mmtk.plan.gpu;
import org.mmtk.plan.Phase;
import org.mmtk.plan.Plan;
import org.mmtk.plan.StopTheWorld;
import org.mmtk.plan.Trace;
import org.mmtk.plan.TransitiveClosure;
import org.mmtk.policy.MarkSweepSpace;
import org.mmtk.policy.RawPageSpace;
import org.mmtk.policy.Space;
import org.mmtk.utility.Log;
import org.mmtk.utility.deque.AddressDeque;
import org.mmtk.utility.deque.SharedDeque;
import org.mmtk.utility.heap.Mmapper;
import org.mmtk.utility.heap.VMRequest;
import org.mmtk.utility.options.Options;
import org.mmtk.vm.Lock;
import org.mmtk.vm.VM;
import org.vmmagic.pragma.Inline;
import org.vmmagic.pragma.Interruptible;
import org.vmmagic.pragma.Uninterruptible;
import org.vmmagic.unboxed.*;
/**
* This class implements the global state of a simple mark-sweep collector.
*
* All plans make a clear distinction between <i>global</i> and
* <i>thread-local</i> activities, and divides global and local state
* into separate class hierarchies. Global activities must be
* synchronized, whereas no synchronization is required for
* thread-local activities. There is a single instance of Plan (or the
* appropriate sub-class), and a 1:1 mapping of PlanLocal to "kernel
* threads" (aka CPUs). Thus instance
* methods of PlanLocal allow fast, unsychronized access to functions such as
* allocation and collection.
*
* The global instance defines and manages static resources
* (such as memory and virtual memory resources). This mapping of threads to
* instances is crucial to understanding the correctness and
* performance properties of MMTk plans.
*/
@Uninterruptible
public class GPU extends StopTheWorld {
/****************************************************************************
* Additional phases for the GPU plan.
*/
public static final short PRE_CLOSURE = Phase.createSimple("gpu-pre-closure");
public static final short GPU_REF_GRAPH_FILL = Phase.createSimple("gpu-ref-graph-fill");
public static final short GPU = Phase.createSimple("gpu");
public static final short GPU_REF_GRAPH_PROCESS = Phase.createSimple("gpu-ref-graph-process");
public static final short POST_CLOSURE = Phase.createSimple("gpu-post-closure");
// Aligns a pointer to the next starting point for a reference graph entry.
protected static Address alignRefSpacePtr(Address ptr) {
int offset = ptr.toInt() - gpuRefSpace.getStart().toInt();
if (offset % (4*4) == (3*4))
return ptr;
else
return ptr.plus((3*4) - (offset % (4*4))); //Address.fromIntZeroExtend((offset & (~0x3)) + 0x4 + 0x3);
}
protected static final short gpuClosurePhase = Phase.createComplex("gpu-closure", null,
Phase.scheduleGlobal (PRE_CLOSURE),
Phase.scheduleCollector (PRE_CLOSURE),
Phase.scheduleGlobal (GPU_REF_GRAPH_FILL),
Phase.scheduleGlobal (GPU),
Phase.scheduleGlobal (GPU_REF_GRAPH_PROCESS),
Phase.scheduleGlobal (POST_CLOSURE),
Phase.scheduleCollector (POST_CLOSURE));
protected static final short gpuRootClosurePhase = Phase.createComplex("initial-closure", null,
Phase.scheduleMutator (PREPARE),
Phase.scheduleGlobal (PREPARE),
Phase.scheduleCollector (PREPARE),
Phase.scheduleComplex (prepareStacks),
Phase.scheduleCollector (PRECOPY),
Phase.scheduleCollector (STACK_ROOTS),
Phase.scheduleCollector (ROOTS),
Phase.scheduleGlobal (ROOTS),
Phase.scheduleComplex (gpuClosurePhase));
protected static final AddressArray gpuAddresses = AddressArray.create(100000);
// Reference graph
public static final Space gpuRefSpace = new RawPageSpace("gpu-refs", 0, VMRequest.create(256, false));
public static final Lock gpuRefSpaceLock = VM.newLock("gpu-refs");
public static Address gpuRefSpacePtr = Address.zero();
@Inline
public static void allocateRefSpaceNode(ObjectReference ref) {
int numRefs = VM.gpu.getObjectNumRefs(ref);
gpuRefSpaceLock.acquire();
Address node = alignRefSpacePtr(gpuRefSpacePtr);
gpuRefSpacePtr = node.plus((2 + numRefs) * BYTES_IN_ADDRESS);
gpuRefSpaceLock.release();
ref.toAddress().store(node, VM.objectModel.GC_HEADER_OFFSET());
node.store(ref);
node.store(numRefs, Offset.fromIntZeroExtend(BYTES_IN_ADDRESS));
}
@Uninterruptible
protected static class FillOutRefs extends TransitiveClosure {
private Address curEdge;
@Override
public void processEdge(ObjectReference source, Address slot) {
ObjectReference dest = slot.loadObjectReference();
Address destNode = Address.zero();
if (!dest.isNull()) {
destNode = dest.toAddress().loadAddress(VM.objectModel.GC_HEADER_OFFSET());
}
curEdge.store(destNode);
curEdge = curEdge.plus(BYTES_IN_ADDRESS);
}
@Inline
public void run() {
Address curNode = alignRefSpacePtr(gpuRefSpace.getStart());
while (curNode.LT(gpuRefSpacePtr)) {
ObjectReference obj = curNode.loadObjectReference();
int numRefs = curNode.loadInt(Offset.fromIntZeroExtend(BYTES_IN_ADDRESS));
curEdge = curNode.plus(2 * BYTES_IN_ADDRESS);
VM.scanning.scanObject(this, obj);
curNode = alignRefSpacePtr(curNode.plus((2 + numRefs) * BYTES_IN_ADDRESS));
if (VM.VERIFY_ASSERTIONS)
VM.assertions._assert(alignRefSpacePtr(curEdge).EQ(curNode));
}
}
}
protected static final FillOutRefs gpuRefSpaceFiller = new FillOutRefs();
@Uninterruptible
protected static class MarkObjects extends TransitiveClosure {
@Inline
public void processNode(ObjectReference object) {
// Do nothing, tracing was already done
}
@Inline
public ObjectReference traceObject(ObjectReference object) {
if (Space.isInSpace(MARK_SWEEP, object))
return msSpace.traceObject(this, object);
if (Space.isInSpace(Plan.IMMORTAL, object))
return Plan.immortalSpace.traceObject(this, object);
if (Space.isInSpace(Plan.LOS, object))
return Plan.loSpace.traceObject(this, object);
if (Space.isInSpace(Plan.NON_MOVING, object))
return Plan.nonMovingSpace.traceObject(this, object);
if (Plan.USE_CODE_SPACE && Space.isInSpace(Plan.SMALL_CODE, object))
return Plan.smallCodeSpace.traceObject(this, object);
if (Plan.USE_CODE_SPACE && Space.isInSpace(Plan.LARGE_CODE, object))
return Plan.largeCodeSpace.traceObject(this, object);
if (VM.VERIFY_ASSERTIONS) {
// Log.write("Failing object => "); Log.writeln(object);
Space.printVMMap();
VM.assertions._assert(false, "No special case for space in traceObject");
}
return ObjectReference.nullReference();
}
}
protected static final MarkObjects gpuMarker = new MarkObjects();
public GPU() {
super();
// Collector threads collect all pointers into GPU space locally, then push them
// to the shared gpuQueue when finished. GPU (global) pulls from gpuQueue during
// the global GPU collection phase.
gpuQueue.prepareNonBlocking();
collection = Phase.createComplex("collection", null,
Phase.scheduleComplex(initPhase),
Phase.scheduleComplex(gpuRootClosurePhase),
//Phase.scheduleComplex(refTypeClosurePhase),
//Phase.scheduleComplex(forwardPhase),
Phase.scheduleComplex(completeClosurePhase),
Phase.scheduleComplex(finishPhase)
);
}
/****************************************************************************
* Class variables
*/
public static final MarkSweepSpace msSpace = new MarkSweepSpace("ms", DEFAULT_POLL_FREQUENCY, VMRequest.create());
public static final int MARK_SWEEP = msSpace.getDescriptor();
public static final int SCAN_MARK = 0;
/****************************************************************************
* Instance variables
*/
public final Trace msTrace = new Trace(metaDataSpace);
public final SharedDeque gpuQueue = new SharedDeque("gpuQueue",metaDataSpace, 1);
public final AddressDeque myQueue = new AddressDeque("global gpu queue", gpuQueue);
/*****************************************************************************
* Collection
*/
/**
* Perform a (global) collection phase.
*
* @param phaseId Collection phase to execute.
*/
@Inline
@Override
public void collectionPhase(short phaseId) {
if (phaseId == PREPARE) {
super.collectionPhase(phaseId);
msTrace.prepare();
msSpace.prepare(true);
return;
}
if (phaseId == PRE_CLOSURE) {
//Log.writeln("[GPUGC] Pre-closure (global)...");
msTrace.prepare();
return;
}
if (phaseId == GPU_REF_GRAPH_FILL) {
gpuRefSpaceFiller.run();
return;
}
if (phaseId == GPU) {
int count = 0;
while (myQueue.isNonEmpty())
gpuAddresses.set(count++, myQueue.pop());
// Log.write("Performing GPU GC, root set size = ");
//Log.writeln(count);
VM.gpu.traceArray(gpuAddresses, count, gpuRefSpace.getStart(), gpuRefSpacePtr);
return;
}
if (phaseId == GPU_REF_GRAPH_PROCESS) {
// Transfer marks from reference graph to objects, while compacting the reference graph
Address from = alignRefSpacePtr(gpuRefSpace.getStart());
Address to = from;
while (from.LT(gpuRefSpacePtr)) {
//Log.write("Write back ");
//Log.write(from);
//Log.write(" -> ");
//Log.write(to);
//Log.write(": ");
int numRefs = from.loadInt(Offset.fromIntZeroExtend(BYTES_IN_ADDRESS));
//Log.writeln(numRefs);
if (numRefs < 0) { // Object is reachable
numRefs &= 0x3FFFFFFF;
ObjectReference obj = from.loadObjectReference();
obj.toAddress().store(to, VM.objectModel.GC_HEADER_OFFSET());
to.store(obj);
to.store(numRefs, Offset.fromIntZeroExtend(BYTES_IN_ADDRESS));
to = alignRefSpacePtr(to.plus((2 + numRefs) * BYTES_IN_ADDRESS));
gpuMarker.traceObject(obj);
}
from = alignRefSpacePtr(from.plus((2 + numRefs) * BYTES_IN_ADDRESS));
}
//Log.write(" Done, old end = ");
//Log.write(from);
//Log.write(", new end = ");
//Log.writeln(to);
gpuRefSpacePtr = to;
return;
}
if (phaseId == POST_CLOSURE) {
// Log.writeln("[GPUGC] Post-closure (global)...");
msTrace.prepare();
return;
}
if (phaseId == CLOSURE) {
// Log.writeln("[GPUGC] Regular closure (global)...");
msTrace.prepare();
return;
}
if (phaseId == RELEASE) {
msTrace.release();
msSpace.release();
super.collectionPhase(phaseId);
return;
}
super.collectionPhase(phaseId);
}
/*****************************************************************************
* Accounting
*/
/**
* Return the number of pages reserved for use given the pending
* allocation. The superclass accounts for its spaces, we just
* augment this with the mark-sweep space's contribution.
*
* @return The number of pages reserved given the pending
* allocation, excluding space reserved for copying.
*/
@Override
public int getPagesUsed() {
return (msSpace.reservedPages() + super.getPagesUsed());
}
/**
* Calculate the number of pages a collection is required to free to satisfy
* outstanding allocation requests.
*
* @return the number of pages a collection is required to free to satisfy
* outstanding allocation requests.
*/
@Override
public int getPagesRequired() {
return super.getPagesRequired() + msSpace.requiredPages();
}
/*****************************************************************************
* Miscellaneous
*/
@Interruptible
@Override
public void boot() {
super.boot();
//It would be nice if we could replace this demand mapped region with
// a normal malloc call since that would speed up the movement of memory
// to the GC space, but I don't see any easy way to do that within jikes.
gpuRefSpacePtr = gpuRefSpace.getStart();
VM.memory.dzmmap(gpuRefSpacePtr, gpuRefSpace.getExtent().toInt());
}
@Interruptible
@Override
public void postBoot() {
super.postBoot();
if (!Options.noReferenceTypes.getValue()) {
Log.writeln("should use -X:gc:noReferenceTypes=true");
}
}
/**
* @see org.mmtk.plan.Plan#willNeverMove
*
* @param object Object in question
* @return True if the object will never move
*/
@Override
public boolean willNeverMove(ObjectReference object) {
if (Space.isInSpace(MARK_SWEEP, object))
return true;
return super.willNeverMove(object);
}
/**
* Register specialized methods.
*/
@Interruptible
@Override
protected void registerSpecializedMethods() {
TransitiveClosure.registerSpecializedScan(SCAN_MARK, GPUTraceLocal.class);
super.registerSpecializedMethods();
}
@Interruptible
public void planHarnessBegin() {
//run the gc, setup jikes reporting, etc..
super.planHarnessBegin();
VM.gpu.harnessBegin();
}
}