/*
 * This file is part of the Jikes RVM project (http://jikesrvm.org).
 *
 * This file is licensed to You under the Eclipse Public License (EPL);
 * You may not use this file except in compliance with the License. You
 * may obtain a copy of the License at
 *
 *     http://www.opensource.org/licenses/eclipse-1.0.php
 *
 * See the COPYRIGHT.txt file distributed with this work for information
 * regarding copyright ownership.
 */
package org.mmtk.plan.gpu;

import org.mmtk.plan.Phase;
import org.mmtk.plan.Plan;
import org.mmtk.plan.StopTheWorld;
import org.mmtk.plan.Trace;
import org.mmtk.plan.TransitiveClosure;
import org.mmtk.policy.MarkSweepSpace;
import org.mmtk.policy.RawPageSpace;
import org.mmtk.policy.Space;
import org.mmtk.utility.Log;
import org.mmtk.utility.deque.AddressDeque;
import org.mmtk.utility.deque.SharedDeque;
import org.mmtk.utility.heap.VMRequest;
import org.mmtk.utility.options.Options;
import org.mmtk.vm.Lock;
import org.mmtk.vm.VM;
import org.vmmagic.pragma.Inline;
import org.vmmagic.pragma.Interruptible;
import org.vmmagic.pragma.Uninterruptible;
import org.vmmagic.unboxed.*;

/**
 * This class implements the global state of a simple mark-sweep collector
 * that offloads part of the transitive closure to a GPU.
 *
 * All plans make a clear distinction between <i>global</i> and
 * <i>thread-local</i> activities, and divide global and local state
 * into separate class hierarchies. Global activities must be
 * synchronized, whereas no synchronization is required for
 * thread-local activities. There is a single instance of Plan (or the
 * appropriate sub-class), and a 1:1 mapping of PlanLocal to "kernel
 * threads" (aka CPUs). Thus instance methods of PlanLocal allow fast,
 * unsynchronized access to functions such as allocation and collection.
 *
 * The global instance defines and manages static resources
 * (such as memory and virtual memory resources). This mapping of threads to
 * instances is crucial to understanding the correctness and
 * performance properties of MMTk plans.
 */
@Uninterruptible
public class GPU extends StopTheWorld {

  /****************************************************************************
   * Additional phases for the GPU plan.
   */
  public static final short PRE_CLOSURE           = Phase.createSimple("gpu-pre-closure");
  public static final short GPU_REF_GRAPH_FILL    = Phase.createSimple("gpu-ref-graph-fill");
  public static final short GPU                   = Phase.createSimple("gpu");
  public static final short GPU_REF_GRAPH_PROCESS = Phase.createSimple("gpu-ref-graph-process");
  public static final short POST_CLOSURE          = Phase.createSimple("gpu-post-closure");

  // Aligns a pointer to the next starting point for a reference graph entry.
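  //
  // Layout of a reference-graph node, as implied by allocateRefSpaceNode(),
  // FillOutRefs and the GPU_REF_GRAPH_PROCESS phase (the hard-coded 4-byte
  // constants in alignRefSpacePtr() assume BYTES_IN_ADDRESS == 4):
  //
  //   word 0              the ObjectReference this node describes
  //   word 1              numRefs, the number of reference slots in the
  //                       object; the GPU kernel appears to mark reachable
  //                       nodes by setting the sign bit, so numRefs < 0 means
  //                       "reachable" and (numRefs & 0x3FFFFFFF) recovers the
  //                       slot count
  //   words 2..numRefs+1  one word per outgoing edge, holding the node
  //                       address of the referent (zero for a null slot)
  //
  // Nodes always begin at an offset congruent to 12 (mod 16) from the start
  // of gpuRefSpace; presumably the GPU kernel relies on this placement.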
  protected static Address alignRefSpacePtr(Address ptr) {
    int offset = ptr.toInt() - gpuRefSpace.getStart().toInt();
    if (offset % (4*4) == (3*4))
      return ptr;
    else
      return ptr.plus((3*4) - (offset % (4*4)));
  }

  protected static final short gpuClosurePhase = Phase.createComplex("gpu-closure", null,
      Phase.scheduleGlobal    (PRE_CLOSURE),
      Phase.scheduleCollector (PRE_CLOSURE),
      Phase.scheduleGlobal    (GPU_REF_GRAPH_FILL),
      Phase.scheduleGlobal    (GPU),
      Phase.scheduleGlobal    (GPU_REF_GRAPH_PROCESS),
      Phase.scheduleGlobal    (POST_CLOSURE),
      Phase.scheduleCollector (POST_CLOSURE));

  protected static final short gpuRootClosurePhase = Phase.createComplex("initial-closure", null,
      Phase.scheduleMutator   (PREPARE),
      Phase.scheduleGlobal    (PREPARE),
      Phase.scheduleCollector (PREPARE),
      Phase.scheduleComplex   (prepareStacks),
      Phase.scheduleCollector (PRECOPY),
      Phase.scheduleCollector (STACK_ROOTS),
      Phase.scheduleCollector (ROOTS),
      Phase.scheduleGlobal    (ROOTS),
      Phase.scheduleComplex   (gpuClosurePhase));

  protected static final AddressArray gpuAddresses = AddressArray.create(100000);

  // Reference graph
  public static final Space gpuRefSpace = new RawPageSpace("gpu-refs", 0, VMRequest.create(256, false));
  public static final Lock gpuRefSpaceLock = VM.newLock("gpu-refs");
  public static Address gpuRefSpacePtr = Address.zero();

  @Inline
  public static void allocateRefSpaceNode(ObjectReference ref) {
    int numRefs = VM.gpu.getObjectNumRefs(ref);
    gpuRefSpaceLock.acquire();
    Address node = alignRefSpacePtr(gpuRefSpacePtr);
    gpuRefSpacePtr = node.plus((2 + numRefs) * BYTES_IN_ADDRESS);
    gpuRefSpaceLock.release();
    ref.toAddress().store(node, VM.objectModel.GC_HEADER_OFFSET());
    node.store(ref);
    node.store(numRefs, Offset.fromIntZeroExtend(BYTES_IN_ADDRESS));
  }

  @Uninterruptible
  protected static class FillOutRefs extends TransitiveClosure {
    private Address curEdge;

    @Override
    public void processEdge(ObjectReference source, Address slot) {
      ObjectReference dest = slot.loadObjectReference();
      Address destNode = Address.zero();
      if (!dest.isNull()) {
        destNode = dest.toAddress().loadAddress(VM.objectModel.GC_HEADER_OFFSET());
      }
      curEdge.store(destNode);
      curEdge = curEdge.plus(BYTES_IN_ADDRESS);
    }

    @Inline
    public void run() {
      Address curNode = alignRefSpacePtr(gpuRefSpace.getStart());
      while (curNode.LT(gpuRefSpacePtr)) {
        ObjectReference obj = curNode.loadObjectReference();
        int numRefs = curNode.loadInt(Offset.fromIntZeroExtend(BYTES_IN_ADDRESS));
        curEdge = curNode.plus(2 * BYTES_IN_ADDRESS);
        VM.scanning.scanObject(this, obj);
        curNode = alignRefSpacePtr(curNode.plus((2 + numRefs) * BYTES_IN_ADDRESS));
        if (VM.VERIFY_ASSERTIONS) VM.assertions._assert(alignRefSpacePtr(curEdge).EQ(curNode));
      }
    }
  }

  protected static final FillOutRefs gpuRefSpaceFiller = new FillOutRefs();
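  // A small worked example (hypothetical objects, 4-byte words): for a heap
  // with A -> {B, null} and B -> {}, GPU_REF_GRAPH_FILL leaves the graph as
  //
  //   node(A) = [ A, 2, node(B), 0 ]   // two reference slots: B and null
  //   node(B) = [ B, 0 ]               // no reference slots
  //
  // where node(X) is the address that allocateRefSpaceNode() stored into X's
  // GC header word, and the 0 edge comes from the null slot.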
  @Uninterruptible
  protected static class MarkObjects extends TransitiveClosure {
    @Inline
    public void processNode(ObjectReference object) {
      // Do nothing, tracing was already done
    }

    @Inline
    public ObjectReference traceObject(ObjectReference object) {
      if (Space.isInSpace(MARK_SWEEP, object))
        return msSpace.traceObject(this, object);
      if (Space.isInSpace(Plan.IMMORTAL, object))
        return Plan.immortalSpace.traceObject(this, object);
      if (Space.isInSpace(Plan.LOS, object))
        return Plan.loSpace.traceObject(this, object);
      if (Space.isInSpace(Plan.NON_MOVING, object))
        return Plan.nonMovingSpace.traceObject(this, object);
      if (Plan.USE_CODE_SPACE && Space.isInSpace(Plan.SMALL_CODE, object))
        return Plan.smallCodeSpace.traceObject(this, object);
      if (Plan.USE_CODE_SPACE && Space.isInSpace(Plan.LARGE_CODE, object))
        return Plan.largeCodeSpace.traceObject(this, object);
      if (VM.VERIFY_ASSERTIONS) {
        //Log.write("Failing object => ");
        Log.writeln(object);
        Space.printVMMap();
        VM.assertions._assert(false, "No special case for space in traceObject");
      }
      return ObjectReference.nullReference();
    }
  }

  protected static final MarkObjects gpuMarker = new MarkObjects();

  public GPU() {
    super();
    // Collector threads collect all pointers into GPU space locally, then push them
    // to the shared gpuQueue when finished. GPU (global) pulls from gpuQueue during
    // the global GPU collection phase.
    gpuQueue.prepareNonBlocking();
    collection = Phase.createComplex("collection", null,
        Phase.scheduleComplex(initPhase),
        Phase.scheduleComplex(gpuRootClosurePhase),
        //Phase.scheduleComplex(refTypeClosurePhase),
        //Phase.scheduleComplex(forwardPhase),
        Phase.scheduleComplex(completeClosurePhase),
        Phase.scheduleComplex(finishPhase));
  }

  /****************************************************************************
   * Class variables
   */
  public static final MarkSweepSpace msSpace = new MarkSweepSpace("ms", DEFAULT_POLL_FREQUENCY, VMRequest.create());
  public static final int MARK_SWEEP = msSpace.getDescriptor();
  public static final int SCAN_MARK = 0;

  /****************************************************************************
   * Instance variables
   */
  public final Trace msTrace = new Trace(metaDataSpace);
  public final SharedDeque gpuQueue = new SharedDeque("gpuQueue", metaDataSpace, 1);
  public final AddressDeque myQueue = new AddressDeque("global gpu queue", gpuQueue);
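  // How a collection appears to proceed, piecing together the schedules and
  // collectionPhase() below: gpuRootClosurePhase performs the usual
  // stop-the-world root scan, during which collector threads push pointers
  // into GPU-traced space onto gpuQueue; the GPU phase drains that queue
  // into gpuAddresses and hands the root set plus the reference graph to
  // VM.gpu.traceArray(); GPU_REF_GRAPH_PROCESS then copies the GPU's marks
  // back onto the heap (via gpuMarker) while compacting the graph; finally,
  // completeClosurePhase finishes on the CPU any closure the GPU could not.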
  /*****************************************************************************
   * Collection
   */

  /**
   * Perform a (global) collection phase.
   *
   * @param phaseId Collection phase to execute.
   */
  @Inline
  @Override
  public void collectionPhase(short phaseId) {
    if (phaseId == PREPARE) {
      super.collectionPhase(phaseId);
      msTrace.prepare();
      msSpace.prepare(true);
      return;
    }

    if (phaseId == PRE_CLOSURE) {
      //Log.writeln("[GPUGC] Pre-closure (global)...");
      msTrace.prepare();
      return;
    }

    if (phaseId == GPU_REF_GRAPH_FILL) {
      gpuRefSpaceFiller.run();
      return;
    }

    if (phaseId == GPU) {
      int count = 0;
      while (myQueue.isNonEmpty())
        gpuAddresses.set(count++, myQueue.pop());
      //Log.write("Performing GPU GC, root set size = ");
      //Log.writeln(count);
      VM.gpu.traceArray(gpuAddresses, count, gpuRefSpace.getStart(), gpuRefSpacePtr);
      return;
    }

    if (phaseId == GPU_REF_GRAPH_PROCESS) {
      // Transfer marks from the reference graph to objects, while compacting the reference graph
      Address from = alignRefSpacePtr(gpuRefSpace.getStart());
      Address to = from;
      while (from.LT(gpuRefSpacePtr)) {
        //Log.write("Write back ");
        //Log.write(from);
        //Log.write(" -> ");
        //Log.write(to);
        //Log.write(": ");
        int numRefs = from.loadInt(Offset.fromIntZeroExtend(BYTES_IN_ADDRESS));
        //Log.writeln(numRefs);
        if (numRefs < 0) {
          // Object is reachable
          numRefs &= 0x3FFFFFFF;
          ObjectReference obj = from.loadObjectReference();
          obj.toAddress().store(to, VM.objectModel.GC_HEADER_OFFSET());
          to.store(obj);
          to.store(numRefs, Offset.fromIntZeroExtend(BYTES_IN_ADDRESS));
          to = alignRefSpacePtr(to.plus((2 + numRefs) * BYTES_IN_ADDRESS));
          gpuMarker.traceObject(obj);
        }
        from = alignRefSpacePtr(from.plus((2 + numRefs) * BYTES_IN_ADDRESS));
      }
      //Log.write(" Done, old end = ");
      //Log.write(from);
      //Log.write(", new end = ");
      //Log.writeln(to);
      gpuRefSpacePtr = to;
      return;
    }

    if (phaseId == POST_CLOSURE) {
      //Log.writeln("[GPUGC] Post-closure (global)...");
      msTrace.prepare();
      return;
    }

    if (phaseId == CLOSURE) {
      //Log.writeln("[GPUGC] Regular closure (global)...");
      msTrace.prepare();
      return;
    }

    if (phaseId == RELEASE) {
      msTrace.release();
      msSpace.release();
      super.collectionPhase(phaseId);
      return;
    }

    super.collectionPhase(phaseId);
  }

  /*****************************************************************************
   * Accounting
   */

  /**
   * Return the number of pages reserved for use given the pending
   * allocation. The superclass accounts for its spaces; we just
   * augment this with the mark-sweep space's contribution.
   *
   * @return The number of pages reserved given the pending
   * allocation, excluding space reserved for copying.
   */
  @Override
  public int getPagesUsed() {
    return (msSpace.reservedPages() + super.getPagesUsed());
  }

  /**
   * Calculate the number of pages a collection is required to free to satisfy
   * outstanding allocation requests.
   *
   * @return the number of pages a collection is required to free to satisfy
   * outstanding allocation requests.
   */
  @Override
  public int getPagesRequired() {
    return super.getPagesRequired() + msSpace.requiredPages();
  }

  /*****************************************************************************
   * Miscellaneous
   */

  @Interruptible
  @Override
  public void boot() {
    super.boot();
    // It would be nice if we could replace this demand-mapped region with
    // a normal malloc call, since that would speed up the movement of memory
    // to the GC space, but I don't see any easy way to do that within Jikes.
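    // The region is demand-zero mapped eagerly below, and gpuRefSpacePtr then
    // serves as a bump pointer into it: allocateRefSpaceNode() advances it
    // under gpuRefSpaceLock, and GPU_REF_GRAPH_PROCESS rewinds it when the
    // graph is compacted. Note that dzmmap takes its extent in bytes.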
    gpuRefSpacePtr = gpuRefSpace.getStart();
    VM.memory.dzmmap(gpuRefSpacePtr, gpuRefSpace.getExtent().toInt());
  }

  @Interruptible
  @Override
  public void postBoot() {
    super.postBoot();
    if (!Options.noReferenceTypes.getValue()) {
      Log.writeln("should use -X:gc:noReferenceTypes=true");
    }
  }

  /**
   * @see org.mmtk.plan.Plan#willNeverMove
   *
   * @param object Object in question
   * @return True if the object will never move
   */
  @Override
  public boolean willNeverMove(ObjectReference object) {
    if (Space.isInSpace(MARK_SWEEP, object))
      return true;
    return super.willNeverMove(object);
  }

  /**
   * Register specialized methods.
   */
  @Interruptible
  @Override
  protected void registerSpecializedMethods() {
    TransitiveClosure.registerSpecializedScan(SCAN_MARK, GPUTraceLocal.class);
    super.registerSpecializedMethods();
  }

  @Interruptible
  public void planHarnessBegin() {
    // Run the GC, set up Jikes reporting, etc.
    super.planHarnessBegin();
    VM.gpu.harnessBegin();
  }
}
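// Note: postBoot() warns unless reference types are disabled, so this plan
// presumably expects to be invoked with something like
//
//   rvm -X:gc:noReferenceTypes=true <application>
//
// on a build configured to use this plan.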