Value.java example

Explorer
h2o-2-master
package water;

import java.io.*;
import java.util.Arrays;
import java.util.concurrent.atomic.AtomicInteger;

import jsr166y.ForkJoinPool;
import water.Job.ProgressMonitor;
import water.fvec.*;
import water.nbhm.NonBlockingSetInt;
import water.persist.*;
import water.util.Utils;

/**
 * The core Value stored in the distributed K/V store.  It contains an
 * underlying byte[] which may be spilled to disk and freed by the
 * {@link MemoryManager}.
 */
public class
        Value extends Iced implements ForkJoinPool.ManagedBlocker {

  // ---
  // Type-id of serialized object; see TypeMap for the list.
  // Might be a primitive array type, or a Iced POJO
  private short _type;
  public int type() { return _type; }
  public String className() { return TypeMap.className(_type); }

  // Max size of Values before we start asserting.
  // Sizes around this big, or larger are probably true errors.
  // In any case, they will cause issues with both GC (giant pause times on
  // many collectors) and I/O (long term blocking of TCP I/O channels to
  // service a single request, causing starvation of other requests).
  public static final int MAX = 80*1024*1024;

  // ---
  // Values are wads of bits; known small enough to 'chunk' politely on disk,
  // or fit in a Java heap (larger Values are built via arraylets) but (much)
  // larger than a UDP packet.  Values can point to either the disk or ram
  // version or both.  There's no caching smarts, nor compression nor de-dup
  // smarts.  This is just a local placeholder for some user bits being held at
  // this local Node.
  public int _max; // Max length of Value bytes

  // ---
  // A array of this Value when cached in DRAM, or NULL if not cached.  The
  // contents of _mem are immutable (Key/Value mappings can be changed by an
  // explicit PUT action).  Cleared to null asynchronously by the memory
  // manager (but only if persisted to some disk or in a POJO).  Can be filled
  // in by reloading from disk, or by serializing a POJO.
  private volatile byte[] _mem;
  public final byte[] rawMem() { return _mem; }

  // ---
  // A POJO version of the _mem array, or null if the _mem has not been
  // serialized or if _mem is primitive data and not a POJO.  Cleared to null
  // asynchronously by the memory manager (but only if persisted to some disk,
  // or in the _mem array).  Can be filled in by deserializing the _mem array.

  // NOTE THAT IF YOU MODIFY any fields of a POJO that is part of a Value,
  // - this is NOT the recommended programming style,
  // - those changes are visible to all CPUs on the writing node,
  // - but not to other nodes, and
  // - the POJO might be dropped by the MemoryManager and reconstituted from
  //   disk and/or the byte array back to it's original form, losing your changes.
  private volatile Freezable _pojo;
  public Freezable rawPOJO() { return _pojo; }

  // Free array (but always be able to rebuild the array)
  public final void freeMem() {
    assert isPersisted() || _pojo != null || _key._kb[0]==Key.DVEC;
    _mem = null;
  }
  // Free POJO (but always be able to rebuild the POJO)
  public final void freePOJO() {
    assert isPersisted() || _mem != null;
    _pojo = null;
  }

  // The FAST path get-byte-array - final method for speed.
  // Will (re)build the mem array from either the POJO or disk.
  // Never returns NULL.
  public final byte[] memOrLoad() {
    byte[] mem = _mem;          // Read once!
    if( mem != null ) return mem;
    Freezable pojo = _pojo;     // Read once!
    if( pojo != null )
      if( pojo instanceof Chunk ) return (_mem = ((Chunk)pojo).getBytes());
      else return (_mem = pojo.write(new AutoBuffer()).buf());
    if( _max == 0 ) return (_mem = new byte[0]);
    return (_mem = loadPersist());
  }
  // Just an empty shell of a Value, no local data but the Value is "real".
  // Any attempt to look at the Value will require a remote fetch.
  public final boolean isEmpty() { return _max > 0 && _mem==null && _pojo == null && !isPersisted(); }
  public final byte[] getBytes() {
    assert _type==TypeMap.PRIM_B && _pojo == null;
    byte[] mem = _mem;          // Read once!
    return mem != null ? mem : (_mem = loadPersist());
  }

  // The FAST path get-POJO - final method for speed.
  // Will (re)build the POJO from the _mem array.
  // Never returns NULL.
  public <T extends Iced> T get() {
    touch();
    Iced pojo = (Iced)_pojo;    // Read once!
    if( pojo != null ) return (T)pojo;
    pojo = TypeMap.newInstance(_type);
    pojo.read(new AutoBuffer(memOrLoad()));
    pojo.init(_key);
    return (T)(_pojo = pojo);
  }
  public <T extends Freezable> T get(Class<T> fc) {
    T pojo = getFreezable();
    assert fc.isAssignableFrom(pojo.getClass());
    return pojo;
  }
  public <T extends Freezable> T getFreezable() {
    touch();
    Freezable pojo = _pojo;     // Read once!
    if( pojo != null ) return (T)pojo;
    pojo = TypeMap.newFreezable(_type);
    pojo.read(new AutoBuffer(memOrLoad()));
    if( pojo instanceof Iced ) ((Iced)pojo).init(_key);
    return (T)(_pojo = pojo);
  }

  // ---
  // Time of last access to this value.
  transient long _lastAccessedTime = System.currentTimeMillis();
  public final void touch() {_lastAccessedTime = System.currentTimeMillis();}


  // ---
  // A Value is persisted. The Key is used to define the filename.
  public transient Key _key;

  // ---
  // Backend persistence info.  3 bits are reserved for 8 different flavors of
  // backend storage.  1 bit for whether or not the latest _mem field is
  // entirely persisted on the backend storage, or not.  Note that with only 1
  // bit here there is an unclosable datarace: one thread could be trying to
  // change _mem (e.g. to null for deletion) while another is trying to write
  // the existing _mem to disk (for persistence).  This datarace only happens
  // if we have racing deletes of an existing key, along with racing persist
  // attempts.  There are other races that are stopped higher up the stack: we
  // do not attempt to write to disk, unless we have *all* of a Value, so
  // extending _mem (from a remote read) should not conflict with writing _mem
  // to disk.
  //
  // The low 3 bits are final.
  // The on/off disk bit is strictly cleared by the higher layers (e.g. Value.java)
  // and strictly set by the persistence layers (e.g. PersistIce.java).
  public volatile byte _persist; // 3 bits of backend flavor; 1 bit of disk/notdisk
  public final static byte ICE = 1<<0; // ICE: distributed local disks
  public final static byte HDFS= 2<<0; // HDFS: backed by hadoop cluster
  public final static byte S3  = 3<<0; // Amazon S3
  public final static byte NFS = 4<<0; // NFS: Standard file system
  public final static byte TACHYON = 5<<0; // Support for tachyon FS
  public final static byte TCP = 7<<0; // TCP: For profile purposes, not a storage system
  public final static byte BACKEND_MASK = (8-1);
  public final static byte NOTdsk = 0<<3; // latest _mem is persisted or not
  public final static byte ON_dsk = 1<<3;
  final public void clrdsk() { _persist &= ~ON_dsk; } // note: not atomic
  final public void setdsk() { _persist |=  ON_dsk; } // note: not atomic
  final public boolean isPersisted() { return (_persist&ON_dsk)!=0; }
  final public byte backend() { return (byte)(_persist&BACKEND_MASK); }

  // ---
  // Interface for using the persistence layer(s).
  public boolean onICE    () { return (backend()) ==     ICE; }
  public boolean onHDFS   () { return (backend()) ==    HDFS; }
  public boolean onNFS    () { return (backend()) ==     NFS; }
  public boolean onS3     () { return (backend()) ==      S3; }
  public boolean onTachyon() { return (backend()) == TACHYON; }

  /** Store complete Values to disk */
  void storePersist() throws IOException {
    if( isPersisted() ) return;
    Persist.I[backend()].store(this);
  }

  /** Remove dead Values from disk */
  void removeIce() {
    // do not yank memory, as we could have a racing get hold on to this
    //  free_mem();
    if( !isPersisted() || !onICE() ) return; // Never hit disk?
    clrdsk();  // Not persisted now
    Persist.I[backend()].delete(this);
  }
  /** Load some or all of completely persisted Values */
  byte[] loadPersist() {
    assert isPersisted();
    return Persist.I[backend()].load(this);
  }

  public String nameOfPersist() { return nameOfPersist(backend()); }
  public static String nameOfPersist(int x) {
    switch( x ) {
    case ICE : return "ICE";
    case HDFS: return "HDFS";
    case S3  : return "S3";
    case NFS : return "NFS";
    case TCP : return "TCP";
    default  : return "UNKNOWN(" + x + ")";
    }
  }

  /** Set persistence to HDFS from ICE */
  public void setHdfs() {
    assert onICE();
    byte[] mem = memOrLoad();    // Get into stable memory
    _persist = Value.HDFS|Value.NOTdsk;
    Persist.I[Value.HDFS].store(this);
    removeIce();           // Remove from ICE disk
    assert onHDFS();       // Flip to HDFS
    _mem = mem; // Close a race with the H2O cleaner zapping _mem while removing from ice
  }


  public StringBuilder getString( int len, StringBuilder sb ) {
    int newlines=0;
    byte[] b = memOrLoad();
    final int LEN=Math.min(len,b.length);
    for( int i=0; i<LEN; i++ ) {
      byte c = b[i];
      if( c == '&' ) sb.append("&");
      else if( c == '<' ) sb.append("<");
      else if( c == '>' ) sb.append(">");
      else if( c == '\n' ) { sb.append("<br>"); if( newlines++ > 5 ) break; }
      else if( c == ',' && i+1<LEN && b[i+1]!=' ' )
        sb.append(", ");
      else sb.append((char)c);
    }
    if( b.length > LEN ) sb.append("...");
    return sb;
  }

  public boolean isLockable(){ return _type != TypeMap.PRIM_B && (TypeMap.newInstance(_type) instanceof Lockable); }
  public boolean isFrame()   { return _type == TypeMap.FRAME; }
  public boolean isVec()     { return _type != TypeMap.PRIM_B && (TypeMap.newInstance(_type) instanceof Vec); }
  public boolean isByteVec() { return _type != TypeMap.PRIM_B && (TypeMap.newInstance(_type) instanceof ByteVec); }
  public boolean isRawData() {
    if(isFrame()){
      Frame fr = get();
      return fr.vecs().length == 1 && (fr.vecs()[0] instanceof ByteVec);
    }
    // either simple value with bytearray, un-parsed value array or byte vec
    return _type == TypeMap.PRIM_B || isByteVec();
  }

  public byte[] getFirstBytes() {
    Value v = this;
    if(isByteVec()){
      ByteVec vec = get();
      return vec.chunkForChunkIdx(0).getBytes();
    } else if(isFrame()){
      Frame fr = get();
      return ((ByteVec)fr.vecs()[0]).chunkForChunkIdx(0).getBytes();
    }
    // Return empty array if key has been deleted
    return v != null ? v.memOrLoad() : new byte[0];
  }

  // For plain Values, just the length in bytes.
  // For ValueArrays, the length of all chunks.
  // For Frames, the compressed size of all vecs within the frame.
  public long length() {
    if (isFrame()) {
      return ((Frame)get()).byteSize();
    }

    return _max;
  }

  public InputStream openStream() throws IOException {
    return openStream(null);
  }
  /** Creates a Stream for reading bytes */
  public InputStream openStream(ProgressMonitor p) throws IOException {
    if(onNFS() ) return PersistNFS .openStream(_key  );
    if(onHDFS()) return PersistHdfs.openStream(_key,p);
    if(onS3()  ) return PersistS3  .openStream(_key,p);
    if(onTachyon()) return PersistTachyon.openStream(_key,p);
    if( isFrame() ) throw new IllegalArgumentException("Tried to pass a Frame to openStream (maybe tried to parse a (already-parsed) Frame?)");
    assert _type==TypeMap.PRIM_B : "Expected byte[] type but got "+TypeMap.className(_type);
    return new ByteArrayInputStream(memOrLoad());
  }

  public boolean isBitIdentical( Value v ) {
    if( this == v ) return true;
    if( !isFrame() && !v.isFrame() )
      return Arrays.equals(getBytes(), v.getBytes());
    Frame fr0 =   get();
    Frame fr1 = v.get();
    if( fr0.numRows() != fr1.numRows() ) return false;
    if( fr0.numCols() != fr1.numCols() ) return false;
    return new BitCmp(fr1).doAll(fr0)._eq;
  }
  private static class BitCmp extends MRTask2<BitCmp> {
    final Frame _fr;
    BitCmp( Frame fr ) { _fr = fr; }
    boolean _eq;
    @Override public void map( Chunk[] chks ) {
      int cols = chks.length;
      int rows = chks[0]._len;
      long start = chks[0]._start;
      for( int c=0; c<cols; c++ ) {
        Chunk c0 = chks[c    ];
        Vec v1 = _fr.vecs()[c];
        if( c0._vec.isUUID() ) {
          for( int r=0; r<rows; r++ )
            if( !( c0.isNA0(r) && v1. isNA(r+start)) &&
                (( c0. isNA0(r)&&!v1. isNA(r+start)) || 
                 (!c0. isNA0(r)&& v1. isNA(r+start)) ||
                 ( c0.at16l0(r)!= v1.at16l(r+start))|| 
                 ( c0.at16h0(r)!= v1.at16h(r+start))) )
              return;
        } else {
          for( int r=0; r<rows; r++ )
            if( !Utils.compareDoubles(c0.at0(r),v1.at(r+start)) )
              return;
        }
      }
      _eq = true;
    }
    @Override public void reduce( BitCmp bc ) { _eq &= bc._eq; }
  }

  // --------------------------------------------------------------------------
  // Set just the initial fields
  public Value(Key k, int max, byte[] mem, short type, byte be ) {
    assert mem==null || mem.length==max;
    assert max < MAX : "Value size=0x"+Integer.toHexString(max);
    _key = k;
    _max = max;
    _mem = mem;
    _type = type;
    _pojo = null;
    // For the ICE backend, assume new values are not-yet-written.
    // For HDFS & NFS backends, assume we from global data and preserve the
    // passed-in persist bits
    byte p = (byte)(be&BACKEND_MASK);
    _persist = (p==ICE) ? p : be;
    _rwlock = new AtomicInteger(0);
    _replicas = k.home() ? new NonBlockingSetInt() : null;
  }
  public Value(Key k, byte[] mem ) { this(k, mem.length, mem, TypeMap.PRIM_B, ICE); }
  public Value(Key k, int max ) { this(k, max, new byte[max], TypeMap.PRIM_B, ICE); }
  public Value(Key k, int max, byte be ) { this(k, max, null, TypeMap.PRIM_B,  be); }
  public Value(Key k, String s ) { this(k, s.getBytes()); }
  public Value(Key k, Iced pojo ) { this(k,pojo,ICE); }
  public Value(Key k, Iced pojo, byte be ) {
    _key = k;
    _pojo = pojo;
    _type = (short)pojo.frozenType();
    _mem = (pojo instanceof Chunk)?((Chunk)pojo).getBytes():pojo.write(new AutoBuffer()).buf();
    _max = _mem.length;
    // For the ICE backend, assume new values are not-yet-written.
    // For HDFS & NFS backends, assume we from global data and preserve the
    // passed-in persist bits
    byte p = (byte)(be&BACKEND_MASK);
    _persist = (p==ICE) ? p : be;
    _rwlock = new AtomicInteger(0);
    _replicas = k.home() ? new NonBlockingSetInt() : null;
  }
  public Value(Key k, Freezable pojo) {
    _key = k;
    _pojo = pojo;
    _type = (short)pojo.frozenType();
    _mem = pojo.write(new AutoBuffer()).buf();
    _max = _mem.length;
    _persist = ICE;
    _rwlock = new AtomicInteger(0);
    _replicas = k.home() ? new NonBlockingSetInt() : null;
  }
  // Nullary constructor for weaving
  public Value() {
    _rwlock = new AtomicInteger(0);
    _replicas = new NonBlockingSetInt();
  }

  // Custom serializers: the _mem field is racily cleared by the MemoryManager
  // and the normal serializer then might ship over a null instead of the
  // intended byte[].  Also, the value is NOT on the deserialize'd machines disk
  public AutoBuffer write(AutoBuffer bb) {
    byte p = _persist;
    if( onICE() ) p &= ~ON_dsk; // Not on the remote disk
    return bb.put1(p).put2(_type).putA1(memOrLoad());
  }

  // Custom serializer: set _max from _mem length; set replicas & timestamp.
  public Value read(AutoBuffer bb) {
    assert _key == null;        // Not set yet
    _persist = (byte) bb.get1();
    _type = (short) bb.get2();
    _mem = bb.getA1();
    _max = _mem.length;
    _pojo = null;
    // On remote nodes _rwlock is initialized to 0 (signaling a remote PUT is
    // in progress) flips to -1 when the remote PUT is done, or +1 if a notify
    // needs to happen.
    _rwlock.set(-1);            // Set as 'remote put is done'
    touch();
    return this;
  }

  // ---------------------
  // Ordering of K/V's!  This field tracks a bunch of things used in ordering
  // updates to the same Key.  Ordering Rules:
  // - Program Order.  You see your own writes.  All writes in a single thread
  //   strongly ordered (writes never roll back).  In particular can:
  //   PUT(v1), GET, PUT(null) and The Right Thing happens.
  // - Unrelated writes can race (unless fencing).
  // - Writes are not atomic: some people can see a write ahead of others.
  // - Last-write-wins: if we do a zillion writes to the same Key then wait "a
  //   long time", then do reads all reads will see the same last value.
  // - Blocking on a PUT stalls until the PUT is cloud-wide visible
  //
  // For comparison to H2O get/put MM
  // IA Memory Ordering,  8 principles from Rich Hudson, Intel
  // 1. Loads are not reordered with other loads
  // 2. Stores are not reordered with other stores
  // 3. Stores are not reordered with older loads
  // 4. Loads may be reordered with older stores to different locations but not
  //    with older stores to the same location
  // 5. In a multiprocessor system, memory ordering obeys causality (memory
  //    ordering respects transitive visibility).
  // 6. In a multiprocessor system, stores to the same location have a total order
  // 7. In a multiprocessor system, locked instructions have a total order
  // 8. Loads and stores are not reordered with locked instructions.
  //
  // My (KN, CNC) interpretation of H2O MM from today:
  // 1. Gets are not reordered with other Gets
  // 2  Puts may be reordered with Puts to different Keys.
  // 3. Puts may be reordered with older Gets to different Keys, but not with
  //    older Gets to the same Key.
  // 4. Gets may be reordered with older Puts to different Keys but not with
  //    older Puts to the same Key.
  // 5. Get/Put amongst threads doesn't obey causality
  // 6. Puts to the same Key have a total order.
  // 7. no such thing. although RMW operation exists with Put-like constraints.
  // 8. Gets and Puts may be reordered with RMW operations
  // 9. A write barrier exists that creates Sequential Consistency.  Same-key
  //    ordering (3-4) can't be used to create the effect.
  //
  // A Reader/Writer lock for the home node to control racing Gets and Puts.
  // - 0 for unlocked
  // - +N for locked by N concurrent GETs-in-flight
  // - -1 for write-locked
  //
  // An ACKACK from the client GET lowers the reader lock count.
  //
  // Home node PUTs alter which Value is mapped to a Key, then they block until
  // there are no active GETs, then atomically set the write-lock, then send
  // out invalidates to all the replicas.  PUTs return when all invalidates
  // have reported back.
  //
  // An initial remote PUT will default the value to 0.  A 2nd PUT attempt will
  // block until the 1st one completes (multiple writes to the same Key from
  // the same JVM block, so there is at most 1 outstanding write to the same
  // Key from the same JVM).  The 2nd PUT will CAS the value to 1, indicating
  // the need for the finishing 1st PUT to call notify().
  //
  // Note that this sequence involves a lot of blocking on repeated writes with
  // cached readers, but not the readers - i.e., writes are slow to complete.
  private transient final AtomicInteger _rwlock;
  private boolean RW_CAS( int old, int nnn, String msg ) {
    if( !_rwlock.compareAndSet(old,nnn) ) return false;
    //System.out.println(_key+", "+old+" -> "+nnn+", "+msg);
    return true;
  }
  // List of who is replicated where
  private transient final NonBlockingSetInt _replicas;
  public int numReplicas() { return _replicas.size(); }
  /** True if h2o has a copy of this Value */
  boolean isReplicatedTo( H2ONode h2o ) { return _replicas.contains(h2o._unique_idx); }

  /** Atomically insert h2o into the replica list; reports false if the Value
   *  flagged against future replication with a -1.  Also bumps the active
   *  Get count, which remains until the Get completes (we receive an ACKACK). */
  boolean setReplica( H2ONode h2o ) {
    assert _key.home(); // Only the HOME node for a key tracks replicas
    assert h2o != H2O.SELF;     // Do not track self as a replica
    while( true ) {     // Repeat, in case racing GETs are bumping the counter
      int old = _rwlock.get();
      if( old == -1 ) return false; // Write-locked; no new replications.  Read fails to read *this* value
      assert old >= 0;              // Not negative
      if( RW_CAS(old,old+1,"rlock+") ) break;
    }
    // Narrow non-race here.  Here is a time window where the rwlock count went
    // up, but the replica list does not account for the new replica.  However,
    // the rwlock cannot go down until an ACKACK is received, and the ACK
    // (hence ACKACK) doesn't go out until after this function returns.
    _replicas.add(h2o._unique_idx);
    // Both rwlock taken, and replica count is up now.
    return true;
  }

  /** Atomically lower active GET count */
  void lowerActiveGetCount( H2ONode h2o ) {
    assert _key.home();    // Only the HOME node for a key tracks replicas
    assert h2o != H2O.SELF;// Do not track self as a replica
    while( true ) {        // Repeat, in case racing GETs are bumping the counter
      int old = _rwlock.get(); // Read the lock-word
      assert old > 0;      // Since lowering, must be at least 1
      assert old != -1;    // Not write-locked, because we are an active reader
      assert _replicas.contains(h2o._unique_idx); // Self-bit is set
      if( RW_CAS(old,old-1,"rlock-") ) {
        if( old-1 == 0 )   // GET count fell to zero?
          synchronized( this ) { notifyAll(); } // Notify any pending blocked PUTs
        return;            // Repeat until count is lowered
      }
    }
  }

  /** This value was atomically extracted from the local STORE by a successful
   *  TaskPutKey attempt (only 1 thread can ever extract and thus call here).
   *  No future lookups will find this Value, but there may be existing uses.
   *  Atomically set the rwlock count to -1 locking it from further GETs and
   *  ship out invalidates to caching replicas.  May need to block on active
   *  GETs.  Updates a set of Future invalidates that can be blocked against. */
  Futures lockAndInvalidate( H2ONode sender, Futures fs ) {
    assert _key.home(); // Only the HOME node for a key tracks replicas
    // Write-Lock against further GETs
    while( true ) {      // Repeat, in case racing GETs are bumping the counter
      int old = _rwlock.get();
      assert old >= 0 : _key+", rwlock="+old;  // Count does not go negative
      assert old != -1; // Only the thread doing a PUT ever locks
      if( old !=0 ) { // has readers?
        // Active readers: need to block until the GETs (of this very Value!)
        // all complete, before we can invalidate this Value - lest a racing
        // Invalidate bypass a GET.
        try { ForkJoinPool.managedBlock(this); } catch( InterruptedException e ) { }
      } else if( RW_CAS(0,-1,"wlock") )
        break;                  // Got the write-lock!
    }
    // We have the set of Nodes with replicas now.  Ship out invalidates.
    int max = _replicas.length();
    for( int i=0; i<max; i++ )
      if( _replicas.contains(i) && H2ONode.IDX[i] != sender )
        TaskInvalidateKey.invalidate(H2ONode.IDX[i],_key,fs);
    return fs;
  }

  /** Initialize the _replicas field for a PUT.  On the Home node (for remote
   *  PUTs), it is initialized to the one replica we know about, and not
   *  read-locked.  Used on a new Value about to be PUT on the Home node. */
  void initReplicaHome( H2ONode h2o, Key key ) {
    assert key.home();
    assert _key == null; // This is THE initializing key write for serialized Values
    assert h2o != H2O.SELF;     // Do not track self as a replica
    _key = key;
    // Set the replica bit for the one node we know about, and leave the
    // rest clear.
    _replicas.add(h2o._unique_idx);
    _rwlock.set(0);             // No GETs are in-flight at this time.
    //System.out.println(key+", init "+_rwlock.get());
  }

  /** Block this thread until all prior remote PUTs complete - to force
   *  remote-PUT ordering on the home node. */
  void startRemotePut() {
    assert !_key.home();
    int x = 0;
    // assert I am waiting on threads with higher priority?
    while( (x=_rwlock.get()) != -1 ) // Spin until rwlock==-1
      if( x == 1 || RW_CAS(0,1,"remote_need_notify") )
        try { ForkJoinPool.managedBlock(this); } catch( InterruptedException e ) { }
  }

  /** The PUT for this Value has completed.  Wakeup any blocked later PUTs. */
  void completeRemotePut() {
    assert !_key.home();
    // Attempt an eager blind attempt, assuming no blocked pending notifies
    if( RW_CAS(0, -1,"remote_complete") ) return;
    synchronized(this) {
      boolean res = RW_CAS(1, -1,"remote_do_notify");
      assert res;               // Must succeed
      notifyAll();              // Wake up pending blocked PUTs
    }
  }

  /** Return true if blocking is unnecessary.
   *  Alas, used in TWO places and the blocking API forces them to share here. */
  @Override public boolean isReleasable() {
    int r = _rwlock.get();
    if( _key.home() ) {         // Called from lock_and_invalidate
      // Home-key blocking: wait for active-GET count to fall to zero
      return r == 0;
    } else {                    // Called from start_put
      // Remote-key blocking: wait for active-PUT lock to hit -1
      assert r == 1 || r == -1; // Either waiting (1) or done (-1) but not started(0)
      return r == -1;           // done!
    }
  }
  /** Possibly blocks the current thread.  Returns true if isReleasable would
   * return true.  Used by the FJ Pool management to spawn threads to prevent
   * deadlock is otherwise all threads would block on waits. */
  @Override public synchronized boolean block() {
    while( !isReleasable() ) { try { wait(); } catch( InterruptedException e ) { } }
    return true;
  }
}