package org.nd4j.aeron.ipc;
import io.aeron.Aeron;
import io.aeron.Publication;
import io.aeron.exceptions.DriverTimeoutException;
import lombok.Builder;
import lombok.Data;
import org.agrona.CloseHelper;
import org.agrona.DirectBuffer;
import org.agrona.concurrent.BusySpinIdleStrategy;
import org.agrona.concurrent.UnsafeBuffer;
import org.nd4j.aeron.ipc.chunk.NDArrayMessageChunk;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.Closeable;
import java.nio.ByteBuffer;
/**
 * Publishes ND4J arrays over an Aeron channel.
 *
 * <p>The Aeron client and the {@link Publication} are created lazily on the first call to
 * {@link #publish(NDArrayMessage)} and reused by later calls. Messages whose serialized size
 * reaches the publication's maximum message length are split into chunks and sent one by one.
 *
 * @author Adam Gibson
 */
@Data
@Builder
public class AeronNDArrayPublisher implements AutoCloseable {
    // A unique identifier for a stream within a channel. Stream ID 0 is reserved
    // for internal use and should not be used by applications; init() replaces 0 with 10.
    private int streamId;
    // The channel (an endpoint identifier) to send the message to.
    // init() falls back to "aeron:udp?endpoint=localhost:40123" when this is null.
    private String channel;
    // Set once init() has filled in the defaults.
    private boolean init = false;
    // Client configuration; init() creates a default context when none was supplied.
    private Aeron.Context ctx;
    // Aeron client; connected lazily by publish() when none was supplied.
    private Aeron aeron;
    // Publication messages are offered to; created lazily by publish().
    private Publication publication;
    private static final Logger log = LoggerFactory.getLogger(AeronNDArrayPublisher.class);
    // Maximum number of attempts to create the publication when the driver times out.
    public final static int NUM_RETRIES = 100;
    // Number of times a failed offer is retried before the send is reported as failed.
    private static final int MAX_OFFER_RETRIES = 5;
    // NOTE(review): Lombok's @Builder does not apply field initializers unless the field is
    // annotated with @Builder.Default - confirm which default builder-created instances see here.
    private boolean compress = true;
    private static final BusySpinIdleStrategy busySpinIdleStrategy = new BusySpinIdleStrategy();
    // Pause in milliseconds between two offer attempts; init() replaces 0 with 3000.
    private int publishRetryTimeOut = 3000;

    /**
     * Fills in default values for every setting that was left unset
     * and marks this publisher as initialized.
     */
    private void init() {
        if (channel == null) {
            channel = "aeron:udp?endpoint=localhost:40123";
        }
        if (streamId == 0) {
            streamId = 10;
        }
        if (publishRetryTimeOut == 0) {
            publishRetryTimeOut = 3000;
        }
        if (ctx == null) {
            ctx = new Aeron.Context();
        }
        init = true;
        log.info("Channel publisher{} and stream {}", channel, streamId);
    }

    /**
     * Publish an ndarray message to the aeron channel.
     *
     * <p>A media driver needs to be running before this is called: the method connects the
     * Aeron client (if there is none yet), creates the publication (if there is none yet),
     * optionally compresses the array and then offers the message, chunked if it is too large.
     *
     * @param message the message to send
     * @throws IllegalStateException if the client cannot connect, the publication cannot be
     *                               created, or the message could not be sent after all retries
     * @throws Exception if the thread is interrupted while waiting between retries, or an
     *                   underlying call fails
     */
    public void publish(NDArrayMessage message) throws Exception {
        if (!init) {
            init();
        }
        connectClient();
        createPublication();

        log.info("Publishing to {} on stream Id {}", channel, streamId);
        INDArray arr = message.getArr();
        if (isCompress()) {
            // NOTE(review): loops until the array reports itself as compressed; presumably a
            // single compressi call flips that flag - confirm, otherwise this never terminates.
            while (!message.getArr().isCompressed()) {
                Nd4j.getCompressor().compressi(arr, "GZIP");
            }
        }

        if (NDArrayMessage.byteBufferSizeForMessage(message) >= publication.maxMessageLength()) {
            // array is large, need to segment
            NDArrayMessageChunk[] chunks = NDArrayMessage.chunks(message, publication.maxMessageLength() / 128);
            for (NDArrayMessageChunk chunk : chunks) {
                ByteBuffer sendBuff = NDArrayMessageChunk.toBuffer(chunk);
                sendBuff.rewind();
                sendBuffer(new UnsafeBuffer(sendBuff));
            }
        } else {
            // send whole array
            sendBuffer(NDArrayMessage.toBuffer(message));
        }
    }

    /**
     * Connects the Aeron client using the configured context, unless a client already exists.
     *
     * @throws IllegalStateException if the connection attempt fails; the original failure is
     *                               attached as the cause
     */
    private void connectClient() {
        if (aeron != null) {
            return;
        }
        try {
            aeron = Aeron.connect(ctx);
        } catch (Exception e) {
            // Fail here with the real cause instead of continuing with a null client.
            throw new IllegalStateException("Publisher unable to connect to the media driver for channel "
                            + channel + " and stream " + streamId, e);
        }
    }

    /**
     * Creates the publication unless one already exists, retrying up to {@link #NUM_RETRIES}
     * times with a linearly growing pause whenever the driver times out.
     *
     * @throws IllegalStateException if no publication could be created within the retry budget
     * @throws InterruptedException if the thread is interrupted while waiting to retry
     */
    private void createPublication() throws InterruptedException {
        int connectionTries = 0;
        DriverTimeoutException lastFailure = null;
        while (publication == null && connectionTries < NUM_RETRIES) {
            try {
                publication = aeron.addPublication(channel, streamId);
                log.info("Created publication on channel {} and stream {}", channel, streamId);
            } catch (DriverTimeoutException e) {
                lastFailure = e;
                connectionTries++;
                // Log the delay that is actually applied, before sleeping.
                log.warn("Failed to connect due to driver time out on channel {} and stream {}...retrying in {} seconds",
                                channel, streamId, connectionTries);
                Thread.sleep(1000L * connectionTries);
            }
        }
        if (publication == null) {
            throw new IllegalStateException(
                            "Publisher unable to connect to channel " + channel + " and stream " + streamId,
                            lastFailure);
        }
    }

    /**
     * Offers the whole buffer to the publication, retrying failed offers.
     *
     * <p>{@code offer} is non-blocking; a negative result means the message was not sent.
     * After each failed offer the reason is logged and the thread sleeps for
     * {@code publishRetryTimeOut} milliseconds before trying again.
     *
     * @param buffer the buffer to send, from offset 0 up to its capacity
     * @throws IllegalStateException if the final offer still failed
     * @throws Exception if the thread is interrupted while waiting between retries
     */
    private void sendBuffer(DirectBuffer buffer) throws Exception {
        long result;
        int tries = 0;
        while ((result = publication.offer(buffer, 0, buffer.capacity())) < 0L && tries < MAX_OFFER_RETRIES) {
            if (result == Publication.BACK_PRESSURED) {
                log.info("Offer failed due to back pressure");
            } else if (result == Publication.NOT_CONNECTED) {
                log.info("Offer failed because publisher is not connected to subscriber {} and stream {}", channel,
                                streamId);
            } else if (result == Publication.ADMIN_ACTION) {
                log.info("Offer failed because of an administration action in the system and channel{} and stream {}",
                                channel, streamId);
            } else if (result == Publication.CLOSED) {
                log.info("Offer failed publication is closed and channel{} and stream {}", channel, streamId);
            } else {
                log.info(" Offer failed due to unknown reason and channel{} and stream {}", channel, streamId);
            }
            Thread.sleep(publishRetryTimeOut);
            tries++;
        }
        // A negative result after the loop means every attempt failed; report it
        // instead of silently dropping the message.
        if (result < 0L) {
            throw new IllegalStateException("Failed to send message on channel " + channel + " and stream "
                            + streamId + " (last offer result " + result + ")");
        }
    }

    /**
     * Publish an ndarray to an aeron channel,
     * wrapped as a whole array update message.
     *
     * @param arr the array to send
     * @throws Exception if publishing the wrapped message fails
     */
    public void publish(INDArray arr) throws Exception {
        publish(NDArrayMessage.wholeArrayUpdate(arr));
    }

    /**
     * Quietly closes the publication, if one was created.
     *
     * <p>Only the publication is closed here; the Aeron client held by this publisher
     * is left untouched.
     *
     * @throws Exception declared by {@link AutoCloseable#close()}
     */
    @Override
    public void close() throws Exception {
        if (publication != null) {
            CloseHelper.quietClose(publication);
        }
    }
}