/*
* Copyright (c) 2003-2007 Sun Microsystems, Inc. All rights reserved.
*
* The Sun Project JXTA(TM) Software License
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. The end-user documentation included with the redistribution, if any, must
* include the following acknowledgment: "This product includes software
* developed by Sun Microsystems, Inc. for JXTA(TM) technology."
* Alternately, this acknowledgment may appear in the software itself, if
* and wherever such third-party acknowledgments normally appear.
*
* 4. The names "Sun", "Sun Microsystems, Inc.", "JXTA" and "Project JXTA" must
* not be used to endorse or promote products derived from this software
* without prior written permission. For written permission, please contact
* Project JXTA at http://www.jxta.org.
*
* 5. Products derived from this software may not be called "JXTA", nor may
* "JXTA" appear in their name, without prior written permission of Sun.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED WARRANTIES,
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
* FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SUN
* MICROSYSTEMS OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
* OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* JXTA is a registered trademark of Sun Microsystems, Inc. in the United
* States and other countries.
*
* Please see the license information page at :
* <http://www.jxta.org/project/www/license.html> for instructions on use of
* the license in source files.
*
* ====================================================================
*
* This software consists of voluntary contributions made by many individuals
* on behalf of Project JXTA. For more information on Project JXTA, please see
* http://www.jxta.org.
*
* This license is based on the BSD license adopted by the Apache Foundation.
*/
package net.jxta.impl.util.pipe.reliable;
import net.jxta.endpoint.ByteArrayMessageElement;
import net.jxta.endpoint.Message;
import net.jxta.endpoint.MessageElement;
import net.jxta.endpoint.StringMessageElement;
import net.jxta.endpoint.WireFormatMessage;
import net.jxta.endpoint.WireFormatMessageFactory;
import net.jxta.impl.util.TimeUtils;
import net.jxta.logging.Logging;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.logging.Level;
import java.util.logging.Logger;
/**
* Accepts data and packages it into messages for sending to the remote. The
* messages are kept in a retry queue until the remote peer acknowledges
* receipt of the message.
*/
public class ReliableOutputStream extends OutputStream implements Incoming {
/**
* Logger
*/
private final static Logger LOG = Logger.getLogger(ReliableOutputStream.class.getName());
/**
* Initial estimated Round Trip Time
*
* Ten seconds is much too long here. Reduced to five.
*
*/
private final static long initRTT = 5 * TimeUtils.ASECOND;
/**
* The default size for the blocks we will chunk the stream into.
*/
private final static int DEFAULT_MESSAGE_CHUNK_SIZE = 63 * 1024;
private final static MessageElement RETELT = new StringMessageElement(Defs.RETRY_ELEMENT_NAME, Defs.RETRY_ELEMENT_VALUE, null);
/**
* A lock we use to ensure that write operations happen in order.
*/
private final Object writeLock = new String("writeLock");
/**
* The buffer we cache writes to.
*/
private byte[] writeBuffer = null;
/**
* Number of bytes written to the write buffer.
*/
private int writeCount = 0;
/**
* Set the default write buffer size.
*/
private int writeBufferSize = DEFAULT_MESSAGE_CHUNK_SIZE;
/**
* If less than {@code TimeUtils.timenow()} then we are closed otherwise
* this is the absolute time at which we will become closed. We begin by
* setting this value as {@Long.MAX_VALUE} until we establish an earlier
* close deadline.
*/
private long closedAt = Long.MAX_VALUE;
/**
* If {@code true} then we have received a close request from the remote
* side. They do not want to receive any more messages from us.
*/
private volatile boolean remoteClosed = false;
/**
* If {@code true} then we have closed this stream locally and will not
* accept any further messages for sending. Unacknowledged messages will
* be retransmitted until the linger delay is passed.
*/
private volatile boolean localClosed = false;
/**
* The relative time in milliseconds that we will allow our connection to
* linger.
*/
private long lingerDelay = 120 * TimeUtils.ASECOND;
/**
* Sequence number of the message we most recently sent out.
*/
private final AtomicInteger sequenceNumber = new AtomicInteger(0);
/**
* Sequence number of highest sequential ACK.
*/
private volatile int maxACK = 0;
/**
* connection we are working for
*/
private final Outgoing outgoing;
/**
* The daemon thread that performs retransmissions.
*/
private Thread retrThread = null;
// for retransmission
/**
* Average round trip time in milliseconds.
*/
private volatile long aveRTT = initRTT;
private volatile long remRTT = 0;
/**
* Has aveRTT been set at least once over its initial guesstimate value.
*/
private boolean aveRTTreset = false;
/**
* Number of ACK message received.
*/
private final AtomicInteger numACKS = new AtomicInteger(0);
/**
* When to start computing aveRTT
*/
private int rttThreshold = 0;
/**
* Retry Time Out measured in milliseconds.
*/
private volatile long RTO = 0;
/**
* Minimum Retry Timeout measured in milliseconds.
*/
private volatile long minRTO = 500; // We begin with a reasonable value for an average network. This will not be used if RTT is greater.
/**
* absolute time in milliseconds of last sequential ACK.
*/
private volatile long lastACKTime = 0;
/**
* absolute time in milliseconds of last SACK based retransmit.
*/
private volatile long sackRetransTime = 0;
// running average of receipients Input Queue
private int nIQTests = 0;
private int aveIQSize = 0;
/**
* Our estimation of the current free space in the remote input queue.
*/
private volatile int mrrIQFreeSpace = 0;
/**
* Our estimation of the maximum size of the remote input queue.
*/
private int rmaxQSize = Defs.MAXQUEUESIZE;
/**
* The flow control module.
*/
private final FlowControl fc;
/**
* Cache of the last rwindow recommendation by fc.
*/
private volatile int rwindow = 0;
/**
* Number of acknowledged sends (round trips) before the connection is regarded as 'stable'
* Once stabilisation is established, downward tracking of RTO is suspended
* Set to zero to defeat this behaviour.
*/
private volatile int stabalizationAckCount = 0;
/**
* retrans queue element
*/
private static class RetrQElt {
/**
* sequence number of this message.
*/
final int seqnum;
/**
* the message
*/
final Message msg;
/**
* absolute time of original enqueuing
*/
final long enqueuedAt;
/**
* has been marked as retransmission
*/
int marked;
/**
* absolute time when this msg was last transmitted
*/
long sentAt;
/**
* Constructor for the RetrQElt object
*
* @param seqnum sequence number
* @param msg the message
*/
public RetrQElt(int seqnum, Message msg) {
this.seqnum = seqnum;
this.msg = msg;
this.enqueuedAt = TimeUtils.timeNow();
this.sentAt = this.enqueuedAt;
this.marked = 0;
}
}
/**
* The collection of messages available for re-transmission.
*/
protected final List<RetrQElt> retrQ = new ArrayList<RetrQElt>();
/**
* Constructor for the ReliableOutputStream object
*
* @param outgoing the outgoing object
*/
public ReliableOutputStream(Outgoing outgoing) {
// By default use the old behaviour: fixed fc with a rwin of 20
this(outgoing, new FixedFlowControl(20));
}
/**
* Constructor for the ReliableOutputStream object
*
* @param outgoing the outgoing object
* @param fc flow-control
*/
public ReliableOutputStream(Outgoing outgoing, FlowControl fc) {
this.outgoing = outgoing;
String minrto = System.getProperty( "net.jxta.reliable.minrto" );
if( null != minrto ){
this.minRTO = Integer.parseInt( minrto );
}
String ackStabilizaton = System.getProperty( "net.jxta.reliable.stablizeacks" );
if( null != ackStabilizaton ){
this.stabalizationAckCount = Integer.parseInt( ackStabilizaton );
}
// initial RTO is set to maxRTO so as to give time
// to the receiver to catch-up
this.RTO = outgoing.getMaxRetryAge();
this.mrrIQFreeSpace = rmaxQSize;
this.rttThreshold = rmaxQSize;
// Init last ACK Time to now
this.lastACKTime = TimeUtils.timeNow();
this.sackRetransTime = TimeUtils.timeNow();
// Attach the flowControl module
this.fc = fc;
// Update our initial rwindow to reflect fc's initial value
this.rwindow = fc.getRwindow();
}
/**
* {@inheritDoc}
*/
@Override
public void close() throws IOException {
flush();
super.close();
localClosed = true;
closedAt = TimeUtils.toAbsoluteTimeMillis(lingerDelay);
synchronized (retrQ) {
retrQ.notifyAll();
}
if (Logging.SHOW_INFO && LOG.isLoggable(Level.INFO)) {
LOG.info("Closed.");
}
}
public long getLingerDelay() {
return lingerDelay;
}
public void setLingerDelay(long linger) {
if (linger < 0) {
throw new IllegalArgumentException("Linger delay may not be negative.");
}
if (0 == linger) {
linger = Long.MAX_VALUE;
}
lingerDelay = linger;
}
/**
* Return the size of the buffers we are using for accumulating writes.
*
* @return size of our write buffers.
*/
public int setSendBufferSize() {
return writeBufferSize;
}
/**
* Set the size of the buffers we will use for accumulating writes.
*
* @param size The desired size of write buffers.
* @throws IOException if an I/O error occurs. In particular, an IOException is thrown if the output stream is closed.
*/
public void setSendBufferSize(int size) throws IOException {
if (size <= 0) {
throw new IllegalArgumentException("Send buffer size may not be <= 0");
}
// Flush any existing buffered writes. Then next write will use the new buffer size.
synchronized (writeLock) {
flushBuffer();
writeBufferSize = size;
}
}
/**
* We have received a close request from the remote peer. We must stop
* retransmissions immediately.
*/
public void hardClose() {
remoteClosed = true;
closedAt = TimeUtils.timeNow();
// Clear the retry queue. Remote side doesn't care.
synchronized (retrQ) {
retrQ.clear();
retrQ.notifyAll();
}
// Clear the write queue. Remote side doesn't care.
synchronized (writeLock) {
writeCount = 0;
writeBuffer = null;
}
if (Logging.SHOW_INFO && LOG.isLoggable(Level.INFO)) {
LOG.info("Hard closed.");
}
}
/**
* Returns the state of the stream
*
* @return true if closed
*/
public boolean isClosed() {
return localClosed || remoteClosed;
}
/**
* {@inheritDoc}
*/
@Override
public void flush() throws IOException {
synchronized (writeLock) {
flushBuffer();
}
}
/**
* {@inheritDoc}
*/
@Override
public void write(int b) throws IOException {
write(new byte[] { (byte) b }, 0, 1);
}
/**
* {@inheritDoc}
*/
@Override
public void write(byte b[], int off, int len) throws IOException {
synchronized (writeLock) {
if (isClosed()) {
throw new IOException("stream is closed");
}
if ((off < 0) || (off > b.length) || (len < 0) || ((off + len) > b.length) || ((off + len) < 0)) {
throw new IndexOutOfBoundsException();
}
if (len == 0) {
return;
}
int current = off;
int end = current + len;
while (current < end) {
if (0 == writeCount) {
// No bytes written? We need a new buffer.
writeBuffer = new byte[writeBufferSize];
}
int remain = end - current;
int available = writeBuffer.length - writeCount;
int copy = Math.min(available, remain);
System.arraycopy(b, current, writeBuffer, writeCount, copy);
writeCount += copy;
current += copy;
if (writeBuffer.length == writeCount) {
flushBuffer();
}
}
}
}
/**
* Flush the internal buffer. {@code writeLock} must have been previously
* acquired.
* @throws IOException if an I/O error occurs. In particular, an IOException is thrown if the output stream is closed.
*/
private void flushBuffer() throws IOException {
if (writeCount > 0) {
// send the message
try {
writeBuffer(writeBuffer, 0, writeCount);
} finally {
writeCount = 0;
writeBuffer = null;
}
}
}
/**
* Write the internal buffer. {@code writeLock} must have been previously
* acquired.
*
* @param b data
* @param off the start offset in the data.
* @param len the number of bytes to write.
* @throws IOException if an I/O error occurs. In particular, an IOException is thrown if the output stream is closed.
*/
private void writeBuffer(byte[] b, int off, int len) throws IOException {
if ((off < 0) || (off > b.length) || (len < 0) || ((off + len) > b.length) || ((off + len) < 0)) {
throw new IndexOutOfBoundsException();
}
if (len == 0) {
return;
}
if (null == retrThread) {
retrThread = new Thread(new Retransmitter(), "JXTA Reliable Retransmiter for " + this);
retrThread.setDaemon(true);
retrThread.start();
}
// allocate new message
Message jmsg = new Message();
synchronized (retrQ) {
while (true) {
if (isClosed()) {
throw new IOException("Connection is " + (localClosed ? "closing" : "closed"));
}
if (retrQ.size() > Math.min(rwindow, mrrIQFreeSpace * 2)) {
try {
retrQ.wait(1000);
} catch (InterruptedException ignored) {// ignored
}
continue;
}
break;
}
int sequenceToUse = sequenceNumber.incrementAndGet();
MessageElement element = new ByteArrayMessageElement(Integer.toString(sequenceToUse), Defs.MIME_TYPE_BLOCK, b, off
,
len, null);
jmsg.addMessageElement(Defs.NAMESPACE, element);
RetrQElt retrQel = new RetrQElt(sequenceToUse, jmsg.clone());
if (Logging.SHOW_FINE && LOG.isLoggable(Level.FINE)) {
LOG.fine("Reliable WRITE : seqn#" + sequenceNumber + " length=" + len);
}
// place copy on retransmission queue
retrQ.add(retrQel);
if (Logging.SHOW_FINE && LOG.isLoggable(Level.FINE)) {
LOG.fine("Retrans Enqueue added seqn#" + sequenceNumber + " retrQ.size()=" + retrQ.size());
}
}
outgoing.send(jmsg);
mrrIQFreeSpace--;
// assume we have now taken a slot
if (Logging.SHOW_FINE && LOG.isLoggable(Level.FINE)) {
LOG.fine("SENT : seqn#" + sequenceNumber + " length=" + len);
}
}
/**
* Serialize a JXTA message as a reliable message.
*
* <p/>This method bypasses the built-in buffering and ignores the MTU size.
*
* @param msg message to send
* @return message sequence number
* @throws IOException if an I/O error occurs
*/
public int send(Message msg) throws IOException {
WireFormatMessage msgSerialized = WireFormatMessageFactory.toWire(msg, Defs.MIME_TYPE_MSG, null);
ByteArrayOutputStream baos = new ByteArrayOutputStream((int) msgSerialized.getByteLength());
msgSerialized.sendToStream(baos);
baos.close();
byte[] bytes = baos.toByteArray();
synchronized (writeLock) {
flushBuffer();
writeBuffer(bytes, 0, bytes.length);
return sequenceNumber.get();
}
}
/**
* Gets the maxAck attribute of the ReliableOutputStream object
*
* @return The maxAck value
*/
public int getMaxAck() {
return maxACK;
}
/**
* Gets the seqNumber attribute of the ReliableOutputStream object
*
* @return The seqNumber value
*/
public int getSeqNumber() {
return sequenceNumber.get();
}
/**
* Gets the queueFull attribute of the ReliableOutputStream object
*
* @return The queueFull value
*/
protected boolean isQueueFull() {
return mrrIQFreeSpace < 1;
}
/**
* Gets the queueEmpty attribute of the ReliableOutputStream object.
*
* @return {@code true} if the queue is empty otherwise {@code false}.
*/
public boolean isQueueEmpty() {
synchronized (retrQ) {
return retrQ.isEmpty();
}
}
/**
* Waits for the retransmit queue to become empty.
*
* @param timeout The relative time in milliseconds to wait for the queue to
* become empty.
* @return {@code true} if the queue is empty otherwise {@code false}.
* @throws InterruptedException if interrupted
*/
public boolean waitQueueEmpty(long timeout) throws InterruptedException {
long timeoutAt = TimeUtils.toAbsoluteTimeMillis(timeout);
synchronized (retrQ) {
while (!retrQ.isEmpty() && (TimeUtils.timeNow() < timeoutAt)) {
long sleepTime = TimeUtils.toRelativeTimeMillis(timeoutAt);
if (sleepTime > 0) {
retrQ.wait(sleepTime);
}
}
return retrQ.isEmpty();
}
}
/**
* wait for activity on the retry queue
*
* @param timeout timeout in millis
* @throws InterruptedException when interrupted
*/
public void waitQueueEvent(long timeout) throws InterruptedException {
synchronized (retrQ) {
retrQ.wait(timeout);
}
}
/**
* Calculates a message retransmission time-out
*
* @param dt base time
* @param msgSeqNum Message sequence number
*/
private void calcRTT(long dt, int msgSeqNum) {
if (numACKS.incrementAndGet() == 1) {
// First ACK arrived. We can start computing aveRTT on the messages
// we send from now on.
rttThreshold = sequenceNumber.get() + 1;
}
if (msgSeqNum > rttThreshold) {
// Compute only when it has stabilized a bit
// Since the initial mrrIQFreeSpace is small; the first few
// messages will be sent early on and may wait a long time
// for the return channel to initialize. After that things
// start flowing and RTT becomes relevant.
// Carefull with the computation: integer division with round-down
// causes cumulative damage: the ave never goes up if this is not
// taken care of. We keep the reminder from one round to the other.
// What follows is the calculation of exponential smoothing variable with
// a smoothing constant (lambda) set to 1/3. The previous value of 1/9 was
// a bit too small, and caused the implementation to be somewhat slow
// to adjust to changes in network latency. 1/3 gives a
// more reliable mean and a smaller standard deviation across various network conditions
//
// See http://www.itl.nist.gov/div898/handbook/pmc/section4/pmc431.htm for
// more discussion on the exponential smoothing algorithm(s) and running averages
if (!aveRTTreset) {
aveRTT = dt;
aveRTTreset = true;
} else {
long tmp = (6 * aveRTT) + ((6 * remRTT) / 9) + (3 * dt);
aveRTT = tmp / 9;
remRTT = tmp - aveRTT * 9;
}
}
long newRTO = aveRTT * 2;
// Unless stabalizationAckCount is zero, after a period of stream stabilisation, do not reduce the RTO value further.
// This avoids the situation where a few small message sends reduce the RTO so much that when a large
// message is sent it immediately requires repetitive retransmission until the value of RTO climbs again.
// This is most apparent when using 'slow' relayed streams and large MTUs.
if( 0 != this.stabalizationAckCount && numACKS.get() > this.stabalizationAckCount ){
RTO = Math.max( RTO, newRTO );
} else {
// Enforce a min/max
RTO = Math.max(newRTO, minRTO);
RTO = Math.min(RTO, outgoing.getMaxRetryAge());
}
if (Logging.SHOW_FINE && LOG.isLoggable(Level.FINE)) {
LOG.fine("RTT = " + dt + "ms aveRTT = " + aveRTT + "ms" + " RTO = " + RTO + "ms" + " maxRTO = " + outgoing.getMaxRetryAge() + "ms");
}
}
/**
* @param iq Description of the Parameter
* @return Description of the Return Value
*/
private int calcAVEIQ(int iq) {
int n = nIQTests;
nIQTests += 1;
aveIQSize = ((n * aveIQSize) + iq) / nIQTests;
return aveIQSize;
}
/**
* process an incoming message
*
* @param msg message to process
*/
public void recv(Message msg) {
Iterator<MessageElement> eachACK = msg.getMessageElements(Defs.NAMESPACE, Defs.MIME_TYPE_ACK);
while (eachACK.hasNext()) {
MessageElement elt = eachACK.next();
eachACK.remove();
int sackCount = ((int) elt.getByteLength() / 4) - 1;
try {
DataInputStream dis = new DataInputStream(elt.getStream());
int seqack = dis.readInt();
int[] sacs = new int[sackCount];
for (int eachSac = 0; eachSac < sackCount; eachSac++) {
sacs[eachSac] = dis.readInt();
}
Arrays.sort(sacs);
// take care of the ACK here;
ackReceived(seqack, sacs);
} catch (IOException failed) {
if (Logging.SHOW_WARNING && LOG.isLoggable(Level.WARNING)) {
LOG.log(Level.WARNING, "Failure processing ACK", failed);
}
}
}
}
/**
* Process an ACK Message. We remove ACKed
* messages from the retry queue. We only
* acknowledge messages received in sequence.
* <p/>
* The seqnum is for the largest unacknowledged seqnum
* the recipient has received.
* <p/>
* The sackList is a sequence of all of the
* received messages in the sender's input Q. All
* will be sequence numbers higher than the
* sequential ACK seqnum.
* <p/>
* Recipients are passive and only ack upon the
* receipt of an in sequence message.
* <p/>
* They depend on our RTO to fill holes in message
* sequences.
*
* @param seqnum message sequence number
* @param sackList array of message sequence numbers
*/
public void ackReceived(int seqnum, int[] sackList) {
int numberACKed = 0;
long rttCalcDt = 0;
int rttCalcSeqnum = -1;
long fallBackDt = 0;
int fallBackSeqnum = -1;
// remove acknowledged messages from retrans Q.
synchronized (retrQ) {
lastACKTime = TimeUtils.timeNow();
fc.ackEventBegin();
maxACK = Math.max(maxACK, seqnum);
// dump the current Retry queue and the SACK list
if (Logging.SHOW_FINE && LOG.isLoggable(Level.FINE)) {
StringBuilder dumpRETRQ = new StringBuilder("ACK RECEIVE : " + Integer.toString(seqnum));
if (Logging.SHOW_FINE && LOG.isLoggable(Level.FINE)) {
dumpRETRQ.append('\n');
}
dumpRETRQ.append("\tRETRQ (size=").append(retrQ.size()).append(")");
if (Logging.SHOW_FINE && LOG.isLoggable(Level.FINE)) {
dumpRETRQ.append(" : ");
for (int y = 0; y < retrQ.size(); y++) {
if (0 != y) {
dumpRETRQ.append(", ");
}
RetrQElt r = retrQ.get(y);
dumpRETRQ.append(r.seqnum);
}
}
if (Logging.SHOW_FINE && LOG.isLoggable(Level.FINE)) {
dumpRETRQ.append('\n');
}
dumpRETRQ.append("\tSACKLIST (size=").append(sackList.length).append(")");
if (Logging.SHOW_FINE && LOG.isLoggable(Level.FINE)) {
dumpRETRQ.append(" : ");
for (int y = 0; y < sackList.length; y++) {
if (0 != y) {
dumpRETRQ.append(", ");
}
dumpRETRQ.append(sackList[y]);
}
}
LOG.fine(dumpRETRQ.toString());
}
Iterator<RetrQElt> eachRetryQueueEntry = retrQ.iterator();
// First remove monotonically increasing seq#s in retrans vector
while (eachRetryQueueEntry.hasNext()) {
RetrQElt retrQElt = eachRetryQueueEntry.next();
if (retrQElt.seqnum > seqnum) {
break;
}
// Acknowledged
eachRetryQueueEntry.remove();
// Update RTT, RTO. Use only those that where acked
// w/o retrans otherwise the number may be phony (ack
// of first xmit received just after resending => RTT
// seems small). Also, we keep the worst of the bunch
// we encounter. If we really can't find a single
// non-resent message, we make do with a pessimistic
// approximation: we must not be left behind with an
// RTT that's too short, we'd keep resending like
// crazy.
long enqueuetime = retrQElt.enqueuedAt;
long dt = TimeUtils.toRelativeTimeMillis(lastACKTime, enqueuetime);
// Update RTT, RTO
if (retrQElt.marked == 0) {
if (dt > rttCalcDt) {
rttCalcDt = dt;
rttCalcSeqnum = retrQElt.seqnum;
}
} else {
// In case we find no good candidate, make
// a guess by dividing by the number of attempts
// and keep the worst of them too. Since we
// know it may be too short, we will not use it
// if shortens rtt.
dt /= (retrQElt.marked + 1);
if (dt > fallBackDt) {
fallBackDt = dt;
fallBackSeqnum = retrQElt.seqnum;
}
}
fc.packetACKed(retrQElt.seqnum);
retrQElt = null;
numberACKed++;
}
// Update last accessed time in response to getting seq acks.
if (numberACKed > 0) {
outgoing.setLastAccessed(TimeUtils.timeNow());
}
if (Logging.SHOW_FINE && LOG.isLoggable(Level.FINE)) {
LOG.fine("SEQUENTIALLY ACKD SEQN = " + seqnum + ", (" + numberACKed + " acked)");
}
// most recent remote IQ free space
mrrIQFreeSpace = rmaxQSize - sackList.length;
// let's look at average sacs.size(). If it is big, then this
// probably means we must back off because the system is slow.
// Our retrans Queue can be large and we can overwhelm the
// receiver with retransmissions.
// We will keep the rwin <= ave real input queue size.
int aveIQ = calcAVEIQ(sackList.length);
if (Logging.SHOW_FINE && LOG.isLoggable(Level.FINE)) {
LOG.fine("remote IQ free space = " + mrrIQFreeSpace + " remote avg IQ occupancy = " + aveIQ);
}
int retrans = 0;
if (sackList.length > 0) {
Iterator<RetrQElt> eachRetrQElement = retrQ.iterator();
int currentSACK = 0;
while (eachRetrQElement.hasNext()) {
RetrQElt retrQElt = eachRetrQElement.next();
while (sackList[currentSACK] < retrQElt.seqnum) {
currentSACK++;
if (currentSACK == sackList.length) {
break;
}
}
if (currentSACK == sackList.length) {
break;
}
if (sackList[currentSACK] == retrQElt.seqnum) {
fc.packetACKed(retrQElt.seqnum);
numberACKed++;
eachRetrQElement.remove();
// Update RTT, RTO. Use only those that where acked w/o retrans
// otherwise the number is completely phony.
// Also, we keep the worst of the bunch we encounter.
long enqueuetime = retrQElt.enqueuedAt;
long dt = TimeUtils.toRelativeTimeMillis(lastACKTime, enqueuetime);
// Update RTT, RTO
if (retrQElt.marked == 0) {
if (dt > rttCalcDt) {
rttCalcDt = dt;
rttCalcSeqnum = retrQElt.seqnum;
}
} else {
// In case we find no good candidate, make
// a guess by dividing by the number of attempts
// and keep the worst of them too. Since we
// know it may be too short, we will not use it
// if shortens rtt.
dt /= (retrQElt.marked + 1);
if (dt > fallBackDt) {
fallBackDt = dt;
fallBackSeqnum = retrQElt.seqnum;
}
}
if (Logging.SHOW_FINE && LOG.isLoggable(Level.FINE)) {
LOG.fine("SACKD SEQN = " + retrQElt.seqnum);
}
// GC this stuff
retrQElt = null;
} else {
// Retransmit? Only if there is a hole in the selected
// acknowledgement list. Otherwise let RTO deal.
// Given that this SACK acknowledged messages still
// in the retrQ:
// seqnum is the max consectively SACKD message.
// seqnum < retrQElt.seqnum means a message has not reached
// receiver. EG: sacklist == 10,11,13 seqnum == 11
// We retransmit 12.
if (seqnum < retrQElt.seqnum) {
fc.packetMissing(retrQElt.seqnum);
retrans++;
if (Logging.SHOW_FINE && LOG.isLoggable(Level.FINE)) {
LOG.fine("RETR: Fill hole, SACK, seqn#" + retrQElt.seqnum + ", Window =" + retrans);
}
}
}
}
if (Logging.SHOW_FINE && LOG.isLoggable(Level.FINE)) {
LOG.fine("SELECTIVE ACKD (" + numberACKed + ") " + retrans + " retrans wanted");
}
}
// Compute aveRTT on the most representative message,
// if any. That's the most accurate data.
// Failing that we use the fall back, provided that it not
// more recent than aveRTT ago - that would decrease aveRTT
// and in the absence of solid data, we do not want to take
// that risk.
if (rttCalcSeqnum != -1) {
calcRTT(rttCalcDt, rttCalcSeqnum);
// get fc to recompute rwindow
rwindow = fc.ackEventEnd(rmaxQSize, aveRTT, rttCalcDt);
} else if ((fallBackSeqnum != -1) && (fallBackDt > aveRTT)) {
calcRTT(fallBackDt, fallBackSeqnum);
// get fc to recompute rwindow
rwindow = fc.ackEventEnd(rmaxQSize, aveRTT, fallBackDt);
}
retrQ.notifyAll();
}
}
/**
* retransmit unacknowledged messages
*
* @param rwin max number of messages to retransmit
* @param triggerTime base time
* @return number of messages retransmitted.
*/
private int retransmit(int rwin, long triggerTime) {
List<RetrQElt> retransMsgs = new ArrayList<RetrQElt>();
int numberToRetrans;
// build a list of retries.
synchronized (retrQ) {
numberToRetrans = Math.min(retrQ.size(), rwin);
if (numberToRetrans > 0 && Logging.SHOW_FINE && LOG.isLoggable(Level.FINE)) {
LOG.fine("Number of messages pending retransmit =" + numberToRetrans);
}
for (int j = 0; j < numberToRetrans; j++) {
RetrQElt r = retrQ.get(j);
// Mark message as retransmission
// need to know if a msg was retr or not for RTT eval
if (r.marked == 0) {
// First time: we're here because this message has not arrived, but
// the next one has. It may be an out of order message.
// Experience shows that such a message rarely arrives older than
// 1.2 * aveRTT. Beyond that, it's lost. It is also rare that we
// detect a hole within that delay. So, often enough, as soon as
// a hole is detected, it's time to resend...but not always.
if (TimeUtils.toRelativeTimeMillis(triggerTime, r.sentAt) < (6 * aveRTT) / 5) {
// Nothing to worry about, yet.
continue;
}
} else {
// That one has been retransmitted at least once already.
// So, we don't have much of a clue other than the age of the
// last transmission. It is unlikely that it arrives before aveRTT/2
// but we have to anticipate its loss at the risk of making dupes.
// Otherwise the receiver will reach the hole, and that's really
// expensive. (Think that we've been trying for a while already.)
if (TimeUtils.toRelativeTimeMillis(triggerTime, r.sentAt) < aveRTT) {
// Nothing to worry about, yet.
continue;
}
}
r.marked++;
// Make a copy to for sending
retransMsgs.add(r);
}
}
// send the retries.
int retransmitted = 0;
Iterator<RetrQElt> eachRetrans = retransMsgs.iterator();
while (eachRetrans.hasNext()) {
RetrQElt r = eachRetrans.next();
eachRetrans.remove();
try {
if (Logging.SHOW_FINE && LOG.isLoggable(Level.FINE)) {
LOG.fine("RETRANSMIT seqn#" + r.seqnum);
}
Message sending = r.msg;
// its possible that the message was
// acked while we were working in this
// case r.msg will have been nulled.
if (null != sending) {
sending = sending.clone();
sending.replaceMessageElement(Defs.NAMESPACE, RETELT);
if (outgoing.send(sending)) {
r.sentAt = TimeUtils.timeNow();
mrrIQFreeSpace--;
// assume we have now taken a slot
retransmitted++;
} else {
break;
// don't bother continuing sending now.
}
}
} catch (IOException e) {
if (Logging.SHOW_FINE && LOG.isLoggable(Level.FINE)) {
LOG.log(Level.FINE, "FAILED RETRANS seqn#" + r.seqnum, e);
}
break;
// don't bother continuing.
}
}
if (Logging.SHOW_FINE && LOG.isLoggable(Level.FINE)) {
LOG.fine("RETRANSMITED " + retransmitted + " of " + numberToRetrans);
}
return retransmitted;
}
/**
* Retransmission daemon thread
*/
private class Retransmitter implements Runnable {
int nAtThisRTO = 0;
volatile int nretransmitted = 0;
/**
* Constructor for the Retransmitter object
*/
public Retransmitter() {
if (Logging.SHOW_INFO && LOG.isLoggable(Level.INFO)) {
LOG.info("STARTED Reliable Retransmitter, RTO = " + RTO);
}
}
/**
* Gets the retransCount attribute of the Retransmitter object
*
* @return The retransCount value
*/
public int getRetransCount() {
return nretransmitted;
}
/**
* {@inheritDoc}
*
* <p/>Main processing method for the Retransmitter object
*/
public void run() {
try {
int idleCounter = 0;
while (TimeUtils.toRelativeTimeMillis(closedAt) > 0) {
long conn_idle = TimeUtils.toRelativeTimeMillis(TimeUtils.timeNow(), outgoing.getLastAccessed());
if (Logging.SHOW_FINE && LOG.isLoggable(Level.FINE)) {
LOG.fine(outgoing + " idle for " + conn_idle);
}
// check to see if we have not idled out.
if (outgoing.getIdleTimeout() < conn_idle) {
if (Logging.SHOW_INFO && LOG.isLoggable(Level.INFO)) {
LOG.info("Shutting down idle " + "connection " + outgoing);
}
break;
}
long sinceLastACK;
long oldestInQueueWait;
synchronized (retrQ) {
try {
if (RTO > 0) {
retrQ.wait(RTO);
}
Thread.currentThread().setName(
"JXTA Reliable Retransmiter for " + this + " Queue size : " + retrQ.size());
} catch (InterruptedException e) {// ignored
}
if (TimeUtils.toRelativeTimeMillis(closedAt) <= 0) {
break;
}
// see if we recently did a retransmit triggered by a SACK
long sinceLastSACKRetr = TimeUtils.toRelativeTimeMillis(TimeUtils.timeNow(), sackRetransTime);
if (sinceLastSACKRetr < RTO) {
if (Logging.SHOW_FINE && LOG.isLoggable(Level.FINE)) {
LOG.fine("SACK retrans " + sinceLastSACKRetr + "ms ago");
}
continue;
}
// See how long we've waited since RTO was set
sinceLastACK = TimeUtils.toRelativeTimeMillis(TimeUtils.timeNow(), lastACKTime);
if (!retrQ.isEmpty()) {
RetrQElt elt = retrQ.get(0);
oldestInQueueWait = TimeUtils.toRelativeTimeMillis(TimeUtils.timeNow(), elt.enqueuedAt);
} else {
oldestInQueueWait = 0;
}
}
if (Logging.SHOW_FINE && LOG.isLoggable(Level.FINE)) {
LOG.fine("Last ACK " + sinceLastACK + "ms ago. Age of oldest in Queue " + oldestInQueueWait + "ms.");
}
// see if the queue has gone dead
if (oldestInQueueWait > outgoing.getMaxRetryAge()) {
if (Logging.SHOW_INFO && LOG.isLoggable(Level.INFO)) {
LOG.info("Shutting down stale connection " + outgoing);
}
break;
}
// get real wait as max of age of oldest in retrQ and
// lastAck time
long realWait = Math.max(oldestInQueueWait, sinceLastACK);
// Retransmit only if RTO has expired.
// a. real wait time is longer than RTO
// b. oldest message on Q has been there longer
// than RTO. This is necessary because we may
// have just sent a message, and we do not
// want to overrun the receiver. Also, we
// do not want to restransmit a message that
// has not been idle for the RTO.
if ((realWait >= RTO) && (oldestInQueueWait >= RTO)) {
if (Logging.SHOW_FINE && LOG.isLoggable(Level.FINE)) {
LOG.fine("RTO RETRANSMISSION [" + rwindow + "]");
}
// retransmit
int retransed = retransmit(rwindow, TimeUtils.timeNow());
// Total
nretransmitted += retransed;
// number at this RTO
nAtThisRTO += retransed;
// See if real wait is too long and queue is non-empty
// Remote may be dead - double until max.
// Double after window restransmitted msgs at this RTO
// exceeds the rwindow, and we've had no response for
// twice the current RTO.
if ((retransed > 0) && (realWait >= 2 * RTO) && (nAtThisRTO >= 2 * rwindow)) {
RTO = (realWait > outgoing.getMaxRetryAge() ? outgoing.getMaxRetryAge() : 2 * RTO);
nAtThisRTO = 0;
}
if (Logging.SHOW_FINE && LOG.isLoggable(Level.FINE)) {
LOG.fine(
"RETRANSMISSION " + retransed + " retrans " + nAtThisRTO + " at this RTO (" + RTO + ") "
+ nretransmitted + " total retrans");
}
} else {
idleCounter += 1;
// reset RTO to min if we are idle
if (idleCounter == 2) {
RTO = minRTO;
idleCounter = 0;
nAtThisRTO = 0;
}
if (Logging.SHOW_FINE && LOG.isLoggable(Level.FINE)) {
LOG.fine("IDLE : RTO=" + RTO + " WAIT=" + realWait);
}
}
}
} catch (Throwable all) {
LOG.log(Level.SEVERE, "Uncaught Throwable in thread :" + Thread.currentThread().getName(), all);
} finally {
hardClose();
retrThread = null;
if (Logging.SHOW_INFO && LOG.isLoggable(Level.INFO)) {
LOG.info("STOPPED Retransmit thread");
}
}
}
}
}