/*
* Created on Jan 30, 2014
* Created by Paul Gardner
*
* Copyright 2014 Azureus Software, Inc. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; version 2 of the License only.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
*/
package com.vuze.client.plugins.utp.loc.v2;
import java.net.Inet6Address;
import java.net.InetSocketAddress;
import java.nio.ByteBuffer;
import java.util.*;
import org.gudy.azureus2.core3.util.Debug;
import com.vuze.client.plugins.utp.UTPProvider;
import com.vuze.client.plugins.utp.UTPProviderCallback;
import com.vuze.client.plugins.utp.UTPProviderException;
import com.vuze.client.plugins.utp.loc.UTPSocket;
import com.vuze.client.plugins.utp.loc.UTPTranslated;
public class
UTPTranslatedV2
implements UTPTranslated
{
private static final boolean ASSERTS = false;
static{
if ( ASSERTS ){
System.err.println( "**** UTP aserts enabled ****" );
}
}
static void
_assert(
boolean b )
{
if ( !b ){
Debug.out("derp");
}
}
static void
_assert(
int i )
{
if ( i ==0 ){
Debug.out("derp");
}
}
static void
_assert(
Object o )
{
if ( o == null ){
Debug.out("derp");
}
}
public static final int INT_MAX = 0x7fffffff;
public static final int UINT_MAX = 0xffffffff;
public static final long UINT_MAX_L = 0xffffffffL;
public static final long INT64_MAX = 0x7fffffffffffffffL;
final private static class
UnsignedShort
{
int i;
UnsignedShort()
{
i = 0;
}
UnsignedShort(
int _i )
{
i = _i&0xffff;
}
final void
set(
int num )
{
i = num&0xffff;
}
final void
inc()
{
i = (i+1)&0xffff;
}
final void
dec()
{
i = (i-1)&0xffff;
}
}
final private static class
UnsignedInteger
{
final long MASK = 0x00000000ffffffffL;
long l;
UnsignedInteger()
{
l = 0;
}
UnsignedInteger(
long num )
{
l = num&MASK;
}
final void
set(
long num )
{
l = num&MASK;
}
final void
set(
UnsignedInteger num )
{
l = num.l;
}
final long
minus(
UnsignedInteger other )
{
return( (l - other.l )&MASK );
}
final long
minus(
long num )
{
return( ( l - (num&MASK )) & MASK );
}
final long
plus(
long num )
{
return( ( l + (num&MASK )) & MASK );
}
}
public static long uint32( long l ){ return( l&UINT_MAX ); }
// **** utp.h ****
/*
enum {
UTP_UDP_DONTFRAG = 2, // Used to be a #define as UDP_IP_DONTFRAG
};
*/
public static final int UTP_UDP_DONTFRAG = 2;
/*
enum {
// socket has reveived syn-ack (notification only for outgoing connection completion)
// this implies writability
UTP_STATE_CONNECT = 1,
// socket is able to send more data
UTP_STATE_WRITABLE = 2,
// connection closed
UTP_STATE_EOF = 3,
// socket is being destroyed, meaning all data has been sent if possible.
// it is not valid to refer to the socket after this state change occurs
UTP_STATE_DESTROYING = 4,
};
*/
//extern const char *utp_state_names[];
// Errors codes that can be passed to UTP_ON_ERROR callback
/*
enum {
UTP_ECONNREFUSED = 0,
UTP_ECONNRESET,
UTP_ETIMEDOUT,
};
*/
public static final int UTP_ECONNREFUSED = 0;
public static final int UTP_ECONNRESET = 1;
public static final int UTP_ETIMEDOUT = 2;
// extern const char *utp_error_code_names[];
/*
enum {
// callback names
UTP_ON_FIREWALL = 0,
UTP_ON_ACCEPT,
UTP_ON_CONNECT,
UTP_ON_ERROR,
UTP_ON_READ,
UTP_ON_OVERHEAD_STATISTICS,
UTP_ON_STATE_CHANGE,
UTP_GET_READ_BUFFER_SIZE,
UTP_ON_DELAY_SAMPLE,
UTP_GET_UDP_MTU,
UTP_GET_UDP_OVERHEAD,
UTP_GET_MILLISECONDS,
UTP_GET_MICROSECONDS,
UTP_GET_RANDOM,
UTP_LOG,
UTP_SENDTO,
// context and socket options that may be set/queried
UTP_LOG_NORMAL,
UTP_LOG_MTU,
UTP_LOG_DEBUG,
UTP_SNDBUF,
UTP_RCVBUF,
UTP_TARGET_DELAY,
UTP_ARRAY_SIZE, // must be last
};
*/
public static final int UTP_ON_FIREWALL = 0;
public static final int UTP_ON_ACCEPT = 1;
public static final int UTP_ON_CONNECT = 2;
public static final int UTP_ON_ERROR = 3;
public static final int UTP_ON_READ = 4;
public static final int UTP_ON_OVERHEAD_STATISTICS = 5;
public static final int UTP_ON_STATE_CHANGE = 6;
public static final int UTP_GET_READ_BUFFER_SIZE = 7;
public static final int UTP_ON_DELAY_SAMPLE = 8;
public static final int UTP_GET_UDP_MTU = 9;
public static final int UTP_GET_UDP_OVERHEAD = 10;
public static final int UTP_GET_MILLISECONDS = 11;
public static final int UTP_GET_MICROSECONDS = 12;
public static final int UTP_GET_RANDOM = 13;
public static final int UTP_LOG = 14;
public static final int UTP_SENDTO = 15;
// context and socket options that may be set/queried
public static final int UTP_LOG_NORMAL = 16;
public static final int UTP_LOG_MTU = 17;
public static final int UTP_LOG_DEBUG = 18;
public static final int UTP_SNDBUF = 19;
public static final int UTP_RCVBUF = 20;
public static final int UTP_TARGET_DELAY = 21;
public static final int UTP_ARRAY_SIZE = 22; // must be last
//extern const char *utp_callback_names[];
private static class
_utp_callback_arguments
{
utp_context context;
UTPSocketImpl socket;
int len;
int flags;
int callback_type;
byte[] buf;
ByteBuffer bbuf;
//union {
// const struct sockaddr *address;
// int send;
// int sample_ms;
// int error_code;
// int state;
//};
InetSocketAddress address;
int send;
int sample_ms;
int error_code;
int state;
//union {
// socklen_t address_len;
// int type;
//};
int type;
};
// single threaded so optimise away object creation
private static _utp_callback_arguments utp_callback_arguments = new _utp_callback_arguments();
//typedef uint64 utp_callback_t(utp_callback_arguments *);
interface
utp_callback_t
{
public long
callback(
_utp_callback_arguments args );
}
// utp_callbacks.c
int utp_call_on_firewall(utp_context ctx, InetSocketAddress address )
{
if (ctx.callbacks[UTP_ON_FIREWALL]==null) return 0;
_utp_callback_arguments args = utp_callback_arguments;//new utp_callback_arguments();
args.callback_type = UTP_ON_FIREWALL;
args.context = ctx;
args.socket = null;
args.address = address;
//args.address_len = address_len;
return (int)ctx.callbacks[UTP_ON_FIREWALL].callback(args);
}
void utp_call_on_accept(utp_context ctx, UTPSocketImpl socket, InetSocketAddress address)
{
if (ctx.callbacks[UTP_ON_ACCEPT]==null) return;
_utp_callback_arguments args = utp_callback_arguments;//new utp_callback_arguments();
args.callback_type = UTP_ON_ACCEPT;
args.context = ctx;
args.socket = socket;
args.address = address;
//args.address_len = address_len;
ctx.callbacks[UTP_ON_ACCEPT].callback(args);
}
void utp_call_on_connect(utp_context ctx, UTPSocketImpl socket)
{
if (ctx.callbacks[UTP_ON_CONNECT]==null) return;
_utp_callback_arguments args = utp_callback_arguments;//new utp_callback_arguments();
args.callback_type = UTP_ON_CONNECT;
args.context = ctx;
args.socket = socket;
ctx.callbacks[UTP_ON_CONNECT].callback(args);
}
void utp_call_on_error(utp_context ctx, UTPSocketImpl socket, int error_code)
{
if (ctx.callbacks[UTP_ON_ERROR]==null) return;
_utp_callback_arguments args = utp_callback_arguments;//new utp_callback_arguments();
args.callback_type = UTP_ON_ERROR;
args.context = ctx;
args.socket = socket;
args.error_code = error_code;
ctx.callbacks[UTP_ON_ERROR].callback(args);
}
void utp_call_on_read(utp_context ctx, UTPSocketImpl socket, ByteBuffer buf, int len)
{
if (ctx.callbacks[UTP_ON_READ]==null) return;
_utp_callback_arguments args = utp_callback_arguments;//new utp_callback_arguments();
args.callback_type = UTP_ON_READ;
args.context = ctx;
args.socket = socket;
args.bbuf = buf;
args.len = len;
ctx.callbacks[UTP_ON_READ].callback(args);
}
void utp_call_on_overhead_statistics(utp_context ctx, UTPSocketImpl socket, int send, int len, int type)
{
if (ctx.callbacks[UTP_ON_OVERHEAD_STATISTICS]==null) return;
_utp_callback_arguments args = utp_callback_arguments;//new utp_callback_arguments();
args.callback_type = UTP_ON_OVERHEAD_STATISTICS;
args.context = ctx;
args.socket = socket;
args.send = send;
args.len = len;
args.type = type;
ctx.callbacks[UTP_ON_OVERHEAD_STATISTICS].callback(args);
}
void utp_call_on_delay_sample(utp_context ctx, UTPSocketImpl socket, int sample_ms)
{
if (ctx.callbacks[UTP_ON_DELAY_SAMPLE]==null) return;
_utp_callback_arguments args = utp_callback_arguments;//new utp_callback_arguments();
args.callback_type = UTP_ON_DELAY_SAMPLE;
args.context = ctx;
args.socket = socket;
args.sample_ms = sample_ms;
ctx.callbacks[UTP_ON_DELAY_SAMPLE].callback(args);
}
void utp_call_on_state_change(utp_context ctx, UTPSocketImpl socket, int state)
{
if (ctx.callbacks[UTP_ON_STATE_CHANGE]==null) return;
_utp_callback_arguments args = utp_callback_arguments;//new utp_callback_arguments();
args.callback_type = UTP_ON_STATE_CHANGE;
args.context = ctx;
args.socket = socket;
args.state = state;
ctx.callbacks[UTP_ON_STATE_CHANGE].callback(args);
}
short utp_call_get_udp_mtu(utp_context ctx, UTPSocketImpl socket, InetSocketAddress address)
{
if (ctx.callbacks[UTP_GET_UDP_MTU]==null) return 0;
_utp_callback_arguments args = utp_callback_arguments;//new utp_callback_arguments();
args.callback_type = UTP_GET_UDP_MTU;
args.context = ctx;
args.socket = socket;
args.address = address;
//args.address_len = address_len;
return (short)ctx.callbacks[UTP_GET_UDP_MTU].callback(args);
}
short utp_call_get_udp_overhead(utp_context ctx, UTPSocketImpl socket, InetSocketAddress address)
{
if (ctx.callbacks[UTP_GET_UDP_OVERHEAD]==null) return 0;
_utp_callback_arguments args = utp_callback_arguments;//new utp_callback_arguments();
args.callback_type = UTP_GET_UDP_OVERHEAD;
args.context = ctx;
args.socket = socket;
args.address = address;
//args.address_len = address_len;
return (short)ctx.callbacks[UTP_GET_UDP_OVERHEAD].callback(args);
}
long utp_call_get_milliseconds(utp_context ctx, UTPSocketImpl socket)
{
if (ctx.callbacks[UTP_GET_MILLISECONDS]==null) return 0;
_utp_callback_arguments args = utp_callback_arguments;//new utp_callback_arguments();
args.callback_type = UTP_GET_MILLISECONDS;
args.context = ctx;
args.socket = socket;
return ctx.callbacks[UTP_GET_MILLISECONDS].callback(args);
}
long utp_call_get_microseconds(utp_context ctx, UTPSocketImpl socket)
{
if (ctx.callbacks[UTP_GET_MICROSECONDS]==null) return 0;
_utp_callback_arguments args = utp_callback_arguments;//new utp_callback_arguments();
args.callback_type = UTP_GET_MICROSECONDS;
args.context = ctx;
args.socket = socket;
return ctx.callbacks[UTP_GET_MICROSECONDS].callback(args);
}
int utp_call_get_random(utp_context ctx, UTPSocketImpl socket)
{
if (ctx.callbacks[UTP_GET_RANDOM]==null) return 0;
_utp_callback_arguments args = utp_callback_arguments;//new utp_callback_arguments();
args.callback_type = UTP_GET_RANDOM;
args.context = ctx;
args.socket = socket;
return (int)ctx.callbacks[UTP_GET_RANDOM].callback(args);
}
int utp_call_get_read_buffer_size(utp_context ctx, UTPSocketImpl socket)
{
if (ctx.callbacks[UTP_GET_READ_BUFFER_SIZE]==null) return 0;
_utp_callback_arguments args = utp_callback_arguments;//new utp_callback_arguments();
args.callback_type = UTP_GET_READ_BUFFER_SIZE;
args.context = ctx;
args.socket = socket;
return (int)ctx.callbacks[UTP_GET_READ_BUFFER_SIZE].callback(args);
}
void utp_call_log(utp_context ctx, UTPSocketImpl socket, byte[] buf)
{
if (ctx.callbacks[UTP_LOG]==null) return;
_utp_callback_arguments args = utp_callback_arguments;//new utp_callback_arguments();
args.callback_type = UTP_LOG;
args.context = ctx;
args.socket = socket;
args.buf = buf;
ctx.callbacks[UTP_LOG].callback(args);
}
void utp_call_sendto(utp_context ctx, UTPSocketImpl socket, byte[] buf, int len, InetSocketAddress address, int flags)
{
if (ctx.callbacks[UTP_SENDTO]==null) return;
_utp_callback_arguments args = utp_callback_arguments;//new utp_callback_arguments();
args.callback_type = UTP_SENDTO;
args.context = ctx;
args.socket = socket;
args.buf = buf;
args.len = len;
args.address = address;
//args.address_len = address_len;
args.flags = flags;
ctx.callbacks[UTP_SENDTO].callback(args);
}
// utp_utils
public static final int ETHERNET_MTU = 1500;
public static final int IPV4_HEADER_SIZE = 20;
public static final int IPV6_HEADER_SIZE = 40;
public static final int UDP_HEADER_SIZE = 8;
public static final int GRE_HEADER_SIZE = 24;
public static final int PPPOE_HEADER_SIZE = 8;
public static final int MPPE_HEADER_SIZE = 2;
// packets have been observed in the wild that were fragmented
// with a payload of 1416 for the first fragment
// There are reports of routers that have MTU sizes as small as 1392
public static final int FUDGE_HEADER_SIZE = 36;
public static final int TEREDO_MTU = 1280;
public static final int UDP_IPV4_OVERHEAD = (IPV4_HEADER_SIZE + UDP_HEADER_SIZE);
public static final int UDP_IPV6_OVERHEAD = (IPV6_HEADER_SIZE + UDP_HEADER_SIZE);
public static final int UDP_TEREDO_OVERHEAD = (UDP_IPV4_OVERHEAD + UDP_IPV6_OVERHEAD);
public static final int UDP_IPV4_MTU = (ETHERNET_MTU - IPV4_HEADER_SIZE - UDP_HEADER_SIZE - GRE_HEADER_SIZE - PPPOE_HEADER_SIZE - MPPE_HEADER_SIZE - FUDGE_HEADER_SIZE);
public static final int UDP_IPV6_MTU = (ETHERNET_MTU - IPV6_HEADER_SIZE - UDP_HEADER_SIZE - GRE_HEADER_SIZE - PPPOE_HEADER_SIZE - MPPE_HEADER_SIZE - FUDGE_HEADER_SIZE);
public static final int UDP_TEREDO_MTU = (TEREDO_MTU - UDP_HEADER_SIZE);
private final boolean TEST_MODE;
private final UTPProviderCallback callback;
private final UTPTranslated.UTPFunctionTable fn_table;
private final UTPTranslated.SendToProc send_to_proc;
private final UTPTranslated.UTPGotIncomingConnection incoming_connection_proc;
private final utp_context global_ctx;
public
UTPTranslatedV2(
UTPProviderCallback _callback,
UTPTranslated.UTPFunctionTable _fn_table,
UTPTranslated.SendToProc _send_to_proc,
UTPTranslated.UTPGotIncomingConnection _icp,
boolean _test_mode )
{
callback = _callback;
fn_table = _fn_table;
send_to_proc = _send_to_proc;
incoming_connection_proc = _icp;
TEST_MODE = _test_mode;
global_ctx = new utp_context();
}
private final utp_callback_t utp_default_callbacks =
new utp_callback_t()
{
public long
callback(
_utp_callback_arguments args )
{
switch( args.callback_type ){
case UTP_GET_UDP_MTU:{
return(( args.address.getAddress() instanceof Inet6Address ) ? UDP_TEREDO_MTU : UDP_IPV4_MTU );
}
case UTP_GET_UDP_OVERHEAD:{
return((args.address.getAddress() instanceof Inet6Address ) ? UDP_TEREDO_OVERHEAD : UDP_IPV4_OVERHEAD );
}
case UTP_GET_MILLISECONDS:{
return( callback.getMilliseconds());
}
case UTP_GET_MICROSECONDS:{
return( callback.getMicroseconds());
}
case UTP_GET_RANDOM:{
return( callback.getRandom());
}
case UTP_ON_ACCEPT:{
incoming_connection_proc.got_incoming_connection( null, args.socket );
break;
}
case UTP_ON_ERROR:{
fn_table.on_error(args.socket.userdata, args.error_code);
break;
}
case UTP_ON_READ:{
fn_table.on_read(args.socket.userdata, args.bbuf, args.len );
break;
}
case UTP_ON_OVERHEAD_STATISTICS:{
fn_table.on_overhead( args.socket.userdata, args.send!=0, args.len, args.type );
break;
}
case UTP_ON_STATE_CHANGE:{
fn_table.on_state( args.socket.userdata, args.state );
break;
}
case UTP_GET_READ_BUFFER_SIZE:{
return( fn_table.get_rb_size( args.socket.userdata ));
}
case UTP_SENDTO:{
send_to_proc.send_to_proc( null, args.buf, args.address );
break;
}
default:{
Debug.out( "Default not supported!" );
}
}
return(-1);
}
};
// Returned by utp_get_context_stats()
/*
typedef struct {
uint32 _nraw_recv[5]; // total packets recieved less than 300/600/1200/MTU bytes fpr all connections (context-wide)
uint32 _nraw_send[5]; // total packets sent less than 300/600/1200/MTU bytes for all connections (context-wide)
} utp_context_stats;
*/
class utp_context_stats
{
int[] _nraw_recv = new int[5];
int[] _nraw_send = new int[5];
}
// Returned by utp_get_stats()
/*
typedef struct {
uint64 nbytes_recv; // total bytes received
uint64 nbytes_xmit; // total bytes transmitted
uint32 rexmit; // retransmit counter
uint32 fastrexmit; // fast retransmit counter
uint32 nxmit; // transmit counter
uint32 nrecv; // receive counter (total)
uint32 nduprecv; // duplicate receive counter
uint32 mtu_guess; // Best guess at MTU
} utp_socket_stats;
*/
class utp_socket_stats
{
long nbytes_recv; // total bytes received
long nbytes_xmit; // total bytes transmitted
int rexmit; // retransmit counter
int fastrexmit; // fast retransmit counter
int nxmit; // transmit counter
int nrecv; // receive counter (total)
int nduprecv; // duplicate receive counter
int mtu_guess; // Best guess at MTU
}
//#define UTP_IOV_MAX 1024
public static final int UTP_IOV_MAX = 1024;
// For utp_writev, to writes data from multiple buffers
/*
struct utp_iovec {
void *iov_base;
size_t iov_len;
};
*/
/*
private static class utp_iovec
{
byte[] iov_base;
int iov_len;
int iov_offset;
private
utp_iovec(
utp_iovec other )
{
iov_base = other.iov_base;
iov_len = other.iov_len;
iov_offset = other.iov_offset;
}
}
*/
// utp_internal.h
/* These originally lived in utp_config.h */
public static final int CCONTROL_TARGET = (100 * 1000); // us
/*
enum bandwidth_type_t {
payload_bandwidth, connect_overhead,
close_overhead, ack_overhead,
header_overhead, retransmit_overhead
};
*/
public static final int payload_bandwidth = 0;
public static final int connect_overhead = 1;
public static final int close_overhead = 2;
public static final int ack_overhead = 3;
public static final int header_overhead = 4;
public static final int retransmit_overhead = 5;
/*
#ifdef WIN32
#ifdef _MSC_VER
#include "win32_inet_ntop.h"
#endif
// newer versions of MSVC define these in errno.h
#ifndef ECONNRESET
#define ECONNRESET WSAECONNRESET
#define EMSGSIZE WSAEMSGSIZE
#define ECONNREFUSED WSAECONNREFUSED
#define ETIMEDOUT WSAETIMEDOUT
#endif
#endif
*/
/*
struct PACKED_ATTRIBUTE RST_Info {
PackedSockAddr addr;
uint32 connid;
uint16 ack_nr;
uint64 timestamp;
};
*/
class RST_Info {
InetSocketAddress addr;
int connid;
short ack_nr;
long timestamp;
};
// It's really important that we don't have duplicate keys in the hash table.
// If we do, we'll eventually crash. if we try to remove the second instance
// of the key, we'll accidentally remove the first instead. then later,
// checkTimeouts will try to access the second one's already freed memory.
/*
void UTP_FreeAll(struct UTPSocketHT *utp_sockets);
struct UTPSocketKey {
PackedSockAddr addr;
uint32 recv_id; // "conn_seed", "conn_id"
UTPSocketKey(const PackedSockAddr& _addr, uint32 _recv_id) {
memset(this, 0, sizeof(*this));
addr = _addr;
recv_id = _recv_id;
}
bool operator == (const UTPSocketKey &other) const {
return recv_id == other.recv_id && addr == other.addr;
}
uint32 compute_hash() const {
return recv_id ^ addr.compute_hash();
}
};
struct UTPSocketKeyData {
UTPSocketKey key;
UTPSocket *socket;
utp_link_t link;
};
*/
private static class
UTPSocketKey
{
private InetSocketAddress address;
private int recv_id;
private
UTPSocketKey(
InetSocketAddress _address,
int _recv_id )
{
address = _address;
recv_id = _recv_id;
}
public boolean
equals(
Object _other )
{
UTPSocketKey other = (UTPSocketKey)_other;
return( recv_id == other.recv_id && address.equals( other.address ));
}
public int
hashCode()
{
return( recv_id ^ address.hashCode());
}
}
private static class
UTPSocketKeyData
{
private
UTPSocketKeyData(
UTPSocketImpl _socket )
{
socket = _socket;
}
//private UTPSocketKey key;
private UTPSocketImpl socket;
//utp_link_t link;
}
/*
#define UTP_SOCKET_BUCKETS 79
#define UTP_SOCKET_INIT 15
struct UTPSocketHT : utpHashTable<UTPSocketKey, UTPSocketKeyData> {
UTPSocketHT() {
const int buckets = UTP_SOCKET_BUCKETS;
const int initial = UTP_SOCKET_INIT;
this->Create(buckets, initial);
}
~UTPSocketHT() {
UTP_FreeAll(this);
this->Free();
}
};
*/
/*
struct struct_utp_context {
void *userdata;
utp_callback_t* callbacks[UTP_ARRAY_SIZE];
uint64 current_ms;
utp_context_stats context_stats;
UTPSocket *last_utp_socket;
Array<UTPSocket*> ack_sockets;
Array<RST_Info> rst_info;
UTPSocketHT *utp_sockets;
size_t target_delay;
size_t opt_sndbuf;
size_t opt_rcvbuf;
uint64 last_check;
struct_utp_context();
~struct_utp_context();
void log(int level, utp_socket *socket, char const *fmt, ...);
bool log_normal:1; // log normal events?
bool log_mtu:1; // log MTU related events?
bool log_debug:1; // log debugging events? (Must also compile with UTP_DEBUG_LOGGING defined)
};
// from url_api.cpp
struct_utp_context::struct_utp_context()
: userdata(NULL)
, current_ms(0)
, last_utp_socket(NULL)
, log_normal(false)
, log_mtu(false)
, log_debug(false)
{
memset(&context_stats, 0, sizeof(context_stats));
memset(callbacks, 0, sizeof(callbacks));
target_delay = CCONTROL_TARGET;
utp_sockets = new UTPSocketHT;
callbacks[UTP_GET_UDP_MTU] = &utp_default_get_udp_mtu;
callbacks[UTP_GET_UDP_OVERHEAD] = &utp_default_get_udp_overhead;
callbacks[UTP_GET_MILLISECONDS] = &utp_default_get_milliseconds;
callbacks[UTP_GET_MICROSECONDS] = &utp_default_get_microseconds;
callbacks[UTP_GET_RANDOM] = &utp_default_get_random;
// 1 MB of receive buffer (i.e. max bandwidth delay product)
// means that from a peer with 200 ms RTT, we cannot receive
// faster than 5 MB/s
// from a peer with 10 ms RTT, we cannot receive faster than
// 100 MB/s. This is assumed to be good enough, since bandwidth
// often is proportional to RTT anyway
// when setting a download rate limit, all sockets should have
// their receive buffer set much lower, to say 60 kiB or so
opt_rcvbuf = opt_sndbuf = 1024 * 1024;
last_check = 0;
}
struct_utp_context::~struct_utp_context() {
delete this->utp_sockets;
}
*/
private class utp_context {
Object userdata;
utp_callback_t[] callbacks;
long current_ms;
utp_context_stats context_stats;
//UTPSocketImpl last_utp_socket;
LinkedHashSet<UTPSocketImpl> ack_sockets;
LinkedList<RST_Info> rst_info;
Map<UTPSocketKey,UTPSocketKeyData> utp_sockets;
int target_delay;
int opt_sndbuf;
int opt_rcvbuf;
long last_check;
//struct_utp_context();
//~struct_utp_context();
//void log(int level, utp_socket *socket, char const *fmt, ...);
boolean log_normal; // log normal events?
boolean log_mtu; // log MTU related events?
boolean log_debug; // log debugging events? (Must also compile with UTP_DEBUG_LOGGING defined)
private
utp_context()
{
userdata = null;
current_ms = 0;
//last_utp_socket = null;
log_normal = false;
log_mtu = false;
log_debug = false;
//memset(&context_stats, 0, sizeof(context_stats));
context_stats = new utp_context_stats();
//memset(callbacks, 0, sizeof(callbacks));
callbacks = new utp_callback_t[UTP_ARRAY_SIZE];
target_delay = CCONTROL_TARGET;
ack_sockets = new LinkedHashSet<UTPSocketImpl>();
rst_info = new LinkedList<RST_Info>();
utp_sockets = new HashMap<UTPSocketKey,UTPSocketKeyData>();
callbacks[UTP_ON_ACCEPT] = utp_default_callbacks;
//callbacks[UTP_ON_CONNECT] = utp_default_callbacks; - no, we need this via state change
callbacks[UTP_ON_ERROR] = utp_default_callbacks;
callbacks[UTP_ON_READ] = utp_default_callbacks;
callbacks[UTP_ON_OVERHEAD_STATISTICS] = utp_default_callbacks;
callbacks[UTP_ON_STATE_CHANGE] = utp_default_callbacks;
callbacks[UTP_GET_READ_BUFFER_SIZE] = utp_default_callbacks;
callbacks[UTP_SENDTO] = utp_default_callbacks;
callbacks[UTP_GET_UDP_MTU] = utp_default_callbacks;
callbacks[UTP_GET_UDP_OVERHEAD] = utp_default_callbacks;
callbacks[UTP_GET_MILLISECONDS] = utp_default_callbacks;
callbacks[UTP_GET_MICROSECONDS] = utp_default_callbacks;
callbacks[UTP_GET_RANDOM] = utp_default_callbacks;
// 1 MB of receive buffer (i.e. max bandwidth delay product)
// means that from a peer with 200 ms RTT, we cannot receive
// faster than 5 MB/s
// from a peer with 10 ms RTT, we cannot receive faster than
// 100 MB/s. This is assumed to be good enough, since bandwidth
// often is proportional to RTT anyway
// when setting a download rate limit, all sockets should have
// their receive buffer set much lower, to say 60 kiB or so
opt_rcvbuf = opt_sndbuf = 1024 * 1024;
last_check = 0;
}
};
/*
utp_internal.cpp
#include <stdio.h>
#include <assert.h>
#include <string.h>
#include <string.h>
#include <stdlib.h>
#include <errno.h>
#include <limits.h> // for UINT_MAX
#include "utp_types.h"
#include "utp_packedsockaddr.h"
#include "utp_internal.h"
#include "utp_hash.h"
*/
// #define TIMEOUT_CHECK_INTERVAL 500
public static final int TIMEOUT_CHECK_INTERVAL = 500;
// number of bytes to increase max window size by, per RTT. This is
// scaled down linearly proportional to off_target. i.e. if all packets
// in one window have 0 delay, window size will increase by this number.
// Typically it's less. TCP increases one MSS per RTT, which is 1500
// #define MAX_CWND_INCREASE_BYTES_PER_RTT 3000
public static final int MAX_CWND_INCREASE_BYTES_PER_RTT = 3000;
// #define CUR_DELAY_SIZE 3
public static final int CUR_DELAY_SIZE = 3;
// experiments suggest that a clock skew of 10 ms per 325 seconds
// is not impossible. Reset delay_base every 13 minutes. The clock
// skew is dealt with by observing the delay base in the other
// direction, and adjusting our own upwards if the opposite direction
// delay base keeps going down
// #define DELAY_BASE_HISTORY 13
public static final int DELAY_BASE_HISTORY = 13;
// #define MAX_WINDOW_DECAY 100 // ms
public static final int MAX_WINDOW_DECAY = 100;
// #define REORDER_BUFFER_SIZE 32
public static final int REORDER_BUFFER_SIZE = 32;
// #define REORDER_BUFFER_MAX_SIZE 1024
public static final int REORDER_BUFFER_MAX_SIZE = 1024;
// #define OUTGOING_BUFFER_MAX_SIZE 1024
public static final int OUTGOING_BUFFER_MAX_SIZE = 1024;
// #define PACKET_SIZE 1435
public static final int PACKET_SIZE = 1435;
// this is the minimum max_window value. It can never drop below this
// #define MIN_WINDOW_SIZE 10
public static final int MIN_WINDOW_SIZE = 10;
// if we receive 4 or more duplicate acks, we resend the packet
// that hasn't been acked yet
// #define DUPLICATE_ACKS_BEFORE_RESEND 3
public static final int DUPLICATE_ACKS_BEFORE_RESEND = 3;
// #define RST_INFO_TIMEOUT 10000
public static final int RST_INFO_TIMEOUT = 10000;
// #define RST_INFO_LIMIT 1000
public static final int RST_INFO_LIMIT = 1000;
// 29 seconds determined from measuring many home NAT devices
// #define KEEPALIVE_INTERVAL 29000
public static final int KEEPALIVE_INTERVAL = 29000;
// #define SEQ_NR_MASK 0xFFFF
// #define ACK_NR_MASK 0xFFFF
// #define TIMESTAMP_MASK 0xFFFFFFFF
public static final int SEQ_NR_MASK = 0xFFFF;
public static final int ACK_NR_MASK = 0xFFFF;
public static final long TIMESTAMP_MASK = 0x00000000FFFFFFFFL;
// #define DIV_ROUND_UP(num, denom) ((num + denom - 1) / denom)
public static final int DIV_ROUND_UP(int num, int denom){ return ((num + denom - 1) / denom);}
// The totals are derived from the following data:
// 45: IPv6 address including embedded IPv4 address
// 11: Scope Id
// 2: Brackets around IPv6 address when port is present
// 6: Port (including colon)
// 1: Terminating null byte
/*
char addrbuf[65];
#define addrfmt(x, s) x.fmt(s, sizeof(s))
#if (defined(__SVR4) && defined(__sun))
#pragma pack(1)
#else
#pragma pack(push,1)
#endif
*/
// these packet sizes are including the uTP header wich
// is either 20 or 23 bytes depending on version
// #define PACKET_SIZE_EMPTY_BUCKET 0
// #define PACKET_SIZE_EMPTY 23
// #define PACKET_SIZE_SMALL_BUCKET 1
// #define PACKET_SIZE_SMALL 373
// #define PACKET_SIZE_MID_BUCKET 2
// #define PACKET_SIZE_MID 723
// #define PACKET_SIZE_BIG_BUCKET 3
// #define PACKET_SIZE_BIG 1400
// #define PACKET_SIZE_HUGE_BUCKET 4
public static final int PACKET_SIZE_EMPTY_BUCKET = 0;
public static final int PACKET_SIZE_EMPTY = 23;
public static final int PACKET_SIZE_SMALL_BUCKET = 1;
public static final int PACKET_SIZE_SMALL = 373;
public static final int PACKET_SIZE_MID_BUCKET = 2;
public static final int PACKET_SIZE_MID = 723;
public static final int PACKET_SIZE_BIG_BUCKET = 3;
public static final int PACKET_SIZE_BIG = 1400;
public static final int PACKET_SIZE_HUGE_BUCKET = 4;
/*
struct PACKED_ATTRIBUTE PacketFormatV1 {
// packet_type (4 high bits)
// protocol version (4 low bits)
byte ver_type;
byte version() const { return ver_type & 0xf; }
byte type() const { return ver_type >> 4; }
void set_version(byte v) { ver_type = (ver_type & 0xf0) | (v & 0xf); }
void set_type(byte t) { ver_type = (ver_type & 0xf) | (t << 4); }
// Type of the first extension header
byte ext;
// connection ID
uint16_big connid;
uint32_big tv_usec;
uint32_big reply_micro;
// receive window size in bytes
uint32_big windowsize;
// Sequence number
uint16_big seq_nr;
// Acknowledgment number
uint16_big ack_nr;
};
struct PACKED_ATTRIBUTE PacketFormatAckV1 {
PacketFormatV1 pf;
byte ext_next;
byte ext_len;
byte acks[4];
};
#if (defined(__SVR4) && defined(__sun))
#pragma pack(0)
#else
#pragma pack(pop)
#endif
*/
static abstract class PacketFormatBase
{
public abstract byte[] serialise();
};
static abstract class PacketFormatBaseV1 extends PacketFormatBase{};
public static final int sizeof_PacketFormatV1 = 20;
static class PacketFormatV1 extends PacketFormatBaseV1{
// protocol version
byte version; // 4 bits
// type (formerly flags)
byte type; // 4 bits
// Type of the first extension header
byte ext;
// connection ID
short connid;
int tv_usec;
int reply_micro;
// receive window size in bytes
int windowsize;
// Sequence number
short seq_nr;
// Acknowledgment number
short ack_nr;
PacketFormatV1()
{
}
PacketFormatV1(
byte[] data )
{
type = (byte)(data[0]>>4);
version = (byte)(data[0]&0x0f);
ext = data[1];
int pos = 2;
connid = (short)( ((data[pos++]<<8)&0xff00) | (data[pos++]&0x00ff));
tv_usec = ((data[pos++]<<24)&0xff000000) | ((data[pos++]<<16)&0x00ff0000) | ((data[pos++]<<8)&0x0000ff00) | (data[pos++]&0x000000ff);
reply_micro = ((data[pos++]<<24)&0xff000000) | ((data[pos++]<<16)&0x00ff0000) | ((data[pos++]<<8)&0x0000ff00) | (data[pos++]&0x000000ff);
windowsize = ((data[pos++]<<24)&0xff000000) | ((data[pos++]<<16)&0x00ff0000) | ((data[pos++]<<8)&0x0000ff00) | (data[pos++]&0x000000ff);
// System.out.println( "ws in: " + windowsize );
seq_nr = (short)( ((data[pos++]<<8)&0xff00) | (data[pos++]&0x00ff));
ack_nr = (short)( ((data[pos++]<<8)&0xff00) | (data[pos++]&0x00ff));
}
protected void
set_version( int v )
{
version = (byte)v;
}
protected void
set_type( int t )
{
type = (byte)t;
}
protected byte
version()
{
return( version );
}
protected byte
type()
{
return( type );
}
public byte[]
serialise()
{
return( serialise( new byte[sizeof_PacketFormatV1] ));
}
public byte[]
serialise(
byte[] buffer )
{
int pos = 0;
buffer[pos++] = (byte)((type<<4) | version&0x0f );
buffer[pos++] = ext;
buffer[pos++] = (byte)(connid>>8);
buffer[pos++] = (byte)(connid);
buffer[pos++] = (byte)(tv_usec>>24);
buffer[pos++] = (byte)(tv_usec>>16);
buffer[pos++] = (byte)(tv_usec>>8);
buffer[pos++] = (byte)(tv_usec);
buffer[pos++] = (byte)(reply_micro>>24);
buffer[pos++] = (byte)(reply_micro>>16);
buffer[pos++] = (byte)(reply_micro>>8);
buffer[pos++] = (byte)(reply_micro);
buffer[pos++] = (byte)(windowsize>>24);
buffer[pos++] = (byte)(windowsize>>16);
buffer[pos++] = (byte)(windowsize>>8);
buffer[pos++] = (byte)(windowsize);
// System.out.println( "ws out: " + windowsize );
buffer[pos++] = (byte)(seq_nr>>8);
buffer[pos++] = (byte)(seq_nr);
buffer[pos++] = (byte)(ack_nr>>8);
buffer[pos++] = (byte)(ack_nr);
return( buffer );
}
};
public static final int sizeof_PacketFormatAckV1 = sizeof_PacketFormatV1 + 6;
public static final int sizeof_PacketFormatExtensionsV1 = sizeof_PacketFormatV1 + 10;
static class PacketFormatExtensionsV1 extends PacketFormatV1{
byte ext_next;
byte ext_len;
byte[] extensions = new byte[8];
PacketFormatExtensionsV1()
{
super();
}
PacketFormatExtensionsV1(
byte[] data )
{
super( data );
}
public byte[]
serialise()
{
if ( ext == 0 ){
return( super.serialise());
}else{
return( serialise( new byte[ext==1?sizeof_PacketFormatAckV1:sizeof_PacketFormatExtensionsV1] ));
}
}
public byte[]
serialise(
byte[] buffer )
{
super.serialise( buffer );
if ( ext != 0 ){
int pos = sizeof_PacketFormatV1;
buffer[pos++] = ext_next;
buffer[pos++] = ext_len;
System.arraycopy(extensions, 0, buffer, pos, ext == 1?4:8 );
}
return( buffer );
}
};
static class
PacketFormatExtensionDeserialised
{
byte ext;
byte[] ext_data;
PacketFormatExtensionDeserialised(
byte _ext,
byte[] _ext_data )
{
ext = _ext;
ext_data = _ext_data;
}
}
static class
PacketFormatDeserialised
{
PacketFormatV1 header;
List<PacketFormatExtensionDeserialised> exts;
ByteBuffer payload;
PacketFormatDeserialised(
byte[] data,
int len,
boolean test_only )
{
if ( len < sizeof_PacketFormatV1 ){
return;
}
byte type = (byte)(data[0]>>4);
byte version = (byte)(data[0]&0x0f);
byte ext = data[1];
boolean is_v1 = version == 1 && type < ST_NUM_STATES && ext < 3;
if ( !is_v1 ){
return;
}
header = new PacketFormatV1( data );
int pos = sizeof_PacketFormatV1;
exts = new ArrayList<PacketFormatExtensionDeserialised>();
while( ext != 0 ){
if ( len - pos < 2 ){
header = null;
return;
}
byte ext_next = data[pos++];
byte ext_len = data[pos++];
if ( len - pos < ext_len ){
header = null;
return;
}
if ( test_only ){
pos += ext_len;
ext = ext_next;
}else{
byte[] ext_data = new byte[ext_len];
System.arraycopy( data, pos, ext_data, 0, ext_len );
pos += ext_len;
PacketFormatExtensionDeserialised x = new PacketFormatExtensionDeserialised( ext, ext_data );
exts.add( x );
ext = ext_next;
}
}
if ( !test_only ){
// System.out.println( "uTP packet: " + (len-pos ) + ", buf=" + data.length);
if ( pos < len ){
payload = ByteBuffer.wrap( data, pos, len-pos );
//payload = new byte[ len-pos ];
//System.arraycopy( data, pos, payload, 0, payload.length );
}else{
payload = ByteBuffer.allocate(0);
//payload = new byte[0];
}
}
}
}
public static PacketFormatDeserialised
deserialise(
byte[] data,
int len,
boolean test_only )
{
PacketFormatDeserialised res = new PacketFormatDeserialised( data, len, test_only );
if ( res.header == null ){
return( null );
}
return( res );
}
/*
enum {
ST_DATA = 0, // Data packet.
ST_FIN = 1, // Finalize the connection. This is the last packet.
ST_STATE = 2, // State packet. Used to transmit an ACK with no data.
ST_RESET = 3, // Terminate connection forcefully.
ST_SYN = 4, // Connect SYN
ST_NUM_STATES, // used for bounds checking
};
static const cstr flagnames[] = {
"ST_DATA","ST_FIN","ST_STATE","ST_RESET","ST_SYN"
};
*/
public static final int ST_DATA = 0; // Data packet.
public static final int ST_FIN = 1; // Finalize the connection. This is the last packet.
public static final int ST_STATE = 2; // State packet. Used to transmit an ACK with no data.
public static final int ST_RESET = 3; // Terminate connection forcefully.
public static final int ST_SYN = 4; // Connect SYN
public static final int ST_NUM_STATES = 5; // used for bounds checking
static final String flagnames[] = {
"ST_DATA","ST_FIN","ST_STATE","ST_RESET","ST_SYN"
};
/*
enum CONN_STATE {
CS_UNINITIALIZED = 0,
CS_IDLE,
CS_SYN_SENT,
CS_CONNECTED,
CS_CONNECTED_FULL,
CS_GOT_FIN,
CS_DESTROY_DELAY,
CS_FIN_SENT,
CS_RESET,
CS_DESTROY
};
static const cstr statenames[] = {
"UNINITIALIZED", "IDLE","SYN_SENT","CONNECTED","CONNECTED_FULL","GOT_FIN","DESTROY_DELAY","FIN_SENT","RESET","DESTROY"
};
*/
public static final int CS_UNINITIALIZED = 0;
public static final int CS_IDLE = 1;
public static final int CS_SYN_SENT = 2;
public static final int CS_CONNECTED = 3;
public static final int CS_CONNECTED_FULL = 4;
public static final int CS_GOT_FIN = 5;
public static final int CS_DESTROY_DELAY = 6;
public static final int CS_FIN_SENT = 7;
public static final int CS_RESET = 8;
public static final int CS_DESTROY = 9;
static final String statenames[] = {
"UNINITIALIZED", "IDLE","SYN_SENT","CONNECTED","CONNECTED_FULL","GOT_FIN","DESTROY_DELAY","FIN_SENT","RESET","DESTROY"
};
/*
struct OutgoingPacket {
size_t length;
size_t payload;
uint64 time_sent; // microseconds
uint transmissions:31;
bool need_resend:1;
byte data[1];
};
*/
static class OutgoingPacket {
int length; // size_t ???
int payload;
long time_sent; // microseconds
int transmissions; // 31 BITS
boolean need_resend; // 1 BIT
PacketFormatBase packet_header;
byte[] packet_payload;
};
/*
struct SizableCircularBuffer {
// This is the mask. Since it's always a power of 2, adding 1 to this value will return the size.
size_t mask;
// This is the elements that the circular buffer points to
void **elements;
void *get(size_t i) { assert(elements); return elements ? elements[i & mask] : NULL; }
void put(size_t i, void *data) { assert(elements); elements[i&mask] = data; }
void grow(size_t item, size_t index);
void ensure_size(size_t item, size_t index) { if (index > mask) grow(item, index); }
size_t size() { return mask + 1; }
};
// Item contains the element we want to make space for
// index is the index in the list.
void SizableCircularBuffer::grow(size_t item, size_t index)
{
// Figure out the new size.
size_t size = mask + 1;
do size *= 2; while (index >= size);
// Allocate the new buffer
void **buf = (void**)calloc(size, sizeof(void*));
size--;
// Copy elements from the old buffer to the new buffer
for (size_t i = 0; i <= mask; i++) {
buf[(item - index + i) & size] = get(item - index + i);
}
// Swap to the newly allocated buffer
mask = size;
free(elements);
elements = buf;
}
*/
static class SizableCircularBuffer<T> {
// This is the mask. Since it's always a power of 2, adding 1 to this value will return the size.
int mask;
// This is the elements that the circular buffer points to
Object[] elements;
T get(int i) { if (ASSERTS)_assert(elements!=null); return elements!=null ? (T)elements[i & mask] : null; }
void put(int i, T data) { if (ASSERTS)_assert(elements!=null); elements[i&mask] = data; }
void ensure_size(int item, int index) { if (index > mask) grow(item, index); }
int size() { return mask + 1; }
void grow(int item, int index)
{
// Figure out the new size.
int size = mask + 1;
do size *= 2; while (index >= size);
// Allocate the new buffer
Object[] buf = new Object[size];
size--;
// Copy elements from the old buffer to the new buffer
for (int i = 0; i <= mask; i++) {
buf[(item - index + i) & size] = get(item - index + i);
}
// Swap to the newly allocated buffer
mask = size;
elements = buf;
}
};
// compare if lhs is less than rhs, taking wrapping
// into account. if lhs is close to UINT_MAX and rhs
// is close to 0, lhs is assumed to have wrapped and
// considered smaller
/*
bool wrapping_compare_less(uint32 lhs, uint32 rhs, uint32 mask)
{
// distance walking from lhs to rhs, downwards
const uint32 dist_down = (lhs - rhs) & mask;
// distance walking from lhs to rhs, upwards
const uint32 dist_up = (rhs - lhs) & mask;
// if the distance walking up is shorter, lhs
// is less than rhs. If the distance walking down
// is shorter, then rhs is less than lhs
return dist_up < dist_down;
}
*/
boolean wrapping_compare_less(int lhs, int rhs, long mask )
{
// distance walking from lhs to rhs, downwards
final long dist_down = (lhs - rhs) & mask; // derp
// distance walking from lhs to rhs, upwards
final long dist_up = (rhs - lhs) & mask; // derp
// if the distance walking up is shorter, lhs
// is less than rhs. If the distance walking down
// is shorter, then rhs is less than lhs
//System.out.println( "wcl: " + Integer.toString( lhs, 16 ) + "/" + Integer.toString( rhs, 16 ) + " -> " + dist_down + "/" + dist_up );
return dist_up < dist_down;
}
/*
struct DelayHist {
uint32 delay_base;
// this is the history of delay samples,
// normalized by using the delay_base. These
// values are always greater than 0 and measures
// the queuing delay in microseconds
uint32 cur_delay_hist[CUR_DELAY_SIZE];
size_t cur_delay_idx;
// this is the history of delay_base. It's
// a number that doesn't have an absolute meaning
// only relative. It doesn't make sense to initialize
// it to anything other than values relative to
// what's been seen in the real world.
uint32 delay_base_hist[DELAY_BASE_HISTORY];
size_t delay_base_idx;
// the time when we last stepped the delay_base_idx
uint64 delay_base_time;
bool delay_base_initialized;
void clear(uint64 current_ms)
{
delay_base_initialized = false;
delay_base = 0;
cur_delay_idx = 0;
delay_base_idx = 0;
delay_base_time = current_ms;
for (size_t i = 0; i < CUR_DELAY_SIZE; i++) {
cur_delay_hist[i] = 0;
}
for (size_t i = 0; i < DELAY_BASE_HISTORY; i++) {
delay_base_hist[i] = 0;
}
}
void shift(const uint32 offset)
{
// the offset should never be "negative"
// assert(offset < 0x10000000);
// increase all of our base delays by this amount
// this is used to take clock skew into account
// by observing the other side's changes in its base_delay
for (size_t i = 0; i < DELAY_BASE_HISTORY; i++) {
delay_base_hist[i] += offset;
}
delay_base += offset;
}
void add_sample(const uint32 sample, uint64 current_ms)
{
// The two clocks (in the two peers) are assumed not to
// progress at the exact same rate. They are assumed to be
// drifting, which causes the delay samples to contain
// a systematic error, either they are under-
// estimated or over-estimated. This is why we update the
// delay_base every two minutes, to adjust for this.
// This means the values will keep drifting and eventually wrap.
// We can cross the wrapping boundry in two directions, either
// going up, crossing the highest value, or going down, crossing 0.
// if the delay_base is close to the max value and sample actually
// wrapped on the other end we would see something like this:
// delay_base = 0xffffff00, sample = 0x00000400
// sample - delay_base = 0x500 which is the correct difference
// if the delay_base is instead close to 0, and we got an even lower
// sample (that will eventually update the delay_base), we may see
// something like this:
// delay_base = 0x00000400, sample = 0xffffff00
// sample - delay_base = 0xfffffb00
// this needs to be interpreted as a negative number and the actual
// recorded delay should be 0.
// It is important that all arithmetic that assume wrapping
// is done with unsigned intergers. Signed integers are not guaranteed
// to wrap the way unsigned integers do. At least GCC takes advantage
// of this relaxed rule and won't necessarily wrap signed ints.
// remove the clock offset and propagation delay.
// delay base is min of the sample and the current
// delay base. This min-operation is subject to wrapping
// and care needs to be taken to correctly choose the
// true minimum.
// specifically the problem case is when delay_base is very small
// and sample is very large (because it wrapped past zero), sample
// needs to be considered the smaller
if (!delay_base_initialized) {
// delay_base being 0 suggests that we haven't initialized
// it or its history with any real measurements yet. Initialize
// everything with this sample.
for (size_t i = 0; i < DELAY_BASE_HISTORY; i++) {
// if we don't have a value, set it to the current sample
delay_base_hist[i] = sample;
continue;
}
delay_base = sample;
delay_base_initialized = true;
}
if (wrapping_compare_less(sample, delay_base_hist[delay_base_idx], TIMESTAMP_MASK)) {
// sample is smaller than the current delay_base_hist entry
// update it
delay_base_hist[delay_base_idx] = sample;
}
// is sample lower than delay_base? If so, update delay_base
if (wrapping_compare_less(sample, delay_base, TIMESTAMP_MASK)) {
// sample is smaller than the current delay_base
// update it
delay_base = sample;
}
// this operation may wrap, and is supposed to
const uint32 delay = sample - delay_base;
// sanity check. If this is triggered, something fishy is going on
// it means the measured sample was greater than 32 seconds!
//assert(delay < 0x2000000);
cur_delay_hist[cur_delay_idx] = delay;
cur_delay_idx = (cur_delay_idx + 1) % CUR_DELAY_SIZE;
// once every minute
if (current_ms - delay_base_time > 60 * 1000) {
delay_base_time = current_ms;
delay_base_idx = (delay_base_idx + 1) % DELAY_BASE_HISTORY;
// clear up the new delay base history spot by initializing
// it to the current sample, then update it
delay_base_hist[delay_base_idx] = sample;
delay_base = delay_base_hist[0];
// Assign the lowest delay in the last 2 minutes to delay_base
for (size_t i = 0; i < DELAY_BASE_HISTORY; i++) {
if (wrapping_compare_less(delay_base_hist[i], delay_base, TIMESTAMP_MASK))
delay_base = delay_base_hist[i];
}
}
}
uint32 get_value()
{
uint32 value = UINT_MAX;
for (size_t i = 0; i < CUR_DELAY_SIZE; i++) {
value = min<uint32>(cur_delay_hist[i], value);
}
// value could be UINT_MAX if we have no samples yet...
return value;
}
};
*/
class DelayHist {
int delay_base;
// this is the history of delay samples,
// normalized by using the delay_base. These
// values are always greater than 0 and measures
// the queuing delay in microseconds
int[] cur_delay_hist = new int[CUR_DELAY_SIZE];
int cur_delay_idx;
// this is the history of delay_base. It's
// a number that doesn't have an absolute meaning
// only relative. It doesn't make sense to initialize
// it to anything other than values relative to
// what's been seen in the real world.
int[] delay_base_hist = new int[DELAY_BASE_HISTORY];
int delay_base_idx;
// the time when we last stepped the delay_base_idx
long delay_base_time;
boolean delay_base_initialized;
void clear( long current_ms)
{
delay_base_initialized = false;
delay_base = 0;
cur_delay_idx = 0;
delay_base_idx = 0;
delay_base_time = current_ms;
for (int i = 0; i < CUR_DELAY_SIZE; i++) {
cur_delay_hist[i] = 0;
}
for (int i = 0; i < DELAY_BASE_HISTORY; i++) {
delay_base_hist[i] = 0;
}
}
void shift(final int offset)
{
// the offset should never be "negative"
if (ASSERTS)_assert(offset < 0x10000000);
// increase all of our base delays by this amount
// this is used to take clock skew into account
// by observing the other side's changes in its base_delay
for (int i = 0; i < DELAY_BASE_HISTORY; i++) {
delay_base_hist[i] += offset;
}
delay_base += offset;
}
void add_sample(final int sample, long current_ms)
{
// The two clocks (in the two peers) are assumed not to
// progress at the exact same rate. They are assumed to be
// drifting, which causes the delay samples to contain
// a systematic error, either they are under-
// estimated or over-estimated. This is why we update the
// delay_base every two minutes, to adjust for this.
// This means the values will keep drifting and eventually wrap.
// We can cross the wrapping boundry in two directions, either
// going up, crossing the highest value, or going down, crossing 0.
// if the delay_base is close to the max value and sample actually
// wrapped on the other end we would see something like this:
// delay_base = 0xffffff00, sample = 0x00000400
// sample - delay_base = 0x500 which is the correct difference
// if the delay_base is instead close to 0, and we got an even lower
// sample (that will eventually update the delay_base), we may see
// something like this:
// delay_base = 0x00000400, sample = 0xffffff00
// sample - delay_base = 0xfffffb00
// this needs to be interpreted as a negative number and the actual
// recorded delay should be 0.
// It is important that all arithmetic that assume wrapping
// is done with unsigned intergers. Signed integers are not guaranteed
// to wrap the way unsigned integers do. At least GCC takes advantage
// of this relaxed rule and won't necessarily wrap signed ints.
// remove the clock offset and propagation delay.
// delay base is min of the sample and the current
// delay base. This min-operation is subject to wrapping
// and care needs to be taken to correctly choose the
// true minimum.
// specifically the problem case is when delay_base is very small
// and sample is very large (because it wrapped past zero), sample
// needs to be considered the smaller
if (!delay_base_initialized) {
// delay_base being 0 suggests that we haven't initialized
// it or its history with any real measurements yet. Initialize
// everything with this sample.
for (int i = 0; i < DELAY_BASE_HISTORY; i++) {
// if we don't have a value, set it to the current sample
delay_base_hist[i] = sample;
continue;
}
delay_base = sample;
delay_base_initialized = true;
}
if (wrapping_compare_less(sample, delay_base_hist[delay_base_idx], TIMESTAMP_MASK )) {
// sample is smaller than the current delay_base_hist entry
// update it
delay_base_hist[delay_base_idx] = sample;
}
// is sample lower than delay_base? If so, update delay_base
if (wrapping_compare_less(sample, delay_base, TIMESTAMP_MASK)) {
// sample is smaller than the current delay_base
// update it
delay_base = sample;
}
// this operation may wrap, and is supposed to
final int delay = (int)( ((long)sample)&TIMESTAMP_MASK - ((long)delay_base)&TIMESTAMP_MASK); // derp
// sanity check. If this is triggered, something fishy is going on
// it means the measured sample was greater than 32 seconds!
// if (ASSERTS)_assert(delay < 0x2000000);
cur_delay_hist[cur_delay_idx] = delay;
cur_delay_idx = (cur_delay_idx + 1) % CUR_DELAY_SIZE;
// once every minute
if (current_ms - delay_base_time > 60 * 1000) {
delay_base_time = current_ms;
delay_base_idx = (delay_base_idx + 1) % DELAY_BASE_HISTORY;
// clear up the new delay base history spot by initializing
// it to the current sample, then update it
delay_base_hist[delay_base_idx] = sample;
delay_base = delay_base_hist[0];
// Assign the lowest delay in the last 2 minutes to delay_base
for (int i = 0; i < DELAY_BASE_HISTORY; i++) {
if (wrapping_compare_less(delay_base_hist[i], delay_base, TIMESTAMP_MASK))
delay_base = delay_base_hist[i];
}
}
//System.out.println( "addsample:" + Integer.toString(sample,16) + ", delay=" + delay + " - base=" + Integer.toString(delay_base,16));
}
long get_value()
{
long value = 0xffffffffL; // derp
for (int i = 0; i < CUR_DELAY_SIZE; i++) {
value = Math.min(((long)cur_delay_hist[i])&TIMESTAMP_MASK,value);
}
// value could be UINT_MAX if we have no samples yet...
//System.out.println( "delhist=" + value );
return(uint32( value ));
}
};
//struct UTPSocket {
// ~UTPSocket();
class UTPSocketImpl implements UTPSocket {
//PackedSockAddr addr;
InetSocketAddress addr;
utp_context ctx;
// int ida; //for ack socket list
short retransmit_count;
final UnsignedShort reorder_count = new UnsignedShort();
byte duplicate_ack;
// the number of packets in the send queue. Packets that haven't
// yet been sent count as well as packets marked as needing resend
// the oldest un-acked packet in the send queue is seq_nr - cur_window_packets
final UnsignedShort cur_window_packets = new UnsignedShort();
// how much of the window is used, number of bytes in-flight
// packets that have not yet been sent do not count, packets
// that are marked as needing to be re-sent (due to a timeout)
// don't count either
int cur_window;
// maximum window size, in bytes
int max_window;
// UTP_SNDBUF setting, in bytes
int opt_sndbuf;
// UTP_RCVBUF setting, in bytes
int opt_rcvbuf;
// this is the target delay, in microseconds
// for this socket. defaults to 100000.
int target_delay;
// Is a FIN packet in the reassembly buffer?
boolean got_fin;
// Timeout procedure
boolean fast_timeout;
// max receive window for other end, in bytes
int max_window_user;
int state;
// TickCount when we last decayed window (wraps)
final UnsignedInteger last_rwin_decay = new UnsignedInteger();
// the sequence number of the FIN packet. This field is only set
// when we have received a FIN, and the flag field has the FIN flag set.
// it is used to know when it is safe to destroy the socket, we must have
// received all packets up to this sequence number first.
final UnsignedShort eof_pkt = new UnsignedShort();
// All sequence numbers up to including this have been properly received
// by us
final UnsignedShort ack_nr = new UnsignedShort();
// This is the sequence number for the next packet to be sent.
final UnsignedShort seq_nr = new UnsignedShort();
final UnsignedShort timeout_seq_nr = new UnsignedShort();
// This is the sequence number of the next packet we're allowed to
// do a fast resend with. This makes sure we only do a fast-resend
// once per packet. We can resend the packet with this sequence number
// or any later packet (with a higher sequence number).
final UnsignedShort fast_resend_seq_nr = new UnsignedShort();
int reply_micro;
long last_got_packet;
long last_sent_packet;
long last_measured_delay;
// timestamp of the last time the cwnd was full
// this is used to prevent the congestion window
// from growing when we're not sending at capacity
long last_maxed_out_window;
Object userdata;
// Round trip time
int rtt;
// Round trip time variance
int rtt_var;
// Round trip timeout
int rto;
DelayHist rtt_hist = new DelayHist();
int retransmit_timeout;
// The RTO timer will timeout here.
long rto_timeout;
// When the window size is set to zero, start this timer. It will send a new packet every 30secs.
long zerowindow_time;
int conn_seed;
// Connection ID for packets I receive
int conn_id_recv;
// Connection ID for packets I send
int conn_id_send;
// Last rcv window we advertised, in bytes
int last_rcv_win;
DelayHist our_hist = new DelayHist();
DelayHist their_hist = new DelayHist();
// extension bytes from SYN packet
byte[] extensions = new byte[8];
// MTU Discovery
// time when we should restart the MTU discovery
long mtu_discover_time;
// ceiling and floor of binary search. last is the mtu size
// we're currently using
int mtu_ceiling, mtu_floor, mtu_last;
// we only ever have a single probe in flight at any given time.
// this is the sequence number of that probe, and the size of
// that packet
int mtu_probe_seq, mtu_probe_size;
// this is the average delay samples, as compared to the initial
// sample. It's averaged over 5 seconds
int average_delay;
// this is the sum of all the delay samples
// we've made recently. The important distinction
// of these samples is that they are all made compared
// to the initial sample, this is to deal with
// wrapping in a simple way.
long current_delay_sum;
// number of sample ins current_delay_sum
int current_delay_samples;
// initialized to 0, set to the first raw delay sample
// each sample that's added to current_delay_sum
// is subtracted from the value first, to make it
// a delay relative to this sample
int average_delay_base;
// the next time we should add an average delay
// sample into average_delay_hist
long average_sample_time;
// the estimated clock drift between our computer
// and the endpoint computer. The unit is microseconds
// per 5 seconds
int clock_drift;
// just used for logging
int clock_drift_raw;
SizableCircularBuffer<ByteBuffer> inbuf = new SizableCircularBuffer<ByteBuffer>();
SizableCircularBuffer<OutgoingPacket> outbuf = new SizableCircularBuffer<OutgoingPacket>();
/*
#ifdef _DEBUG
// Public per-socket statistics, returned by utp_get_stats()
utp_socket_stats _stats;
#endif
*/
// true if we're in slow-start (exponential growth) phase
boolean slow_start;
// the slow-start threshold, in bytes
int ssthresh;
/*
void log(int level, char const *fmt, ...)
{
va_list va;
char buf[4096], buf2[4096];
va_start(va, fmt);
vsnprintf(buf, 4096, fmt, va);
va_end(va);
buf[4095] = '\0';
snprintf(buf2, 4096, "%p %s %06d %s", this, addrfmt(addr, addrbuf), conn_id_recv, buf);
buf2[4095] = '\0';
ctx->log(level, this, buf2);
}
*/
//void schedule_ack();
// called every time mtu_floor or mtu_ceiling are adjusted
//void mtu_search_update();
//void mtu_reset();
// Calculates the current receive window
int get_rcv_window()
{
// Trim window down according to what's already in buffer.
int numbuf = utp_call_get_read_buffer_size(this.ctx, this);
if(ASSERTS)_assert((int)numbuf >= 0);
return opt_rcvbuf > numbuf ? opt_rcvbuf - numbuf : 0;
}
// Test if we're ready to decay max_window
// XXX this breaks when spaced by > INT_MAX/2, which is 49
// days; the failure mode in that case is we do an extra decay
// or fail to do one when we really shouldn't.
boolean can_decay_win(UnsignedInteger msec) //const
{
return msec.minus( last_rwin_decay ) >= MAX_WINDOW_DECAY;
}
// If we can, decay max window, returns true if we actually did so
void maybe_decay_win(long current_ms)
{
if (can_decay_win(new UnsignedInteger( current_ms))) {
// TCP uses 0.5
max_window = (int)(max_window * .5);
last_rwin_decay.set( current_ms );
if (max_window < MIN_WINDOW_SIZE)
max_window = MIN_WINDOW_SIZE;
slow_start = false;
ssthresh = max_window;
}
}
int get_header_size() //const
{
return sizeof_PacketFormatV1;
}
int get_udp_mtu()
{
//socklen_t len;
//SOCKADDR_STORAGE sa = addr.get_sockaddr_storage(&len);
//return utp_call_get_udp_mtu(this->ctx, this, (const struct sockaddr *)&sa, len);
return( utp_call_get_udp_mtu( this.ctx, this, addr ));
}
int get_udp_overhead()
{
//socklen_t len;
//SOCKADDR_STORAGE sa = addr.get_sockaddr_storage(&len);
//return utp_call_get_udp_overhead(this->ctx, this, (const struct sockaddr *)&sa, len);
return( utp_call_get_udp_overhead( this.ctx, this, addr ));
}
int get_overhead()
{
return get_udp_overhead() + get_header_size();
}
//void send_data(byte* b, size_t length, bandwidth_type_t type, uint32 flags = 0);
//void send_ack(bool synack = false);
//void send_keep_alive();
/*
static void send_rst(utp_context *ctx,
const PackedSockAddr &addr, uint32 conn_id_send,
uint16 ack_nr, uint16 seq_nr);
*/
//void send_packet(OutgoingPacket *pkt);
//bool is_full(int bytes = -1);
//bool flush_packets();
//void write_outgoing_packet(size_t payload, uint flags, struct utp_iovec *iovec, size_t num_iovecs);
/*
#ifdef _DEBUG
void check_invariant();
#endif
*/
//void check_timeouts();
//int ack_packet(uint16 seq);
//size_t selective_ack_bytes(uint base, const byte* mask, byte len, int64& min_rtt);
//void selective_ack(uint base, const byte *mask, byte len);
//void apply_ccontrol(size_t bytes_acked, uint32 actual_delay, int64 min_rtt);
//size_t get_packet_size() const;
//};
void removeSocketFromAckList(UTPSocketImpl conn)
{
conn.ctx.ack_sockets.remove( conn );
/*
if (conn->ida >= 0)
{
UTPSocket *last = conn->ctx->ack_sockets[conn->ctx->ack_sockets.GetCount() - 1];
assert(last->ida < conn->ctx->ack_sockets.GetCount());
assert(conn->ctx->ack_sockets[last->ida] == last);
last->ida = conn->ida;
conn->ctx->ack_sockets[conn->ida] = last;
conn->ida = -1;
// Decrease the count
conn->ctx->ack_sockets.SetCount(conn->ctx->ack_sockets.GetCount() - 1);
}
*/
}
void schedule_ack()
{
if (!ctx.ack_sockets.contains( this )){
//#if UTP_DEBUG_LOGGING
//log(UTP_LOG_DEBUG, "schedule_ack");
//#endif
//ida = ctx.ack_sockets.add(this);
ctx.ack_sockets.add(this);
} else {
//#if UTP_DEBUG_LOGGING
//log(UTP_LOG_DEBUG, "schedule_ack: already in list");
//#endif
}
}
void send_data(PacketFormatBase packet_header, byte[] packet_payload, int type)
{
send_data( packet_header, packet_payload, type, 0 );
}
void send_data(PacketFormatBase packet_header, byte[] packet_payload, int type, int flags)
{
// time stamp this packet with local time, the stamp goes into
// the header of every packet at the 8th byte for 8 bytes :
// two integers, check packet.h for more
long time = utp_call_get_microseconds(ctx, this);
PacketFormatV1 b1 = (PacketFormatV1)packet_header;
b1.tv_usec = (int)time;
b1.reply_micro = reply_micro;
last_sent_packet = ctx.current_ms;
byte[] serialised_data;
byte[] header_data = packet_header.serialise();
if ( packet_payload == null ){
serialised_data = header_data;
}else{
byte[] temp = new byte[header_data.length + packet_payload.length];
System.arraycopy( header_data, 0, temp, 0, header_data.length );
System.arraycopy( packet_payload, 0, temp, header_data.length, packet_payload.length );
serialised_data = temp;
}
int length = serialised_data.length;
/*
#ifdef _DEBUG
_stats.nbytes_xmit += length;
++_stats.nxmit;
#endif
*/
if (ctx.callbacks[UTP_ON_OVERHEAD_STATISTICS] != null ) {
int n;
if (type == payload_bandwidth) {
// if this packet carries payload, just
// count the header as overhead
type = header_overhead;
n = get_overhead();
} else {
n = length + get_udp_overhead();
}
utp_call_on_overhead_statistics(ctx, this, 1, n, type);
}
/*
#if UTP_DEBUG_LOGGING
int flags2 = b1->type();
uint16 seq_nr = b1->seq_nr;
uint16 ack_nr = b1->ack_nr;
log(UTP_LOG_DEBUG, "send %s len:%u id:%u timestamp:"I64u" reply_micro:%u flags:%s seq_nr:%u ack_nr:%u",
addrfmt(addr, addrbuf), (uint)length, conn_id_send, time, reply_micro, flagnames[flags2],
seq_nr, ack_nr);
#endif
*/
send_to_addr(ctx, serialised_data, addr, flags);
removeSocketFromAckList(this);
}
void send_ack()
{
send_ack( false );
}
void send_ack(boolean synack)
{
PacketFormatBase base;
PacketFormatExtensionsV1 pfa1 = new PacketFormatExtensionsV1();
base = pfa1;
last_rcv_win = get_rcv_window();
pfa1.set_version(1);
pfa1.set_type(ST_STATE);
pfa1.ext = 0;
pfa1.connid = (short)conn_id_send;
pfa1.ack_nr = (short)ack_nr.i;
pfa1.seq_nr = (short)seq_nr.i;
pfa1.windowsize = (int)last_rcv_win;
//len = sizeof(PacketFormatV1);
// we never need to send EACK for connections
// that are shutting down
if (reorder_count.i != 0 && state < CS_GOT_FIN) {
// if reorder count > 0, send an EACK.
// reorder count should always be 0
// for synacks, so this should not be
// as synack
if (ASSERTS)_assert(!synack);
pfa1.ext = 1;
pfa1.ext_next = 0;
pfa1.ext_len = 4;
int m = 0;
// reorder count should only be non-zero
// if the packet ack_nr + 1 has not yet
// been received
if (ASSERTS)_assert(inbuf.get(ack_nr.i + 1) == null);
int window = Math.min(14+16, inbuf.size());
// Generate bit mask of segments received.
for (int i = 0; i < window; i++) {
if (inbuf.get(ack_nr.i + i + 2) != null) {
m |= 1 << i;
//#if UTP_DEBUG_LOGGING
//log(UTP_LOG_DEBUG, "EACK packet [%u]", ack_nr + i + 2);
//#endif
}
}
pfa1.extensions[0] = (byte)m;
pfa1.extensions[1] = (byte)(m >> 8);
pfa1.extensions[2] = (byte)(m >> 16);
pfa1.extensions[3] = (byte)(m >> 24);
//len += 4 + 2;
//#if UTP_DEBUG_LOGGING
//log(UTP_LOG_DEBUG, "Sending EACK %u [%u] bits:[%032b]", ack_nr, conn_id_send, m);
//#endif
} else {
//#if UTP_DEBUG_LOGGING
//log(UTP_LOG_DEBUG, "Sending ACK %u [%u]", ack_nr, conn_id_send);
//#endif
}
send_data(base, null, ack_overhead);
removeSocketFromAckList(this);
}
void send_keep_alive()
{
ack_nr.dec();
//#if UTP_DEBUG_LOGGING
//log(UTP_LOG_DEBUG, "Sending KeepAlive ACK %u [%u]", ack_nr, conn_id_send);
//#endif
send_ack();
ack_nr.inc();
}
void send_packet(OutgoingPacket pkt)
{
// only count against the quota the first time we
// send the packet. Don't enforce quota when closing
// a socket. Only enforce the quota when we're sending
// at slow rates (max window < packet size)
//size_t max_send = min(max_window, opt_sndbuf, max_window_user);
long cur_time = utp_call_get_milliseconds(this.ctx, this);
if (pkt.transmissions == 0 || pkt.need_resend) {
cur_window += pkt.payload;
}
pkt.need_resend = false;
PacketFormatV1 p1 = (PacketFormatV1)pkt.packet_header;
p1.ack_nr = (short)ack_nr.i;
pkt.time_sent = utp_call_get_microseconds(this.ctx, this);
//socklen_t salen;
//SOCKADDR_STORAGE sa = addr.get_sockaddr_storage(&salen);
boolean use_as_mtu_probe = false;
// TODO: this is subject to nasty wrapping issues! Below as well
if (mtu_discover_time < cur_time) {
// it's time to reset our MTU assupmtions
// and trigger a new search
mtu_reset();
}
// don't use packets that are larger then mtu_ceiling
// as probes, since they were probably used as probes
// already and failed, now we need it to fragment
// just to get it through
// if seq_nr == 1, the probe would end up being 0
// which is a magic number representing no-probe
// that why we don't send a probe for a packet with
// sequence number 0
if (mtu_floor < mtu_ceiling
&& pkt.length > mtu_floor
&& pkt.length <= mtu_ceiling
&& mtu_probe_seq == 0
&& seq_nr.i != 1
&& pkt.transmissions == 0) {
// we've already incremented seq_nr
// for this packet
mtu_probe_seq = (seq_nr.i - 1) & ACK_NR_MASK;
mtu_probe_size = pkt.length;
if(ASSERTS)_assert(pkt.length >= mtu_floor);
if(ASSERTS)_assert(pkt.length <= mtu_ceiling);
use_as_mtu_probe = true;
//log(UTP_LOG_MTU, "MTU [PROBE] floor:%d ceiling:%d current:%d"
// , mtu_floor, mtu_ceiling, mtu_probe_size);
}
pkt.transmissions++;
send_data(pkt.packet_header, pkt.packet_payload,
(state == CS_SYN_SENT) ? connect_overhead
: (pkt.transmissions == 1) ? payload_bandwidth
: retransmit_overhead, use_as_mtu_probe ? UTP_UDP_DONTFRAG : 0);
}
boolean is_full()
{
return( is_full( -1 ));
}
boolean is_full(int bytes)
{
int packet_size = get_packet_size();
if (bytes < 0) bytes = packet_size;
else if (bytes > packet_size) bytes = packet_size;
int max_send = Math.min(max_window, Math.min( opt_sndbuf, max_window_user));
// subtract one to save space for the FIN packet
if (cur_window_packets.i >= OUTGOING_BUFFER_MAX_SIZE - 1) {
//#if UTP_DEBUG_LOGGING
//log(UTP_LOG_DEBUG, "is_full:false cur_window_packets:%d MAX:%d", cur_window_packets, OUTGOING_BUFFER_MAX_SIZE - 1);
//#endif
last_maxed_out_window = ctx.current_ms;
return true;
}
/*
#if UTP_DEBUG_LOGGING
log(UTP_LOG_DEBUG, "is_full:%s. cur_window:%u pkt:%u max:%u cur_window_packets:%u max_window:%u"
, (cur_window + bytes > max_send) ? "true" : "false"
, cur_window, bytes, max_send, cur_window_packets
, max_window);
#endif
*/
if (cur_window + bytes > max_send) {
last_maxed_out_window = ctx.current_ms;
return true;
}
return false;
}
boolean flush_packets()
{
int packet_size = get_packet_size();
// send packets that are waiting on the pacer to be sent
// i has to be an unsigned 16 bit counter to wrap correctly
// signed types are not guaranteed to wrap the way you expect
for (final UnsignedShort i = new UnsignedShort(seq_nr.i - cur_window_packets.i); i.i != seq_nr.i; i.inc()) {
OutgoingPacket pkt = outbuf.get(i.i);
if (pkt == null || (pkt.transmissions > 0 && pkt.need_resend == false)) continue;
// have we run out of quota?
if (is_full()) return true;
// Nagle check
// don't send the last packet if we have one packet in-flight
// and the current packet is still smaller than packet_size.
if (i.i != ((seq_nr.i - 1) & ACK_NR_MASK) ||
cur_window_packets.i == 1 ||
pkt.payload >= packet_size) {
send_packet(pkt);
}
}
return false;
}
// @payload: number of bytes to send
// @flags: either ST_DATA, or ST_FIN
// @iovec: base address of iovec array
// @num_iovecs: number of iovecs in array
void write_outgoing_packet(int payload, int flags, ByteBuffer[] iovec, int num_iovecs)
{
// Setup initial timeout timer
if (cur_window_packets.i == 0) {
retransmit_timeout = rto;
rto_timeout = ctx.current_ms + retransmit_timeout;
if (ASSERTS)_assert(cur_window == 0);
}
int packet_size = get_packet_size();
do {
if (ASSERTS)_assert(cur_window_packets.i < OUTGOING_BUFFER_MAX_SIZE);
if (ASSERTS)_assert(flags == ST_DATA || flags == ST_FIN);
int added = 0;
OutgoingPacket pkt = null;
if (cur_window_packets.i > 0) {
pkt = outbuf.get(seq_nr.i - 1);
}
int header_size = get_header_size();
boolean append = true;
// if there's any room left in the last packet in the window
// and it hasn't been sent yet, fill that frame first
if (payload != 0 && pkt != null && (pkt.transmissions==0) && pkt.payload < packet_size) {
// Use the previous unsent packet
added = Math.min(payload + pkt.payload, Math.max(packet_size, pkt.payload)) - pkt.payload;
//pkt = (OutgoingPacket*)realloc(pkt,
// (sizeof(OutgoingPacket) - 1) +
// header_size +
// pkt->payload + added);
byte[] old_payload = pkt.packet_payload;
byte[] new_payload = new byte[old_payload.length + added];
System.arraycopy( old_payload, 0, new_payload, 0, old_payload.length );
pkt.packet_payload = new_payload;
outbuf.put(seq_nr.i - 1, pkt);
append = false;
if (ASSERTS)_assert(!pkt.need_resend);
} else {
// Create the packet to send.
added = payload;
//pkt = (OutgoingPacket*)malloc((sizeof(OutgoingPacket) - 1) +
// header_size +
// added);
pkt = new OutgoingPacket();
pkt.packet_header = new PacketFormatV1();
pkt.packet_payload = new byte[added];
pkt.payload = 0;
pkt.transmissions = 0;
pkt.need_resend = false;
}
if (added > 0) {
if (ASSERTS)_assert(flags == ST_DATA);
byte[] packet_payload = pkt.packet_payload;
// Fill it with data from the upper layer.
//unsigned char *p = pkt->data + header_size + pkt->payload;
int p = 0;
int needed = added;
/*
while (needed) {
*p = *(char*)iovec[0].iov_base;
p++;
iovec[0].iov_base = (char *)iovec[0].iov_base + 1;
needed--;
}
*/
for (int i = 0; i < num_iovecs && needed > 0; i++) {
if (iovec[i].remaining() == 0)
continue;
int num = Math.min(needed, iovec[i].remaining());
//memcpy(p, iovec[i].iov_base, num);
//System.arraycopy( iovec[i].iov_base, iovec[i].iov_offset, packet_payload, p, num );
iovec[i].get( packet_payload, p, num );
p += num;
//iovec[i].iov_len -= num;
//iovec[i].iov_offset += num; // iovec[i].iov_base += num, but without void* pointers
needed -= num;
}
if (ASSERTS)_assert(needed == 0);
}
pkt.payload += added;
pkt.length = header_size + pkt.payload;
last_rcv_win = get_rcv_window();
PacketFormatV1 p1 = (PacketFormatV1)pkt.packet_header;
p1.set_version(1);
p1.set_type(flags);
p1.ext = 0;
p1.connid = (short)conn_id_send;
p1.windowsize = (int)last_rcv_win;
p1.ack_nr = (short)ack_nr.i;
if (append) {
// Remember the message in the outgoing queue.
outbuf.ensure_size(seq_nr.i, cur_window_packets.i);
outbuf.put(seq_nr.i, pkt);
p1.seq_nr = (short)seq_nr.i;
seq_nr.inc();
cur_window_packets.inc();
}
payload -= added;
} while (payload > 0 );
flush_packets();
}
/*
#ifdef _DEBUG
void UTPSocket::check_invariant()
{
if (reorder_count > 0) {
assert(inbuf.get(ack_nr + 1) == NULL);
}
size_t outstanding_bytes = 0;
for (int i = 0; i < cur_window_packets; ++i) {
OutgoingPacket *pkt = (OutgoingPacket*)outbuf.get(seq_nr - i - 1);
if (pkt == 0 || pkt->transmissions == 0 || pkt->need_resend) continue;
outstanding_bytes += pkt->payload;
}
assert(outstanding_bytes == cur_window);
}
#endif
*/
void check_timeouts()
{
//#ifdef _DEBUG
//check_invariant();
//#endif
// this invariant should always be true
if(ASSERTS)_assert(cur_window_packets.i == 0 || outbuf.get(seq_nr.i - cur_window_packets.i) != null);
/*
#if UTP_DEBUG_LOGGING
log(UTP_LOG_DEBUG, "CheckTimeouts timeout:%d max_window:%u cur_window:%u "
"state:%s cur_window_packets:%u",
(int)(rto_timeout - ctx->current_ms), (uint)max_window, (uint)cur_window,
statenames[state], cur_window_packets);
#endif
*/
if (state != CS_DESTROY) flush_packets();
switch (state) {
case CS_SYN_SENT:
case CS_CONNECTED_FULL:
case CS_CONNECTED:
case CS_FIN_SENT: {
// Reset max window...
if ((int)(ctx.current_ms - zerowindow_time) >= 0 && max_window_user == 0) {
max_window_user = PACKET_SIZE;
}
if ((int)(ctx.current_ms - rto_timeout) >= 0
&& rto_timeout > 0) {
boolean ignore_loss = false;
if (cur_window_packets.i == 1
&& ((seq_nr.i - 1) & ACK_NR_MASK) == mtu_probe_seq
&& mtu_probe_seq != 0) {
// we only had a single outstanding packet that timed out, and it was the probe
mtu_ceiling = mtu_probe_size - 1;
mtu_search_update();
// this packet was most likely dropped because the packet size being
// too big and not because congestion. To accelerate the binary search for
// the MTU, resend immediately and don't reset the window size
ignore_loss = true;
//log(UTP_LOG_MTU, "MTU [PROBE-TIMEOUT] floor:%d ceiling:%d current:%d"
// , mtu_floor, mtu_ceiling, mtu_last);
}
// we dropepd the probe, clear these fields to
// allow us to send a new one
mtu_probe_seq = mtu_probe_size = 0;
//log(UTP_LOG_MTU, "MTU [TIMEOUT]");
/*
OutgoingPacket *pkt = (OutgoingPacket*)outbuf.get(seq_nr - cur_window_packets);
// If there were a lot of retransmissions, force recomputation of round trip time
if (pkt->transmissions >= 4)
rtt = 0;
*/
// Increase RTO
int new_timeout = ignore_loss ? retransmit_timeout : retransmit_timeout * 2;
if (retransmit_count >= 4 || (state == CS_SYN_SENT && retransmit_count >= 2)) {
// 4 consecutive transmissions have timed out. Kill it. If we
// haven't even connected yet, give up after only 2 consecutive
// failed transmissions.
if (state == CS_FIN_SENT)
state = CS_DESTROY;
else
state = CS_RESET;
utp_call_on_error(ctx, this, UTP_ETIMEDOUT);
return;
}
retransmit_timeout = new_timeout;
rto_timeout = ctx.current_ms + new_timeout;
if (!ignore_loss) {
// On Timeout
duplicate_ack = 0;
int packet_size = get_packet_size();
if (cur_window_packets.i == 0 && max_window > packet_size) {
// we don't have any packets in-flight, even though
// we could. This implies that the connection is just
// idling. No need to be aggressive about resetting the
// congestion window. Just let it decay by a 3:rd.
// don't set it any lower than the packet size though
max_window = Math.max(max_window * 2 / 3, (int)(packet_size));
} else {
// our delay was so high that our congestion window
// was shrunk below one packet, preventing us from
// sending anything for one time-out period. Now, reset
// the congestion window to fit one packet, to start over
// again
max_window = packet_size;
slow_start = true;
}
}
// every packet should be considered lost
for (int i = 0; i < cur_window_packets.i; ++i) {
OutgoingPacket pkt = outbuf.get(seq_nr.i - i - 1);
if (pkt == null || pkt.transmissions == 0 || pkt.need_resend) continue;
pkt.need_resend = true;
if (ASSERTS)_assert(cur_window >= pkt.payload);
cur_window -= pkt.payload;
}
if (cur_window_packets.i > 0) {
retransmit_count++;
// used in parse_log.py
//log(UTP_LOG_NORMAL, "Packet timeout. Resend. seq_nr:%u. timeout:%u "
// "max_window:%u cur_window_packets:%d"
// , seq_nr - cur_window_packets, retransmit_timeout
// , (uint)max_window, int(cur_window_packets));
fast_timeout = true;
timeout_seq_nr.set( seq_nr.i );
OutgoingPacket pkt = outbuf.get(seq_nr.i - cur_window_packets.i);
if (ASSERTS)_assert(pkt!=null);
// Re-send the packet.
send_packet(pkt);
}
}
// Mark the socket as writable. If the cwnd has grown, or if the number of
// bytes in-flight is lower than cwnd, we need to make the socket writable again
// in case it isn't
if (state == CS_CONNECTED_FULL && !is_full()) {
state = CS_CONNECTED;
//#if UTP_DEBUG_LOGGING
//log(UTP_LOG_DEBUG, "Socket writable. max_window:%u cur_window:%u packet_size:%u",
// (uint)max_window, (uint)cur_window, (uint)get_packet_size());
//#endif
utp_call_on_state_change(this.ctx, this, UTP_STATE_WRITABLE);
}
if (state >= CS_CONNECTED && state <= CS_FIN_SENT) {
if ((int)(ctx.current_ms - last_sent_packet) >= KEEPALIVE_INTERVAL) {
send_keep_alive();
}
}
break;
}
// Close?
case CS_GOT_FIN:
case CS_DESTROY_DELAY:
if ((int)(ctx.current_ms - rto_timeout) >= 0) {
state = (state == CS_DESTROY_DELAY) ? CS_DESTROY : CS_RESET;
if (cur_window_packets.i > 0) {
utp_call_on_error(ctx, this, UTP_ECONNRESET);
}
}
break;
// prevent warning
case CS_UNINITIALIZED:
case CS_IDLE:
case CS_RESET:
case CS_DESTROY:
break;
}
}
// this should be called every time we change mtu_floor or mtu_ceiling
void mtu_search_update()
{
if (ASSERTS)_assert(mtu_floor <= mtu_ceiling);
// binary search
mtu_last = (mtu_floor + mtu_ceiling) / 2;
// enable a new probe to be sent
mtu_probe_seq = mtu_probe_size = 0;
// if the floor and ceiling are close enough, consider the
// MTU binary search complete. We set the current value
// to floor since that's the only size we know can go through
// also set the ceiling to floor to terminate the searching
if (mtu_ceiling - mtu_floor <= 16) {
mtu_last = mtu_floor;
//log(UTP_LOG_MTU, "MTU [DONE] floor:%d ceiling:%d current:%d"
// , mtu_floor, mtu_ceiling, mtu_last);
mtu_ceiling = mtu_floor;
if (ASSERTS)_assert(mtu_floor <= mtu_ceiling);
// Do another search in 30 minutes
mtu_discover_time = utp_call_get_milliseconds(this.ctx, this) + 30 * 60 * 1000;
}
}
void mtu_reset()
{
mtu_ceiling = get_udp_mtu();
// Less would not pass TCP...
mtu_floor = 576;
//log(UTP_LOG_MTU, "MTU [RESET] floor:%d ceiling:%d current:%d"
// , mtu_floor, mtu_ceiling, mtu_last);
if (ASSERTS)_assert(mtu_floor <= mtu_ceiling);
mtu_discover_time = utp_call_get_milliseconds(this.ctx, this) + 30 * 60 * 1000;
}
// returns:
// 0: the packet was acked.
// 1: it means that the packet had already been acked
// 2: the packet has not been sent yet
int ack_packet(UnsignedShort seq)
{
OutgoingPacket pkt = outbuf.get(seq.i);
// the packet has already been acked (or not sent)
if (pkt == null) {
//#if UTP_DEBUG_LOGGING
//log(UTP_LOG_DEBUG, "got ack for:%u (already acked, or never sent)", seq);
//#endif
return 1;
}
// can't ack packets that haven't been sent yet!
if (pkt.transmissions == 0) {
//#if UTP_DEBUG_LOGGING
//log(UTP_LOG_DEBUG, "got ack for:%u (never sent, pkt_size:%u need_resend:%u)",
// seq, (uint)pkt->payload, pkt->need_resend);
//#endif
return 2;
}
//#if UTP_DEBUG_LOGGING
//log(UTP_LOG_DEBUG, "got ack for:%u (pkt_size:%u need_resend:%u)",
// seq, (uint)pkt->payload, pkt->need_resend);
//#endif
outbuf.put(seq.i, null);
// if we never re-sent the packet, update the RTT estimate
if (pkt.transmissions == 1) {
// Estimate the round trip time.
int ertt = (int)((utp_call_get_microseconds(this.ctx, this) - pkt.time_sent) / 1000);
if (rtt == 0) {
// First round trip time sample
rtt = ertt;
rtt_var = ertt / 2;
// sanity check. rtt should never be more than 6 seconds
// assert(rtt < 6000);
} else {
// Compute new round trip times
int delta = (int)rtt - ertt;
rtt_var = rtt_var + (int)(Math.abs(delta) - rtt_var) / 4;
rtt = rtt - rtt/8 + ertt/8;
// sanity check. rtt should never be more than 6 seconds
// assert(rtt < 6000);
rtt_hist.add_sample(ertt, ctx.current_ms);
}
rto = Math.max(rtt + rtt_var * 4, 1000);
//#if UTP_DEBUG_LOGGING
//log(UTP_LOG_DEBUG, "rtt:%u avg:%u var:%u rto:%u",
// ertt, rtt, rtt_var, rto);
//#endif
}
retransmit_timeout = rto;
rto_timeout = ctx.current_ms + rto;
// if need_resend is set, this packet has already
// been considered timed-out, and is not included in
// the cur_window anymore
if (!pkt.need_resend) {
if (ASSERTS)_assert(cur_window >= pkt.payload);
cur_window -= pkt.payload;
}
//free(pkt);
retransmit_count = 0;
return 0;
}
// count the number of bytes that were acked by the EACK header
int selective_ack_bytes(int base, byte[] mask, int len, long[] min_rtt)
{
if (cur_window_packets.i == 0) return 0;
int acked_bytes = 0;
int bits = len * 8 - 1; // PARG - fixed this as the -1 was missing...
long now = utp_call_get_microseconds(this.ctx, this);
do {
int v = base + bits;
// ignore bits that haven't been sent yet
// see comment in UTPSocket::selective_ack
if (((seq_nr.i - v - 1) & ACK_NR_MASK) >= ((cur_window_packets.i - 1) & ACK_NR_MASK )) // PARG - check?
continue;
// ignore bits that represents packets we haven't sent yet
// or packets that have already been acked
OutgoingPacket pkt = outbuf.get(v);
if (pkt==null || pkt.transmissions == 0)
continue;
// Count the number of segments that were successfully received past it.
if (bits >= 0 && (mask[bits>>3] & (1 << (bits & 7)))!=0) {
if (ASSERTS)_assert((int)(pkt.payload) >= 0);
acked_bytes += pkt.payload;
if (pkt.time_sent < now)
min_rtt[0] = Math.min(min_rtt[0], now - pkt.time_sent);
else
min_rtt[0] = Math.min(min_rtt[0], 50000);
continue;
}
} while (--bits >= -1);
return acked_bytes;
}
public static final int MAX_EACK = 128;
void selective_ack(int base, byte[] mask, int len)
{
if (cur_window_packets.i == 0) return;
// the range is inclusive [0, 31] bits
int bits = len * 8 - 1;
int count = 0;
// resends is a stack of sequence numbers we need to resend. Since we
// iterate in reverse over the acked packets, at the end, the top packets
// are the ones we want to resend
// PARG - previous impl had a bug here regarding resends array size - maybe this has been fixed?
int[] resends = new int[MAX_EACK];
int nr = 0;
/*
#if UTP_DEBUG_LOGGING
char bitmask[1024] = {0};
int counter = bits;
for (int i = 0; i <= bits; ++i) {
bool bit_set = counter >= 0 && mask[counter>>3] & (1 << (counter & 7));
bitmask[i] = bit_set ? '1' : '0';
--counter;
}
log(UTP_LOG_DEBUG, "Got EACK [%s] base:%u", bitmask, base);
#endif
*/
do {
// we're iterating over the bits from higher sequence numbers
// to lower (kind of in reverse order, wich might not be very
// intuitive)
int v = base + bits;
// ignore bits that haven't been sent yet
// and bits that fall below the ACKed sequence number
// this can happen if an EACK message gets
// reordered and arrives after a packet that ACKs up past
// the base for thie EACK message
// this is essentially the same as:
// if v >= seq_nr || v <= seq_nr - cur_window_packets
// but it takes wrapping into account
// if v == seq_nr the -1 will make it wrap. if v > seq_nr
// it will also wrap (since it will fall further below 0)
// and be > cur_window_packets.
// if v == seq_nr - cur_window_packets, the result will be
// seq_nr - (seq_nr - cur_window_packets) - 1
// == seq_nr - seq_nr + cur_window_packets - 1
// == cur_window_packets - 1 which will be caught by the
// test. If v < seq_nr - cur_window_packets the result will grow
// fall furhter outside of the cur_window_packets range.
// sequence number space:
//
// rejected < accepted > rejected
// <============+--------------+============>
// ^ ^
// | |
// (seq_nr-wnd) seq_nr
if (((seq_nr.i - v - 1) & ACK_NR_MASK) >= (short)(cur_window_packets.i - 1))
continue;
// this counts as a duplicate ack, even though we might have
// received an ack for this packet previously (in another EACK
// message for instance)
boolean bit_set = bits >= 0 && ( mask[bits>>3] & (1 << (bits & 7))) != 0;
// if this packet is acked, it counts towards the duplicate ack counter
if (bit_set) count++;
// ignore bits that represents packets we haven't sent yet
// or packets that have already been acked
OutgoingPacket pkt = outbuf.get(v);
if (pkt == null || pkt.transmissions == 0) {
//#if UTP_DEBUG_LOGGING
//log(UTP_LOG_DEBUG, "skipping %u. pkt:%08x transmissions:%u %s",
// v, pkt, pkt?pkt->transmissions:0, pkt?"(not sent yet?)":"(already acked?)");
//#endif
continue;
}
// Count the number of segments that were successfully received past it.
if (bit_set) {
// the selective ack should never ACK the packet we're waiting for to decrement cur_window_packets
if (ASSERTS)_assert((v & outbuf.mask) != ((seq_nr.i - cur_window_packets.i) & outbuf.mask));
ack_packet(new UnsignedShort( v ));
continue;
}
// Resend segments
// if count is less than our re-send limit, we haven't seen enough
// acked packets in front of this one to warrant a re-send.
// if count == 0, we're still going through the tail of zeroes
if (((v - fast_resend_seq_nr.i) & ACK_NR_MASK) <= OUTGOING_BUFFER_MAX_SIZE &&
count >= DUPLICATE_ACKS_BEFORE_RESEND) {
// resends is a stack, and we're mostly interested in the top of it
// if we're full, just throw away the lower half
if (nr >= MAX_EACK - 2) {
//memmove(resends, &resends[MAX_EACK/2], MAX_EACK/2 * sizeof(resends[0]));
System.arraycopy(resends, MAX_EACK/2, resends, 0, MAX_EACK/2);
nr -= MAX_EACK / 2;
}
resends[nr++] = v;
//#if UTP_DEBUG_LOGGING
//log(UTP_LOG_DEBUG, "no ack for %u", v);
//#endif
} else {
//#if UTP_DEBUG_LOGGING
//log(UTP_LOG_DEBUG, "not resending %u count:%d dup_ack:%u fast_resend_seq_nr:%u",
// v, count, duplicate_ack, fast_resend_seq_nr);
//#endif
}
} while (--bits >= -1);
if (((base - 1 - fast_resend_seq_nr.i) & ACK_NR_MASK) <= OUTGOING_BUFFER_MAX_SIZE &&
count >= DUPLICATE_ACKS_BEFORE_RESEND) {
// if we get enough duplicate acks to start
// resending, the first packet we should resend
// is base-1
resends[nr++] = (base - 1) & ACK_NR_MASK;
//#if UTP_DEBUG_LOGGING
//log(UTP_LOG_DEBUG, "no ack for %u", (base - 1) & ACK_NR_MASK);
//#endif
} else {
//#if UTP_DEBUG_LOGGING
//log(UTP_LOG_DEBUG, "not resending %u count:%d dup_ack:%u fast_resend_seq_nr:%u",
// base - 1, count, duplicate_ack, fast_resend_seq_nr);
//#endif
}
boolean back_off = false;
int i = 0;
while (nr > 0) {
int v = resends[--nr];
// don't consider the tail of 0:es to be lost packets
// only unacked packets with acked packets after should
// be considered lost
OutgoingPacket pkt = outbuf.get(v);
// this may be an old (re-ordered) packet, and some of the
// packets in here may have been acked already. In which
// case they will not be in the send queue anymore
if (pkt==null) continue;
// used in parse_log.py
//log(UTP_LOG_NORMAL, "Packet %u lost. Resending", v);
// On Loss
back_off = true;
//#ifdef _DEBUG
//++_stats.rexmit;
//#endif
send_packet(pkt);
fast_resend_seq_nr.set((short)(v + 1));
// Re-send max 4 packets.
if (++i >= 4) break;
}
if (back_off)
maybe_decay_win(ctx.current_ms);
duplicate_ack = (byte)count;
}
void apply_ccontrol( int bytes_acked, int actual_delay, long min_rtt)
{
// the delay can never be greater than the rtt. The min_rtt
// variable is the RTT in microseconds
if (ASSERTS)_assert(min_rtt >= 0);
int our_delay = (int)( Math.min(our_hist.get_value(), uint32(min_rtt)));
if (ASSERTS)_assert(our_delay != INT_MAX);
if (ASSERTS)_assert(our_delay >= 0);
//System.out.println( "delays: " + our_delay + ", " + this.mtu_last);
utp_call_on_delay_sample(this.ctx, this, our_delay / 1000);
// This test the connection under heavy load from foreground
// traffic. Pretend that our delays are very high to force the
// connection to use sub-packet size window sizes
//our_delay *= 4;
// target is microseconds
int target = target_delay;
if (target <= 0) target = 100000;
// this is here to compensate for very large clock drift that affects
// the congestion controller into giving certain endpoints an unfair
// share of the bandwidth. We have an estimate of the clock drift
// (clock_drift). The unit of this is microseconds per 5 seconds.
// empirically, a reasonable cut-off appears to be about 200000
// (which is pretty high). The main purpose is to compensate for
// people trying to "cheat" uTP by making their clock run slower,
// and this definitely catches that without any risk of false positives
// if clock_drift < -200000 start applying a penalty delay proportional
// to how far beoynd -200000 the clock drift is
int penalty = 0;
if (clock_drift < -200000) {
penalty = (-clock_drift - 200000) / 7;
our_delay += penalty;
}
double off_target = target - our_delay;
// this is the same as:
//
// (min(off_target, target) / target) * (bytes_acked / max_window) * MAX_CWND_INCREASE_BYTES_PER_RTT
//
// so, it's scaling the max increase by the fraction of the window this ack represents, and the fraction
// of the target delay the current delay represents.
// The min() around off_target protects against crazy values of our_delay, which may happen when th
// timestamps wraps, or by just having a malicious peer sending garbage. This caps the increase
// of the window size to MAX_CWND_INCREASE_BYTES_PER_RTT per rtt.
// as for large negative numbers, this direction is already capped at the min packet size further down
// the min around the bytes_acked protects against the case where the window size was recently
// shrunk and the number of acked bytes exceeds that. This is considered no more than one full
// window, in order to keep the gain within sane boundries.
if (ASSERTS)_assert(bytes_acked > 0);
double window_factor = (double)Math.min(bytes_acked, max_window) / (double)Math.max(max_window, bytes_acked);
double delay_factor = off_target / target;
double scaled_gain = MAX_CWND_INCREASE_BYTES_PER_RTT * window_factor * delay_factor;
// since MAX_CWND_INCREASE_BYTES_PER_RTT is a cap on how much the window size (max_window)
// may increase per RTT, we may not increase the window size more than that proportional
// to the number of bytes that were acked, so that once one window has been acked (one rtt)
// the increase limit is not exceeded
// the +1. is to allow for floating point imprecision
if (ASSERTS)_assert(scaled_gain <= 1. + MAX_CWND_INCREASE_BYTES_PER_RTT * (double)Math.min(bytes_acked, max_window) / (double)Math.max(max_window, bytes_acked));
if (scaled_gain > 0 && ctx.current_ms - last_maxed_out_window > 1000) {
// if it was more than 1 second since we tried to send a packet
// and stopped because we hit the max window, we're most likely rate
// limited (which prevents us from ever hitting the window size)
// if this is the case, we cannot let the max_window grow indefinitely
scaled_gain = 0;
}
int ledbat_cwnd = (int)((max_window + scaled_gain < MIN_WINDOW_SIZE)?MIN_WINDOW_SIZE:max_window + scaled_gain );
if (slow_start) {
int ss_cwnd = (int)( max_window + window_factor*get_packet_size());
if (ss_cwnd > ssthresh) {
slow_start = false;
} else if (our_delay > target*0.9) {
// even if we're a little under the target delay, we conservatively
// discontinue the slow start phase
slow_start = false;
ssthresh = max_window;
} else {
max_window = Math.max(ss_cwnd, ledbat_cwnd);
}
} else {
max_window = ledbat_cwnd;
}
// make sure that the congestion window is below max
// make sure that we don't shrink our window too small
//max_window = clamp<size_t>(max_window, MIN_WINDOW_SIZE, opt_sndbuf);
max_window = Math.min( max_window, Math.max( MIN_WINDOW_SIZE, opt_sndbuf));
// used in parse_log.py
/*
log(UTP_LOG_NORMAL, "actual_delay:%u our_delay:%d their_delay:%u off_target:%d max_window:%u "
"delay_base:%u delay_sum:%d target_delay:%d acked_bytes:%u cur_window:%u "
"scaled_gain:%f rtt:%u rate:%u wnduser:%u rto:%u timeout:%d get_microseconds:"I64u" "
"cur_window_packets:%u packet_size:%u their_delay_base:%u their_actual_delay:%u "
"average_delay:%d clock_drift:%d clock_drift_raw:%d delay_penalty:%d current_delay_sum:"I64u
"current_delay_samples:%d average_delay_base:%d last_maxed_out_window:"I64u" opt_sndbuf:%d "
"current_ms:"I64u"",
actual_delay, our_delay / 1000, their_hist.get_value() / 1000,
int(off_target / 1000), uint(max_window), uint32(our_hist.delay_base),
int((our_delay + their_hist.get_value()) / 1000), int(target / 1000), uint(bytes_acked),
(uint)(cur_window - bytes_acked), (float)(scaled_gain), rtt,
(uint)(max_window * 1000 / (rtt_hist.delay_base?rtt_hist.delay_base:50)),
(uint)max_window_user, rto, (int)(rto_timeout - ctx->current_ms),
utp_call_get_microseconds(this->ctx, this), cur_window_packets, (uint)get_packet_size(),
their_hist.delay_base, their_hist.delay_base + their_hist.get_value(),
average_delay, clock_drift, clock_drift_raw, penalty / 1000,
current_delay_sum, current_delay_samples, average_delay_base,
uint64(last_maxed_out_window), int(opt_sndbuf), uint64(ctx->current_ms));
*/
}
// returns the max number of bytes of payload the uTP
// connection is allowed to send
int get_packet_size()
{
int header_size = sizeof_PacketFormatV1;
int mtu = mtu_last != 0 ? mtu_last : mtu_ceiling;
return mtu - header_size;
}
void UTP_Free()
{
//#if UTP_DEBUG_LOGGING
//log(UTP_LOG_DEBUG, "Killing socket");
//#endif
utp_call_on_state_change(ctx, this, UTP_STATE_DESTROYING);
//if (ctx->last_utp_socket == this) {
// ctx->last_utp_socket = NULL;
//}
// Remove object from the global hash table
UTPSocketKeyData kd = ctx.utp_sockets.remove(new UTPSocketKey(addr, conn_id_recv));
//System.out.println( "Sockets=" + ctx.utp_sockets.size());
if(ASSERTS)_assert(kd);
// remove the socket from ack_sockets if it was there also
removeSocketFromAckList(this);
/*
// Free all memory occupied by the socket object.
for (size_t i = 0; i <= inbuf.mask; i++) {
free(inbuf.elements[i]);
}
for (size_t i = 0; i <= outbuf.mask; i++) {
free(outbuf.elements[i]);
}
// TODO: The circular buffer should have a destructor
free(inbuf.elements);
free(outbuf.elements);
*/
}
} // **** PARG - END OF UTPSocketImpl
void send_to_addr(utp_context ctx, byte[] p, InetSocketAddress addr )
{
send_to_addr( ctx, p, addr, 0 );
}
void send_to_addr(utp_context ctx, byte[] p, InetSocketAddress addr, int flags )
{
//socklen_t tolen;
//SOCKADDR_STORAGE to = addr.get_sockaddr_storage(&tolen);
int len = p.length;
utp_register_sent_packet(ctx, len);
utp_call_sendto(ctx, null, p, len, addr, flags);
}
void utp_register_sent_packet(utp_context ctx, int length)
{
if (length <= PACKET_SIZE_MID) {
if (length <= PACKET_SIZE_EMPTY){
ctx.context_stats._nraw_send[PACKET_SIZE_EMPTY_BUCKET]++;
} else if (length <= PACKET_SIZE_SMALL) {
ctx.context_stats._nraw_send[PACKET_SIZE_SMALL_BUCKET]++;
} else
ctx.context_stats._nraw_send[PACKET_SIZE_MID_BUCKET]++;
} else {
if (length <= PACKET_SIZE_BIG) {
ctx.context_stats._nraw_send[PACKET_SIZE_BIG_BUCKET]++;
} else
ctx.context_stats._nraw_send[PACKET_SIZE_HUGE_BUCKET]++;
}
}
void utp_register_recv_packet(UTPSocketImpl conn, int len)
{
//#ifdef _DEBUG
//++conn->_stats.nrecv;
//conn->_stats.nbytes_recv += len;
//#endif
if (len <= PACKET_SIZE_MID) {
if (len <= PACKET_SIZE_EMPTY) {
conn.ctx.context_stats._nraw_recv[PACKET_SIZE_EMPTY_BUCKET]++;
} else if (len <= PACKET_SIZE_SMALL) {
conn.ctx.context_stats._nraw_recv[PACKET_SIZE_SMALL_BUCKET]++;
} else
conn.ctx.context_stats._nraw_recv[PACKET_SIZE_MID_BUCKET]++;
} else {
if (len <= PACKET_SIZE_BIG) {
conn.ctx.context_stats._nraw_recv[PACKET_SIZE_BIG_BUCKET]++;
} else
conn.ctx.context_stats._nraw_recv[PACKET_SIZE_HUGE_BUCKET]++;
}
}
void send_rst(utp_context ctx,
InetSocketAddress addr, int conn_id_send, short ack_nr, short seq_nr)
{
//PacketFormatV1 pf1;
//zeromem(&pf1);
//size_t len;
PacketFormatBase pfb;
PacketFormatV1 pf1 = new PacketFormatV1();
pfb = pf1;
pf1.set_version(1);
pf1.set_type(ST_RESET);
pf1.ext = 0;
pf1.connid = (short)conn_id_send;
pf1.ack_nr = ack_nr;
pf1.seq_nr = seq_nr;
pf1.windowsize = 0;
//len = sizeof(PacketFormatV1);
// LOG_DEBUG("%s: Sending RST id:%u seq_nr:%u ack_nr:%u", addrfmt(addr, addrbuf), conn_id_send, seq_nr, ack_nr);
// LOG_DEBUG("send %s len:%u id:%u", addrfmt(addr, addrbuf), (uint)len, conn_id_send);
send_to_addr(ctx, pfb.serialise(), addr);
}
// Process an incoming packet
// syn is true if this is the first packet received. It will cut off parsing
// as soon as the header is done
int utp_process_incoming(UTPSocketImpl conn, PacketFormatDeserialised deserialised, int len)
{
return( utp_process_incoming( conn, deserialised, len, false ));
}
int utp_process_incoming(UTPSocketImpl conn, PacketFormatDeserialised deserialised, int len, boolean syn)
{
utp_register_recv_packet(conn, len);
conn.ctx.current_ms = utp_call_get_milliseconds(conn.ctx, conn);
PacketFormatV1 pf1 = deserialised.header;
//const byte *packet_end = packet + len;
short pk_seq_nr = pf1.seq_nr;
short pk_ack_nr = pf1.ack_nr;
byte pk_flags = pf1.type();
if (pk_flags >= ST_NUM_STATES) return 0;
//#if UTP_DEBUG_LOGGING
//conn->log(UTP_LOG_DEBUG, "Got %s. seq_nr:%u ack_nr:%u state:%s timestamp:"I64u" reply_micro:%u"
// , flagnames[pk_flags], pk_seq_nr, pk_ack_nr, statenames[conn->state]
// , uint64(pf1->tv_usec), (uint32)(pf1->reply_micro));
//#endif
// mark receipt time
long time = utp_call_get_microseconds(conn.ctx, conn);
// RSTs are handled earlier, since the connid matches the send id not the recv id
if (ASSERTS)_assert(pk_flags != ST_RESET);
// TODO: maybe send a ST_RESET if we're in CS_RESET?
//const byte *selack_ptr = NULL;
byte[] selack_bytes = null;
// Unpack UTP packet options
// Data pointer
/*
const byte *data = (const byte*)pf1 + conn->get_header_size();
if (conn->get_header_size() > len) {
#if UTP_DEBUG_LOGGING
conn->log(UTP_LOG_DEBUG, "Invalid packet size (less than header size)");
#endif
return 0;
}
// Skip the extension headers
uint extension = pf1->ext;
if (extension != 0) {
do {
// Verify that the packet is valid.
data += 2;
if ((int)(packet_end - data) < 0 || (int)(packet_end - data) < data[-1]) {
#if UTP_DEBUG_LOGGING
conn->log(UTP_LOG_DEBUG, "Invalid len of extensions");
#endif
return 0;
}
switch(extension) {
case 1: // Selective Acknowledgment
selack_ptr = data;
break;
case 2: // extension bits
if (data[-1] != 8) {
#if UTP_DEBUG_LOGGING
conn->log(UTP_LOG_DEBUG, "Invalid len of extension bits header");
#endif
return 0;
}
memcpy(conn->extensions, data, 8);
#if UTP_DEBUG_LOGGING
conn->log(UTP_LOG_DEBUG, "got extension bits:%02x%02x%02x%02x%02x%02x%02x%02x",
conn->extensions[0], conn->extensions[1], conn->extensions[2], conn->extensions[3],
conn->extensions[4], conn->extensions[5], conn->extensions[6], conn->extensions[7]);
#endif
}
extension = data[-2];
data += data[-1];
} while (extension);
}
*/
for ( PacketFormatExtensionDeserialised ext_record: deserialised.exts ){
byte extension = ext_record.ext;
switch(extension) {
case 1: // Selective Acknowledgment
selack_bytes = ext_record.ext_data;
break;
case 2: // extension bits
conn.extensions = ext_record.ext_data;
//memcpy(conn.extensions, data, 8);
//LOG_UTPV("0x%08x: got extension bits:%02x%02x%02x%02x%02x%02x%02x%02x", conn,
// conn.extensions[0], conn.extensions[1], conn.extensions[2], conn.extensions[3],
// conn.extensions[4], conn.extensions[5], conn.extensions[6], conn.extensions[7]);
}
}
if (conn.state == CS_SYN_SENT) {
// if this is a syn-ack, initialize our ack_nr
// to match the sequence number we got from
// the other end
conn.ack_nr.set((short)((pk_seq_nr - 1) & SEQ_NR_MASK));
}
conn.last_got_packet = conn.ctx.current_ms;
if (syn) {
return 0;
}
// seqnr is the number of packets past the expected
// packet this is. ack_nr is the last acked, seq_nr is the
// current. Subtracring 1 makes 0 mean "this is the next
// expected packet".
int seqnr = (pk_seq_nr - conn.ack_nr.i - 1) & SEQ_NR_MASK;
// Getting an invalid sequence number?
if (seqnr >= REORDER_BUFFER_MAX_SIZE) {
if (seqnr >= (SEQ_NR_MASK + 1) - REORDER_BUFFER_MAX_SIZE && pk_flags != ST_STATE) {
conn.schedule_ack();
}
//#if UTP_DEBUG_LOGGING
//conn->log(UTP_LOG_DEBUG, " Got old Packet/Ack (%u/%u)=%u"
// , pk_seq_nr, conn->ack_nr, seqnr);
//#endif
return 0;
}
// Process acknowledgment
// acks is the number of packets that was acked
int acks = (pk_ack_nr - (conn.seq_nr.i - 1 - conn.cur_window_packets.i)) & ACK_NR_MASK;
// this happens when we receive an old ack nr
if (acks > conn.cur_window_packets.i) acks = 0;
// if we get the same ack_nr as in the last packet
// increase the duplicate_ack counter, otherwise reset
// it to 0
if (conn.cur_window_packets.i > 0) {
if (pk_ack_nr == ((conn.seq_nr.i - conn.cur_window_packets.i - 1) & ACK_NR_MASK) &&
conn.cur_window_packets.i > 0) {
++conn.duplicate_ack;
if (conn.duplicate_ack == DUPLICATE_ACKS_BEFORE_RESEND && conn.mtu_probe_seq != 0 ) {
// It's likely that the probe was rejected due to its size, but we haven't got an
// ICMP report back yet
if (pk_ack_nr == ((conn.mtu_probe_seq - 1) & ACK_NR_MASK)) {
conn.mtu_ceiling = conn.mtu_probe_size - 1;
conn.mtu_search_update();
//conn->log(UTP_LOG_MTU, "MTU [DUPACK] floor:%d ceiling:%d current:%d"
// , conn->mtu_floor, conn->mtu_ceiling, conn->mtu_last);
} else {
// A non-probe was blocked before our probe.
// Can't conclude much, send a new probe
conn.mtu_probe_seq = conn.mtu_probe_size = 0;
}
}
} else {
conn.duplicate_ack = 0;
}
// TODO: if duplicate_ack == DUPLICATE_ACK_BEFORE_RESEND
// and fast_resend_seq_nr <= ack_nr + 1
// resend ack_nr + 1
// also call maybe_decay_win()
}
// figure out how many bytes were acked
int acked_bytes = 0;
// the minimum rtt of all acks
// this is the upper limit on the delay we get back
// from the other peer. Our delay cannot exceed
// the rtt of the packet. If it does, clamp it.
// this is done in apply_ledbat_ccontrol()
long min_rtt = INT64_MAX;
long now = utp_call_get_microseconds(conn.ctx, conn);
for (int i = 0; i < acks; ++i) {
int seq = (conn.seq_nr.i - conn.cur_window_packets.i + i) & ACK_NR_MASK;
OutgoingPacket pkt = conn.outbuf.get(seq);
if (pkt == null || pkt.transmissions == 0) continue;
if (ASSERTS)_assert((int)(pkt.payload) >= 0);
acked_bytes += pkt.payload;
if (conn.mtu_probe_seq != 0 && seq == conn.mtu_probe_seq) {
conn.mtu_floor = conn.mtu_probe_size;
conn.mtu_search_update();
//conn->log(UTP_LOG_MTU, "MTU [ACK] floor:%d ceiling:%d current:%d"
// , conn->mtu_floor, conn->mtu_ceiling, conn->mtu_last);
}
// in case our clock is not monotonic
if (pkt.time_sent < now)
min_rtt = Math.min(min_rtt, now - pkt.time_sent);
else
min_rtt = Math.min(min_rtt, 50000);
}
// count bytes acked by EACK
if (selack_bytes != null) {
long[] min_rtt_updated = { min_rtt };
acked_bytes += conn.selective_ack_bytes((pk_ack_nr + 2) & ACK_NR_MASK,
selack_bytes, selack_bytes.length, min_rtt_updated);
min_rtt = min_rtt_updated[0];
}
//#if UTP_DEBUG_LOGGING
//conn->log(UTP_LOG_DEBUG, "acks:%d acked_bytes:%u seq_nr:%d cur_window:%u cur_window_packets:%u relative_seqnr:%u max_window:%u min_rtt:%u rtt:%u",
// acks, (uint)acked_bytes, conn->seq_nr, (uint)conn->cur_window, conn->cur_window_packets,
// seqnr, (uint)conn->max_window, (uint)(min_rtt / 1000), conn->rtt);
//#endif
long p = pf1.tv_usec;
conn.last_measured_delay = conn.ctx.current_ms;
// get delay in both directions
// record the delay to report back
int their_delay = (int)(p == 0 ? 0 : time - p);
conn.reply_micro = their_delay;
int prev_delay_base = conn.their_hist.delay_base;
if (their_delay != 0) conn.their_hist.add_sample(their_delay, conn.ctx.current_ms);
// if their new delay base is less than their previous one
// we should shift our delay base in the other direction in order
// to take the clock skew into account
if (prev_delay_base != 0 &&
wrapping_compare_less(conn.their_hist.delay_base, prev_delay_base, TIMESTAMP_MASK)) {
// never adjust more than 10 milliseconds
if (prev_delay_base - conn.their_hist.delay_base <= 10000) {
conn.our_hist.shift(prev_delay_base - conn.their_hist.delay_base);
}
}
int actual_delay = (int)((uint32(pf1.reply_micro)==INT_MAX?0:uint32(pf1.reply_micro)));
// if the actual delay is 0, it means the other end
// hasn't received a sample from us yet, and doesn't
// know what it is. We can't update out history unless
// we have a true measured sample
prev_delay_base = conn.our_hist.delay_base;
if (actual_delay != 0) {
conn.our_hist.add_sample(actual_delay, conn.ctx.current_ms);
// this is keeping an average of the delay samples
// we've recevied within the last 5 seconds. We sum
// all the samples and increase the count in order to
// calculate the average every 5 seconds. The samples
// are based off of the average_delay_base to deal with
// wrapping counters.
if (conn.average_delay_base == 0) conn.average_delay_base = actual_delay;
long average_delay_sample = 0;
// distance walking from lhs to rhs, downwards
long dist_down = (conn.average_delay_base - actual_delay)&TIMESTAMP_MASK;
// distance walking from lhs to rhs, upwards
long dist_up = (actual_delay - conn.average_delay_base)&TIMESTAMP_MASK;
if (dist_down > dist_up) {
// assert(dist_up < INT_MAX / 4);
// average_delay_base < actual_delay, we should end up
// with a positive sample
average_delay_sample = dist_up;
} else {
// assert(-int64(dist_down) < INT_MAX / 4);
// average_delay_base >= actual_delay, we should end up
// with a negative sample
average_delay_sample = -dist_down;
}
conn.current_delay_sum += average_delay_sample;
++conn.current_delay_samples;
if (conn.ctx.current_ms > conn.average_sample_time) {
int prev_average_delay = conn.average_delay;
if(ASSERTS)_assert(conn.current_delay_sum / conn.current_delay_samples < INT_MAX);
if(ASSERTS)_assert(conn.current_delay_sum / conn.current_delay_samples > -INT_MAX);
// write the new average
conn.average_delay = (int)( conn.current_delay_sum / conn.current_delay_samples );
// each slot represents 5 seconds
conn.average_sample_time += 5000;
conn.current_delay_sum = 0;
conn.current_delay_samples = 0;
// this makes things very confusing when logging the average delay
//#if !g_log_utp
// normalize the average samples
// since we're only interested in the slope
// of the curve formed by the average delay samples,
// we can cancel out the actual offset to make sure
// we won't have problems with wrapping.
int min_sample = Math.min(prev_average_delay, conn.average_delay);
int max_sample = Math.max(prev_average_delay, conn.average_delay);
// normalize around zero. Try to keep the min <= 0 and max >= 0
int adjust = 0;
if (min_sample > 0) {
// adjust all samples (and the baseline) down by min_sample
adjust = -min_sample;
} else if (max_sample < 0) {
// adjust all samples (and the baseline) up by -max_sample
adjust = -max_sample;
}
if (adjust != 0 ) {
conn.average_delay_base -= adjust;
conn.average_delay += adjust;
prev_average_delay += adjust;
}
//#endif
// update the clock drift estimate
// the unit is microseconds per 5 seconds
// what we're doing is just calculating the average of the
// difference between each slot. Since each slot is 5 seconds
// and the timestamps unit are microseconds, we'll end up with
// the average slope across our history. If there is a consistent
// trend, it will show up in this value
//int64 slope = 0;
int drift = conn.average_delay - prev_average_delay;
// clock_drift is a rolling average
conn.clock_drift = ((conn.clock_drift) * 7 + drift) / 8;
conn.clock_drift_raw = drift;
}
}
// if our new delay base is less than our previous one
// we should shift the other end's delay base in the other
// direction in order to take the clock skew into account
// This is commented out because it creates bad interactions
// with our adjustment in the other direction. We don't really
// need our estimates of the other peer to be very accurate
// anyway. The problem with shifting here is that we're more
// likely shift it back later because of a low latency. This
// second shift back would cause us to shift our delay base
// which then get's into a death spiral of shifting delay bases
/* if (prev_delay_base != 0 &&
wrapping_compare_less(conn->our_hist.delay_base, prev_delay_base)) {
// never adjust more than 10 milliseconds
if (prev_delay_base - conn->our_hist.delay_base <= 10000) {
conn->their_hist.Shift(prev_delay_base - conn->our_hist.delay_base);
}
}
*/
// if the delay estimate exceeds the RTT, adjust the base_delay to
// compensate
if(ASSERTS)_assert(min_rtt >= 0);
if ((long)(conn.our_hist.get_value()) > min_rtt) {
conn.our_hist.shift((int)( conn.our_hist.get_value() - min_rtt));
}
// only apply the congestion controller on acks
// if we don't have a delay measurement, there's
// no point in invoking the congestion control
if (actual_delay != 0 && acked_bytes >= 1)
conn.apply_ccontrol(acked_bytes, actual_delay, min_rtt);
// sanity check, the other end should never ack packets
// past the point we've sent
if (acks <= conn.cur_window_packets.i) {
conn.max_window_user = pf1.windowsize;
// If max user window is set to 0, then we startup a timer
// That will reset it to 1 after 15 seconds.
if (conn.max_window_user == 0)
// Reset max_window_user to 1 every 15 seconds.
conn.zerowindow_time = conn.ctx.current_ms + 15000;
// Respond to connect message
// Switch to CONNECTED state.
if (conn.state == CS_SYN_SENT) {
conn.state = CS_CONNECTED;
// If the user has defined the ON_CONNECT callback, use that to
// notify the user that the socket is now connected. If ON_CONNECT
// has not been defined, notify the user via ON_STATE_CHANGE.
if (conn.ctx.callbacks[UTP_ON_CONNECT] != null)
utp_call_on_connect(conn.ctx, conn);
else
utp_call_on_state_change(conn.ctx, conn, UTP_STATE_CONNECT);
// We've sent a fin, and everything was ACKed (including the FIN),
// it's safe to destroy the socket. cur_window_packets == acks
// means that this packet acked all the remaining packets that
// were in-flight.
} else if (conn.state == CS_FIN_SENT && conn.cur_window_packets.i == acks) {
conn.state = CS_DESTROY;
}
// Update fast resend counter
if (wrapping_compare_less(conn.fast_resend_seq_nr.i
, (pk_ack_nr + 1) & ACK_NR_MASK, ACK_NR_MASK))
conn.fast_resend_seq_nr.set((short)((pk_ack_nr + 1) & ACK_NR_MASK));
//#if UTP_DEBUG_LOGGING
//conn->log(UTP_LOG_DEBUG, "fast_resend_seq_nr:%u", conn->fast_resend_seq_nr);
//#endif
for (int i = 0; i < acks; ++i) {
int ack_status = conn.ack_packet( new UnsignedShort((short)( conn.seq_nr.i - conn.cur_window_packets.i)));
// if ack_status is 0, the packet was acked.
// if acl_stauts is 1, it means that the packet had already been acked
// if it's 2, the packet has not been sent yet
// We need to break this loop in the latter case. This could potentially
// happen if we get an ack_nr that does not exceed what we have stuffed
// into the outgoing buffer, but does exceed what we have sent
if (ack_status == 2) {
//#ifdef _DEBUG
//OutgoingPacket* pkt = (OutgoingPacket*)conn->outbuf.get(conn->seq_nr - conn->cur_window_packets);
//assert(pkt->transmissions == 0);
//#endif
break;
}
conn.cur_window_packets.dec();
//#if UTP_DEBUG_LOGGING
//conn->log(UTP_LOG_DEBUG, "decementing cur_window_packets:%u", conn->cur_window_packets);
//#endif
}
//#ifdef _DEBUG
//if (conn->cur_window_packets == 0)
// assert(conn->cur_window == 0);
//#endif
// packets in front of this may have been acked by a
// selective ack (EACK). Keep decreasing the window packet size
// until we hit a packet that is still waiting to be acked
// in the send queue
// this is especially likely to happen when the other end
// has the EACK send bug older versions of uTP had
while (conn.cur_window_packets.i > 0 && conn.outbuf.get(conn.seq_nr.i - conn.cur_window_packets.i) == null){
conn.cur_window_packets.dec();
//#if UTP_DEBUG_LOGGING
//conn->log(UTP_LOG_DEBUG, "decementing cur_window_packets:%u", conn->cur_window_packets);
//#endif
}
//#ifdef _DEBUG
//if (conn->cur_window_packets == 0)
// assert(conn->cur_window == 0);
//#endif
// this invariant should always be true
if (ASSERTS)_assert(conn.cur_window_packets.i == 0 || conn.outbuf.get(conn.seq_nr.i - conn.cur_window_packets.i) != null );
// flush Nagle
if (conn.cur_window_packets.i == 1) {
OutgoingPacket pkt = conn.outbuf.get(conn.seq_nr.i - 1);
// do we still have quota?
if (pkt.transmissions == 0) {
conn.send_packet(pkt);
}
}
// Fast timeout-retry
if (conn.fast_timeout) {
//#if UTP_DEBUG_LOGGING
//conn->log(UTP_LOG_DEBUG, "Fast timeout %u,%u,%u?", (uint)conn->cur_window, conn->seq_nr - conn->timeout_seq_nr, conn->timeout_seq_nr);
//#endif
// if the fast_resend_seq_nr is not pointing to the oldest outstanding packet, it suggests that we've already
// resent the packet that timed out, and we should leave the fast-timeout mode.
if (((conn.seq_nr.i - conn.cur_window_packets.i) & ACK_NR_MASK) != conn.fast_resend_seq_nr.i) {
conn.fast_timeout = false;
} else {
// resend the oldest packet and increment fast_resend_seq_nr
// to not allow another fast resend on it again
OutgoingPacket pkt = conn.outbuf.get(conn.seq_nr.i - conn.cur_window_packets.i);
if (pkt != null && pkt.transmissions > 0) {
//#if UTP_DEBUG_LOGGING
//conn->log(UTP_LOG_DEBUG, "Packet %u fast timeout-retry.", conn->seq_nr - conn->cur_window_packets);
//#endif
//#ifdef _DEBUG
//++conn->_stats.fastrexmit;
//#endif
conn.fast_resend_seq_nr.inc();
conn.send_packet(pkt);
}
}
}
}
// Process selective acknowledgent
if (selack_bytes != null) {
conn.selective_ack(pk_ack_nr + 2, selack_bytes, selack_bytes.length);
}
// this invariant should always be true
if (ASSERTS)_assert(conn.cur_window_packets.i == 0 || conn.outbuf.get(conn.seq_nr.i - conn.cur_window_packets.i) != null );
//#if UTP_DEBUG_LOGGING
//conn->log(UTP_LOG_DEBUG, "acks:%d acked_bytes:%u seq_nr:%u cur_window:%u cur_window_packets:%u ",
// acks, (uint)acked_bytes, conn->seq_nr, (uint)conn->cur_window, conn->cur_window_packets);
//#endif
// In case the ack dropped the current window below
// the max_window size, Mark the socket as writable
if (conn.state == CS_CONNECTED_FULL && !conn.is_full()) {
conn.state = CS_CONNECTED;
//#if UTP_DEBUG_LOGGING
//conn->log(UTP_LOG_DEBUG, "Socket writable. max_window:%u cur_window:%u packet_size:%u",
// (uint)conn->max_window, (uint)conn->cur_window, (uint)conn->get_packet_size());
//#endif
utp_call_on_state_change(conn.ctx, conn, UTP_STATE_WRITABLE);
}
if (pk_flags == ST_STATE) {
// This is a state packet only.
return 0;
}
// The connection is not in a state that can accept data?
if (conn.state != CS_CONNECTED &&
conn.state != CS_CONNECTED_FULL &&
conn.state != CS_FIN_SENT) {
return 0;
}
// Is this a finalize packet?
if (pk_flags == ST_FIN && !conn.got_fin) {
//#if UTP_DEBUG_LOGGING
//conn->log(UTP_LOG_DEBUG, "Got FIN eof_pkt:%u", pk_seq_nr);
//#endif
conn.got_fin = true;
conn.eof_pkt.set( pk_seq_nr );
// at this point, it is possible for the
// other end to have sent packets with
// sequence numbers higher than seq_nr.
// if this is the case, our reorder_count
// is out of sync. This case is dealt with
// when we re-order and hit the eof_pkt.
// we'll just ignore any packets with
// sequence numbers past this
}
ByteBuffer packet_payload = deserialised.payload;
// Getting an in-order packet?
if (seqnr == 0) {
int count = packet_payload.remaining();
if (count > 0 && conn.state != CS_FIN_SENT) {
//#if UTP_DEBUG_LOGGING
//conn->log(UTP_LOG_DEBUG, "Got Data len:%u (rb:%u)", (uint)count, (uint)utp_call_get_read_buffer_size(conn->ctx, conn));
//#endif
// Post bytes to the upper layer
utp_call_on_read(conn.ctx, conn, packet_payload, count);
}
conn.ack_nr.inc();
// Check if the next packet has been received too, but waiting
// in the reorder buffer.
while( true ){
if (conn.got_fin && conn.eof_pkt.i == conn.ack_nr.i ) {
if (conn.state != CS_FIN_SENT) {
conn.state = CS_GOT_FIN;
conn.rto_timeout = conn.ctx.current_ms + Math.min(conn.rto * 3, 60);
//#if UTP_DEBUG_LOGGING
//conn->log(UTP_LOG_DEBUG, "Posting EOF");
//#endif
utp_call_on_state_change(conn.ctx, conn, UTP_STATE_EOF);
}
// if the other end wants to close, ack
conn.send_ack();
// reorder_count is not necessarily 0 at this point.
// even though it is most of the time, the other end
// may have sent packets with higher sequence numbers
// than what later end up being eof_pkt
// since we have received all packets up to eof_pkt
// just ignore the ones after it.
conn.reorder_count.set(0);
}
// Quick get-out in case there is nothing to reorder
if (conn.reorder_count.i == 0)
break;
// Check if there are additional buffers in the reorder buffers
// that need delivery.
ByteBuffer pending = conn.inbuf.get(conn.ack_nr.i+1);
if (pending == null)
break;
conn.inbuf.put(conn.ack_nr.i+1, null);
//count = *(uint*)p;
count = pending.remaining();
if (count > 0 && conn.state != CS_FIN_SENT) {
// Pass the bytes to the upper layer
utp_call_on_read(conn.ctx, conn, pending, count);
}
conn.ack_nr.inc();
// Free the element from the reorder buffer
//free(p);
if (ASSERTS)_assert(conn.reorder_count.i > 0);
conn.reorder_count.dec();
}
conn.schedule_ack();
} else {
// Getting an out of order packet.
// The packet needs to be remembered and rearranged later.
// if we have received a FIN packet, and the EOF-sequence number
// is lower than the sequence number of the packet we just received
// something is wrong.
if (conn.got_fin && pk_seq_nr > conn.eof_pkt.i) {
//#if UTP_DEBUG_LOGGING
//conn->log(UTP_LOG_DEBUG, "Got an invalid packet sequence number, past EOF "
// "reorder_count:%u len:%u (rb:%u)",
// conn->reorder_count, (uint)(packet_end - data), (uint)utp_call_get_read_buffer_size(conn->ctx, conn));
//#endif
return 0;
}
// if the sequence number is entirely off the expected
// one, just drop it. We can't allocate buffer space in
// the inbuf entirely based on untrusted input
if (seqnr > 0x3ff) {
//#if UTP_DEBUG_LOGGING
//conn->log(UTP_LOG_DEBUG, "0x%08x: Got an invalid packet sequence number, too far off "
// "reorder_count:%u len:%u (rb:%u)",
// conn->reorder_count, (uint)(packet_end - data), (uint)utp_call_get_read_buffer_size(conn->ctx, conn));
//#endif
return 0;
}
// we need to grow the circle buffer before we
// check if the packet is already in here, so that
// we don't end up looking at an older packet (since
// the indices wraps around).
conn.inbuf.ensure_size(pk_seq_nr + 1, seqnr + 1);
// Has this packet already been received? (i.e. a duplicate)
// If that is the case, just discard it.
if (conn.inbuf.get(pk_seq_nr) != null) {
//#ifdef _DEBUG
//++conn->_stats.nduprecv;
//#endif
return 0;
}
// Allocate memory to fit the packet that needs to re-ordered
//byte *mem = (byte*)malloc((packet_end - data) + sizeof(uint));
//*(uint*)mem = (uint)(packet_end - data);
//memcpy(mem + sizeof(uint), data, packet_end - data);
// Insert into reorder buffer and increment the count
// of # of packets to be reordered.
// we add one to seqnr in order to leave the last
// entry empty, that way the assert in send_ack
// is valid. we have to add one to seqnr too, in order
// to make the circular buffer grow around the correct
// point (which is conn->ack_nr + 1).
if (ASSERTS)_assert(conn.inbuf.get(pk_seq_nr) == null);
if (ASSERTS)_assert((pk_seq_nr & conn.inbuf.mask) != ((conn.ack_nr.i+1) & conn.inbuf.mask));
conn.inbuf.put(pk_seq_nr, packet_payload);
conn.reorder_count.inc();
//#if UTP_DEBUG_LOGGING
//conn->log(UTP_LOG_DEBUG, "0x%08x: Got out of order data reorder_count:%u len:%u (rb:%u)",
// conn->reorder_count, (uint)(packet_end - data), (uint)utp_call_get_read_buffer_size(conn->ctx, conn));
//#endif
conn.schedule_ack();
}
return (int)(packet_payload.remaining());
}
byte UTP_Version(PacketFormatV1 pf)
{
return (pf.type() < ST_NUM_STATES && pf.ext < 3 ? pf.version() : 0);
}
/*
void UTP_FreeAll(struct UTPSocketHT *utp_sockets) {
utp_hash_iterator_t it;
UTPSocketKeyData* keyData;
while ((keyData = utp_sockets->Iterate(it))) {
delete keyData->socket;
}
}
*/
void utp_initialize_socket( UTPSocketImpl conn,
//const struct sockaddr *addr,
//socklen_t addrlen,
InetSocketAddress addr,
boolean need_seed_gen,
int conn_seed,
int conn_id_recv,
int conn_id_send)
{
//PackedSockAddr psaddr = PackedSockAddr((const SOCKADDR_STORAGE*)addr, addrlen);
if (need_seed_gen) {
do {
conn_seed = utp_call_get_random(conn.ctx, conn);
// we identify v1 and higher by setting the first two bytes to 0x0001
conn_seed &= 0xffff;
} while (conn.ctx.utp_sockets.get(new UTPSocketKey(addr, conn_seed)) != null );
conn_id_recv += conn_seed;
conn_id_send += conn_seed;
}
conn.state = CS_IDLE;
conn.conn_seed = conn_seed;
conn.conn_id_recv = conn_id_recv;
conn.conn_id_send = conn_id_send;
conn.addr = addr;
conn.ctx.current_ms = utp_call_get_milliseconds(conn.ctx, null);
conn.last_got_packet = conn.ctx.current_ms;
conn.last_sent_packet = conn.ctx.current_ms;
conn.last_measured_delay = conn.ctx.current_ms + 0x70000000;
conn.average_sample_time = conn.ctx.current_ms + 5000;
conn.last_rwin_decay.set(conn.ctx.current_ms - MAX_WINDOW_DECAY);
conn.our_hist.clear(conn.ctx.current_ms);
conn.their_hist.clear(conn.ctx.current_ms);
conn.rtt_hist.clear(conn.ctx.current_ms);
// initialize MTU floor and ceiling
conn.mtu_reset();
conn.mtu_last = conn.mtu_ceiling;
conn.ctx.utp_sockets.put(new UTPSocketKey(conn.addr, conn.conn_id_recv), new UTPSocketKeyData( conn ));
//System.out.println( "Sockets=" + conn.ctx.utp_sockets.size());
// we need to fit one packet in the window when we start the connection
conn.max_window = conn.get_packet_size();
//#if UTP_DEBUG_LOGGING
//conn->log(UTP_LOG_DEBUG, "UTP socket initialized");
//#endif
}
UTPSocketImpl utp_create_socket(utp_context ctx)
{
if(ASSERTS)_assert(ctx!=null);
if (ctx==null) return null;
UTPSocketImpl conn = new UTPSocketImpl(); // TODO: UTPSocket should have a constructor
conn.state = CS_UNINITIALIZED;
conn.ctx = ctx;
conn.userdata = null;
conn.reorder_count.set( 0 );
conn.duplicate_ack = 0;
conn.timeout_seq_nr.set( 0 );
conn.last_rcv_win = 0;
conn.got_fin = false;
conn.fast_timeout = false;
conn.rtt = 0;
conn.retransmit_timeout = 0;
conn.rto_timeout = 0;
conn.zerowindow_time = 0;
conn.average_delay = 0;
conn.current_delay_samples = 0;
conn.cur_window = 0;
conn.eof_pkt.set( 0 );
conn.last_maxed_out_window = 0;
conn.mtu_probe_seq = 0;
conn.mtu_probe_size = 0;
conn.current_delay_sum = 0;
conn.average_delay_base = 0;
conn.retransmit_count = 0;
conn.rto = 3000;
conn.rtt_var = 800;
conn.seq_nr.set( 1 );
conn.ack_nr.set( 0 );
conn.max_window_user = 255 * PACKET_SIZE;
conn.cur_window_packets.set( 0 );
conn.fast_resend_seq_nr.set( conn.seq_nr.i );
conn.target_delay = ctx.target_delay;
conn.reply_micro = 0;
conn.opt_sndbuf = ctx.opt_sndbuf;
conn.opt_rcvbuf = ctx.opt_rcvbuf;
conn.slow_start = true;
conn.ssthresh = conn.opt_sndbuf;
conn.clock_drift = 0;
conn.clock_drift_raw = 0;
conn.outbuf.mask = 15;
conn.inbuf.mask = 15;
conn.outbuf.elements = new Object[16];
conn.inbuf.elements = new Object[16];
//conn.ida = -1; // set the index of every new socket in ack_sockets to
// -1, which also means it is not in ack_sockets yet
//memset(conn->extensions, 0, sizeof(conn->extensions));
//#ifdef _DEBUG
//memset(&conn->_stats, 0, sizeof(utp_socket_stats));
//#endif
return conn;
}
int utp_context_set_option(utp_context ctx, int opt, int val)
{
if(ASSERTS)_assert(ctx!=null);
if (ctx==null) return -1;
switch (opt) {
case UTP_LOG_NORMAL:
ctx.log_normal = val!=0 ? true : false;
return 0;
case UTP_LOG_MTU:
ctx.log_mtu = val!=0 ? true : false;
return 0;
case UTP_LOG_DEBUG:
ctx.log_debug = val!=0 ? true : false;
return 0;
case UTP_TARGET_DELAY:
ctx.target_delay = val;
return 0;
case UTP_SNDBUF:
if(ASSERTS)_assert(val >= 1);
ctx.opt_sndbuf = val;
return 0;
case UTP_RCVBUF:
if(ASSERTS)_assert(val >= 1);
ctx.opt_rcvbuf = val;
return 0;
}
return -1;
}
int utp_context_get_option(utp_context ctx, int opt)
{
if(ASSERTS)_assert(ctx!=null);
if (ctx==null) return -1;
switch (opt) {
case UTP_LOG_NORMAL: return ctx.log_normal ? 1 : 0;
case UTP_LOG_MTU: return ctx.log_mtu ? 1 : 0;
case UTP_LOG_DEBUG: return ctx.log_debug ? 1 : 0;
case UTP_TARGET_DELAY: return ctx.target_delay;
case UTP_SNDBUF: return ctx.opt_sndbuf;
case UTP_RCVBUF: return ctx.opt_rcvbuf;
}
return -1;
}
int utp_setsockopt(UTPSocketImpl conn, int opt, int val)
{
if(ASSERTS)_assert(conn!=null);
if (conn==null) return -1;
switch (opt) {
case UTP_SNDBUF:
if(ASSERTS)_assert(val >= 1);
conn.opt_sndbuf = val;
return 0;
case UTP_RCVBUF:
if(ASSERTS)_assert(val >= 1);
conn.opt_rcvbuf = val;
return 0;
case UTP_TARGET_DELAY:
conn.target_delay = val;
return 0;
}
return -1;
}
int utp_getsockopt(UTPSocketImpl conn, int opt)
{
if(ASSERTS)_assert(conn!=null);
if (conn==null) return -1;
switch (opt) {
case UTP_SNDBUF: return conn.opt_sndbuf;
case UTP_RCVBUF: return conn.opt_rcvbuf;
case UTP_TARGET_DELAY: return conn.target_delay;
}
return -1;
}
// Try to connect to a specified host.
int utp_connect(UTPSocketImpl conn, InetSocketAddress addr )
{
if(ASSERTS)_assert(conn!=null);
if (conn==null) return -1;
if(ASSERTS)_assert(conn.state == CS_UNINITIALIZED);
if (conn.state != CS_UNINITIALIZED) {
conn.state = CS_DESTROY;
return -1;
}
utp_initialize_socket(conn, addr, true, 0, 0, 1);
if(ASSERTS)_assert(conn.cur_window_packets.i == 0);
if(ASSERTS)_assert(conn.outbuf.get(conn.seq_nr.i) == null);
if(ASSERTS)_assert(sizeof_PacketFormatV1 == 20);
conn.state = CS_SYN_SENT;
conn.ctx.current_ms = utp_call_get_milliseconds(conn.ctx, conn);
// Create and send a connect message
// used in parse_log.py
//conn->log(UTP_LOG_NORMAL, "UTP_Connect conn_seed:%u packet_size:%u (B) "
// "target_delay:%u (ms) delay_history:%u "
// "delay_base_history:%u (minutes)",
// conn->conn_seed, PACKET_SIZE, conn->target_delay / 1000,
// CUR_DELAY_SIZE, DELAY_BASE_HISTORY);
// Setup initial timeout timer.
conn.retransmit_timeout = 3000;
conn.rto_timeout = conn.ctx.current_ms + conn.retransmit_timeout;
conn.last_rcv_win = conn.get_rcv_window();
// if you need compatibiltiy with 1.8.1, use this. it increases attackability though.
//conn->seq_nr = 1;
conn.seq_nr.set( utp_call_get_random(conn.ctx, conn));
// Create the connect packet.
int header_size = sizeof_PacketFormatV1;
//OutgoingPacket *pkt = (OutgoingPacket*)malloc(sizeof(OutgoingPacket) - 1 + header_size);
//PacketFormatV1* p1 = (PacketFormatV1*)pkt->data;
OutgoingPacket pkt = new OutgoingPacket();
//memset(p1, 0, header_size);
// SYN packets are special, and have the receive ID in the connid field,
// instead of conn_id_send.
PacketFormatExtensionsV1 p1 = new PacketFormatExtensionsV1();
pkt.packet_header = p1;
p1.set_version(1);
p1.set_type(ST_SYN);
p1.ext = 0;
p1.connid = (short)conn.conn_id_recv;
p1.windowsize = (int)conn.last_rcv_win;
p1.seq_nr = (short)conn.seq_nr.i;
pkt.transmissions = 0;
pkt.length = header_size;
pkt.payload = 0;
/*
#if UTP_DEBUG_LOGGING
conn->log(UTP_LOG_DEBUG, "Sending connect %s [%u].",
addrfmt(conn->addr, addrbuf), conn_seed);
#endif
*/
// Remember the message in the outgoing queue.
conn.outbuf.ensure_size(conn.seq_nr.i, conn.cur_window_packets.i);
conn.outbuf.put(conn.seq_nr.i, pkt);
conn.seq_nr.inc();
conn.cur_window_packets.inc();
//#if UTP_DEBUG_LOGGING
//conn->log(UTP_LOG_DEBUG, "incrementing cur_window_packets:%u", conn->cur_window_packets);
//#endif
conn.send_packet(pkt);
return 0;
}
// Returns 1 if the UDP payload was recognized as a UTP packet, or 0 if it was not
int utp_process_udp(utp_context ctx,byte[] buffer, int len, InetSocketAddress addr )
{
if(ASSERTS)_assert(ctx!=null);
if (ctx==null) return 0;
if(ASSERTS)_assert(buffer!=null);
if (buffer==null) return 0;
//assert(to);
//if (!to) return 0;
/*
const PackedSockAddr addr((const SOCKADDR_STORAGE*)to, tolen);
if (len < sizeof(PacketFormatV1)) {
#if UTP_DEBUG_LOGGING
ctx->log(UTP_LOG_DEBUG, NULL, "recv %s len:%u too small", addrfmt(addr, addrbuf), (uint)len);
#endif
return 0;
}
const PacketFormatV1 *pf1 = (PacketFormatV1*)buffer;
*/
PacketFormatDeserialised deserialised = deserialise( buffer, len, false );
if ( deserialised == null ){
return( 0 );
}
PacketFormatV1 pf1 = deserialised.header;
byte version = UTP_Version(pf1);
int id = (pf1.connid)&0xffff;
if (version != 1) {
//#if UTP_DEBUG_LOGGING
//ctx->log(UTP_LOG_DEBUG, NULL, "recv %s len:%u version:%u unsupported version", addrfmt(addr, addrbuf), (uint)len, version);
//#endif
return 0;
}
//#if UTP_DEBUG_LOGGING
//ctx->log(UTP_LOG_DEBUG, NULL, "recv %s len:%u id:%u", addrfmt(addr, addrbuf), (uint)len, id);
//ctx->log(UTP_LOG_DEBUG, NULL, "recv id:%u seq_nr:%u ack_nr:%u", id, (uint)pf1->seq_nr, (uint)pf1->ack_nr);
//#endif
byte flags = pf1.type();
if (flags == ST_RESET) {
// id is either our recv id or our send id
// if it's our send id, and we initiated the connection, our recv id is id + 1
// if it's our send id, and we did not initiate the connection, our recv id is id - 1
// we have to check every case
UTPSocketKeyData keyData;
if ( ((keyData = ctx.utp_sockets.get(new UTPSocketKey(addr, id))) != null ) ||
(((keyData = ctx.utp_sockets.get(new UTPSocketKey(addr, id + 1))) != null ) && keyData.socket.conn_id_send == id) ||
(((keyData = ctx.utp_sockets.get(new UTPSocketKey(addr, id - 1))) != null ) && keyData.socket.conn_id_send == id))
{
UTPSocketImpl conn = keyData.socket;
//#if UTP_DEBUG_LOGGING
//ctx->log(UTP_LOG_DEBUG, NULL, "recv RST for existing connection");
//#endif
if (conn.state == CS_FIN_SENT)
conn.state = CS_DESTROY;
else
conn.state = CS_RESET;
utp_call_on_overhead_statistics(conn.ctx, conn, 0, len + conn.get_udp_overhead(), close_overhead);
int err = (conn.state == CS_SYN_SENT) ? UTP_ECONNREFUSED : UTP_ECONNRESET;
utp_call_on_error(conn.ctx, conn, err);
}
else {
//#if UTP_DEBUG_LOGGING
//ctx->log(UTP_LOG_DEBUG, NULL, "recv RST for unknown connection");
//#endif
}
return 1;
}
else if (flags != ST_SYN) {
UTPSocketImpl conn = null;
//if (ctx->last_utp_socket && ctx->last_utp_socket->addr == addr && ctx->last_utp_socket->conn_id_recv == id) {
// conn = ctx->last_utp_socket;
//} else {
UTPSocketKeyData keyData = ctx.utp_sockets.get(new UTPSocketKey(addr, id));
if (keyData != null ) {
conn = keyData.socket;
//ctx->last_utp_socket = conn;
}
//}
if (conn != null) {
//#if UTP_DEBUG_LOGGING
//ctx->log(UTP_LOG_DEBUG, NULL, "recv processing");
//#endif
int read = utp_process_incoming(conn, deserialised, len);
utp_call_on_overhead_statistics(conn.ctx, conn, 0, (len - read) + conn.get_udp_overhead(), header_overhead);
return 1;
}
}
// We have not found a matching utp_socket, and this isn't a SYN. Reject it.
int seq_nr = pf1.seq_nr;
if (flags != ST_SYN) {
ctx.current_ms = utp_call_get_milliseconds(ctx, null);
for (int i = 0; i < ctx.rst_info.size(); i++) {
RST_Info info = ctx.rst_info.get(i);
if ((info.connid == id) &&
(info.addr == addr) &&
(info.ack_nr == seq_nr))
{
info.timestamp = ctx.current_ms;
//#if UTP_DEBUG_LOGGING
//ctx->log(UTP_LOG_DEBUG, NULL, "recv not sending RST to non-SYN (stored)");
//#endif
return 1;
}
}
if (ctx.rst_info.size() > RST_INFO_LIMIT) {
//#if UTP_DEBUG_LOGGING
//ctx->log(UTP_LOG_DEBUG, NULL, "recv not sending RST to non-SYN (limit at %u stored)", (uint)ctx->rst_info.GetCount());
//#endif
return 1;
}
//#if UTP_DEBUG_LOGGING
//ctx->log(UTP_LOG_DEBUG, NULL, "recv send RST to non-SYN (%u stored)", (uint)ctx->rst_info.GetCount());
//#endif
//RST_Info &r = ctx->rst_info.Append();
RST_Info r = new RST_Info();
ctx.rst_info.add( r );
r.addr = addr;
r.connid = id;
r.ack_nr = (short)seq_nr;
r.timestamp = ctx.current_ms;
send_rst( ctx, addr, id, (short)seq_nr, (short)utp_call_get_random(ctx, null));
return 1;
}
if (ctx.callbacks[UTP_ON_ACCEPT] != null) {
//#if UTP_DEBUG_LOGGING
//ctx->log(UTP_LOG_DEBUG, NULL, "Incoming connection from %s", addrfmt(addr, addrbuf));
//#endif
UTPSocketKeyData keyData = ctx.utp_sockets.get(new UTPSocketKey(addr, id + 1));
if (keyData != null) {
//#if UTP_DEBUG_LOGGING
//ctx->log(UTP_LOG_DEBUG, NULL, "rejected incoming connection, connection already exists");
//#endif
return 1;
}
if (ctx.utp_sockets.size() > 3000) {
//#if UTP_DEBUG_LOGGING
//ctx->log(UTP_LOG_DEBUG, NULL, "rejected incoming connection, too many uTP sockets %d", ctx->utp_sockets->GetCount());
//#endif
return 1;
}
// true means yes, block connection. false means no, don't block.
if (utp_call_on_firewall(ctx, addr) != 0 ) {
//#if UTP_DEBUG_LOGGING
//ctx->log(UTP_LOG_DEBUG, NULL, "rejected incoming connection, firewall callback returned true");
//#endif
return 1;
}
// Create a new UTP socket to handle this new connection
UTPSocketImpl conn = utp_create_socket(ctx);
utp_initialize_socket(conn, addr, false, id, id+1, id);
conn.ack_nr.set( seq_nr );
conn.seq_nr.set( utp_call_get_random(ctx, null));
conn.fast_resend_seq_nr.set( conn.seq_nr.i );
conn.state = CS_CONNECTED;
utp_call_on_accept(ctx, conn, addr); // PARG - moved this from below to ensure userdata is setup before it is subsequently used
int read = utp_process_incoming(conn, deserialised, len, true);
//#if UTP_DEBUG_LOGGING
//ctx->log(UTP_LOG_DEBUG, NULL, "recv send connect ACK");
//#endif
conn.send_ack(true);
// utp_call_on_accept(ctx, conn, addr);
// we report overhead after on_accept(), because the callbacks are setup now
utp_call_on_overhead_statistics(conn.ctx, conn, 0, (len - read) + conn.get_udp_overhead(), header_overhead); // SYN
utp_call_on_overhead_statistics(conn.ctx, conn, 1, conn.get_overhead(), ack_overhead); // SYNACK
}
else {
//#if UTP_DEBUG_LOGGING
//ctx->log(UTP_LOG_DEBUG, NULL, "rejected incoming connection, UTP_ON_ACCEPT callback not set");
//#endif
}
return 1;
}
// Called by utp_process_icmp_fragmentation() and utp_process_icmp_error() below
/*
static UTPSocket* parse_icmp_payload(utp_context *ctx, const byte *buffer, size_t len, const struct sockaddr *to, socklen_t tolen)
{
assert(ctx);
if (!ctx) return NULL;
assert(buffer);
if (!buffer) return NULL;
assert(to);
if (!to) return NULL;
const PackedSockAddr addr((const SOCKADDR_STORAGE*)to, tolen);
// ICMP packets are only required to quote the first 8 bytes of the layer4
// payload. The UDP payload is 8 bytes, and the UTP header is another 20
// bytes. So, in order to find the entire UTP header, we need the ICMP
// packet to quote 28 bytes.
if (len < sizeof(PacketFormatV1)) {
#if UTP_DEBUG_LOGGING
ctx->log(UTP_LOG_DEBUG, NULL, "Ignoring ICMP from %s: runt length %d", addrfmt(addr, addrbuf), len);
#endif
return NULL;
}
const PacketFormatV1 *pf = (PacketFormatV1*)buffer;
const byte version = UTP_Version(pf);
const uint32 id = uint32(pf->connid);
if (version != 1) {
#if UTP_DEBUG_LOGGING
ctx->log(UTP_LOG_DEBUG, NULL, "Ignoring ICMP from %s: not UTP version 1", addrfmt(addr, addrbuf));
#endif
return NULL;
}
UTPSocketKeyData* keyData;
if ( (keyData = ctx->utp_sockets->Lookup(UTPSocketKey(addr, id))) ||
((keyData = ctx->utp_sockets->Lookup(UTPSocketKey(addr, id + 1))) && keyData->socket->conn_id_send == id) ||
((keyData = ctx->utp_sockets->Lookup(UTPSocketKey(addr, id - 1))) && keyData->socket->conn_id_send == id))
{
return keyData->socket;
}
#if UTP_DEBUG_LOGGING
ctx->log(UTP_LOG_DEBUG, NULL, "Ignoring ICMP from %s: No matching connection found for id %u", addrfmt(addr, addrbuf), id);
#endif
return NULL;
}
// Should be called when an ICMP Type 3, Code 4 packet (fragmentation needed) is received, to adjust the MTU
//
// Returns 1 if the UDP payload (delivered in the ICMP packet) was recognized as a UTP packet, or 0 if it was not
//
// @ctx: utp_context
// @buf: Contents of the original UDP payload, which the ICMP packet quoted. *Not* the ICMP packet itself.
// @len: buffer length
// @to: destination address of the original UDP pakcet
// @tolen: address length
// @next_hop_mtu:
int utp_process_icmp_fragmentation(utp_context *ctx, const byte* buffer, size_t len, const struct sockaddr *to, socklen_t tolen, uint16 next_hop_mtu)
{
UTPSocket* conn = parse_icmp_payload(ctx, buffer, len, to, tolen);
if (!conn) return 0;
// Constrain the next_hop_mtu to sane values. It might not be initialized or sent properly
if (next_hop_mtu >= 576 && next_hop_mtu < 0x2000) {
conn->mtu_ceiling = min<uint32>(next_hop_mtu, conn->mtu_ceiling);
conn->mtu_search_update();
// this is something of a speecial case, where we don't set mtu_last
// to the value in between the floor and the ceiling. We can update the
// floor, because there might be more network segments after the one
// that sent this ICMP with smaller MTUs. But we want to test this
// MTU size first. If the next probe gets through, mtu_floor is updated
conn->mtu_last = conn->mtu_ceiling;
} else {
// Otherwise, binary search. At this point we don't actually know
// what size the packet that failed was, and apparently we can't
// trust the next hop mtu either. It seems reasonably conservative
// to just lower the ceiling. This should not happen on working networks
// anyway.
conn->mtu_ceiling = (conn->mtu_floor + conn->mtu_ceiling) / 2;
conn->mtu_search_update();
}
conn->log(UTP_LOG_MTU, "MTU [ICMP] floor:%d ceiling:%d current:%d", conn->mtu_floor, conn->mtu_ceiling, conn->mtu_last);
return 1;
}
// Should be called when an ICMP message is received that should tear down the connection.
//
// Returns 1 if the UDP payload (delivered in the ICMP packet) was recognized as a UTP packet, or 0 if it was not
//
// @ctx: utp_context
// @buf: Contents of the original UDP payload, which the ICMP packet quoted. *Not* the ICMP packet itself.
// @len: buffer length
// @to: destination address of the original UDP pakcet
// @tolen: address length
int utp_process_icmp_error(utp_context *ctx, const byte *buffer, size_t len, const struct sockaddr *to, socklen_t tolen)
{
UTPSocket* conn = parse_icmp_payload(ctx, buffer, len, to, tolen);
if (!conn) return 0;
const int err = (conn->state == CS_SYN_SENT) ? UTP_ECONNREFUSED : UTP_ECONNRESET;
const PackedSockAddr addr((const SOCKADDR_STORAGE*)to, tolen);
switch(conn->state) {
// Don't pass on errors for idle/closed connections
case CS_IDLE:
#if UTP_DEBUG_LOGGING
ctx->log(UTP_LOG_DEBUG, NULL, "ICMP from %s in state CS_IDLE, ignoring", addrfmt(addr, addrbuf));
#endif
return 1;
case CS_FIN_SENT:
#if UTP_DEBUG_LOGGING
ctx->log(UTP_LOG_DEBUG, NULL, "ICMP from %s in state CS_FIN_SENT, setting state to CS_DESTROY and causing error %d", addrfmt(addr, addrbuf), err);
#endif
conn->state = CS_DESTROY;
break;
default:
#if UTP_DEBUG_LOGGING
ctx->log(UTP_LOG_DEBUG, NULL, "ICMP from %s, setting state to CS_RESET and causing error %d", addrfmt(addr, addrbuf), err);
#endif
conn->state = CS_RESET;
break;
}
utp_call_on_error(conn->ctx, conn, err);
return 1;
}
*/
// Write bytes to the UTP socket. Returns the number of bytes written.
// 0 indicates the socket is no longer writable, -1 indicates an error
int utp_writev(UTPSocketImpl conn, ByteBuffer[] iovec_input, int num_iovecs)
{
//utp_iovec[] iovec = new utp_iovec[UTP_IOV_MAX];
if(ASSERTS)_assert(conn);
if (conn==null) return -1;
if(ASSERTS)_assert(iovec_input);
if (iovec_input==null) return -1;
if(ASSERTS)_assert(num_iovecs);
if (num_iovecs==0) return -1;
if (num_iovecs > UTP_IOV_MAX)
num_iovecs = UTP_IOV_MAX;
//memcpy(iovec, iovec_input, sizeof(struct utp_iovec)*num_iovecs);
/*
for ( int i=0;i<num_iovecs;i++){
iovec[i] = new utp_iovec( iovec_input[i] );
}
*/
int bytes = 0;
int sent = 0;
for (int i = 0; i < num_iovecs; i++)
bytes += iovec_input[i].remaining();
//#if UTP_DEBUG_LOGGING
//size_t param = bytes;
//#endif
if (conn.state != CS_CONNECTED) {
//#if UTP_DEBUG_LOGGING
//conn->log(UTP_LOG_DEBUG, "UTP_Write %u bytes = false (not CS_CONNECTED)", (uint)bytes);
//#endif
return 0;
}
conn.ctx.current_ms = utp_call_get_milliseconds(conn.ctx, conn);
// don't send unless it will all fit in the window
int packet_size = conn.get_packet_size();
int num_to_send = Math.min(bytes, packet_size);
while (!conn.is_full(num_to_send)) {
// Send an outgoing packet.
// Also add it to the outgoing of packets that have been sent but not ACKed.
bytes -= num_to_send;
sent += num_to_send;
//#if UTP_DEBUG_LOGGING
//conn->log(UTP_LOG_DEBUG, "Sending packet. seq_nr:%u ack_nr:%u wnd:%u/%u/%u rcv_win:%u size:%u cur_window_packets:%u",
// conn->seq_nr, conn->ack_nr,
// (uint)(conn->cur_window + num_to_send),
// (uint)conn->max_window, (uint)conn->max_window_user,
// (uint)conn->last_rcv_win, num_to_send,
// conn->cur_window_packets);
//#endif
conn.write_outgoing_packet(num_to_send, ST_DATA, iovec_input, num_iovecs);
num_to_send = Math.min(bytes, packet_size);
if (num_to_send == 0) {
//#if UTP_DEBUG_LOGGING
//conn->log(UTP_LOG_DEBUG, "UTP_Write %u bytes = true", (uint)param);
//#endif
return sent;
}
}
boolean full = conn.is_full();
if (full) {
// mark the socket as not being writable.
conn.state = CS_CONNECTED_FULL;
}
//#if UTP_DEBUG_LOGGING
//conn->log(UTP_LOG_DEBUG, "UTP_Write %u bytes = %s", (uint)bytes, full ? "false" : "true");
//#endif
// returns whether or not the socket is still writable
// if the congestion window is not full, we can still write to it
//return !full;
return sent;
}
void utp_read_drained(UTPSocketImpl conn)
{
if(ASSERTS)_assert(conn);
if (conn==null) return;
if(ASSERTS)_assert(conn.state != CS_UNINITIALIZED);
if (conn.state == CS_UNINITIALIZED) return;
int rcvwin = conn.get_rcv_window();
if (rcvwin > conn.last_rcv_win) {
// If last window was 0 send ACK immediately, otherwise should set timer
if (conn.last_rcv_win == 0) {
conn.send_ack();
} else {
conn.ctx.current_ms = utp_call_get_milliseconds(conn.ctx, conn);
conn.schedule_ack();
}
}
}
// Should be called each time the UDP socket is drained
void utp_issue_deferred_acks(utp_context ctx)
{
if(ASSERTS)_assert(ctx);
if (ctx==null) return;
//for (size_t i = 0; i < ctx->ack_sockets.GetCount(); i++) {
// UTPSocket *conn = ctx->ack_sockets[i];
// conn->send_ack();
// i--; // PARG - weird code, send_ack removes from list...
//}
if ( ctx.ack_sockets.size() > 0 ){
List<UTPSocketImpl> temp = new ArrayList<UTPSocketImpl>( ctx.ack_sockets );
for ( UTPSocketImpl conn: temp ){
//System.out.println( "Sending deferred ACK for " + conn.addr + "/" + conn.ack_nr.i ); // PARG
conn.send_ack();
}
}
}
// Should be called every 500ms
void utp_check_timeouts(utp_context ctx)
{
if(ASSERTS)_assert(ctx);
if (ctx==null) return;
ctx.current_ms = utp_call_get_milliseconds(ctx, null);
if (ctx.current_ms - ctx.last_check < TIMEOUT_CHECK_INTERVAL)
return;
ctx.last_check = ctx.current_ms;
/*
for (size_t i = 0; i < ctx->rst_info.GetCount(); i++) {
if ((int)(ctx->current_ms - ctx->rst_info[i].timestamp) >= RST_INFO_TIMEOUT) {
ctx->rst_info.MoveUpLast(i);
i--;
}
}
if (ctx->rst_info.GetCount() != ctx->rst_info.GetAlloc()) {
ctx->rst_info.Compact();
}
*/
if ( ctx.rst_info.size() > 0 ){
Iterator<RST_Info> rst_it = ctx.rst_info.iterator();
while( rst_it.hasNext()){
RST_Info info = rst_it.next();
if ((int)(ctx.current_ms - info.timestamp) >= RST_INFO_TIMEOUT) {
rst_it.remove();
}
}
}
/*
utp_hash_iterator_t it;
UTPSocketKeyData* keyData;
while ((keyData = ctx->utp_sockets->Iterate(it))) {
UTPSocket *conn = keyData->socket;
conn->check_timeouts();
// Check if the object was deleted
if (conn->state == CS_DESTROY) {
#if UTP_DEBUG_LOGGING
conn->log(UTP_LOG_DEBUG, "Destroying");
#endif
delete conn;
}
}
*/
if ( ctx.utp_sockets.size() > 0 ){
Iterator<UTPSocketKeyData> socket_it = ctx.utp_sockets.values().iterator();
List<UTPSocketImpl> to_free = new ArrayList<UTPSocketImpl>();
while( socket_it.hasNext()){
UTPSocketImpl socket = socket_it.next().socket;
socket.check_timeouts();
// Check if the object was deleted
if (socket.state == CS_DESTROY) {
//LOG_UTPV("0x%08x: Destroying", conn);
//UTP_Free(conn);
//i--;
to_free.add( socket );
}
}
for ( UTPSocketImpl s: to_free ){
s.UTP_Free();
}
}
}
/*
int utp_getpeername(utp_socket *conn, struct sockaddr *addr, socklen_t *addrlen)
{
assert(addr);
if (!addr) return -1;
assert(addrlen);
if (!addrlen) return -1;
assert(conn);
if (!conn) return -1;
assert(conn->state != CS_UNINITIALIZED);
if (conn->state == CS_UNINITIALIZED) return -1;
socklen_t len;
const SOCKADDR_STORAGE sa = conn->addr.get_sockaddr_storage(&len);
*addrlen = min(len, *addrlen);
memcpy(addr, &sa, *addrlen);
return 0;
}
*/
/*
int utp_get_delays(UTPSocket *conn, uint32 *ours, uint32 *theirs, uint32 *age)
{
assert(conn);
if (!conn) return -1;
assert(conn->state != CS_UNINITIALIZED);
if (conn->state == CS_UNINITIALIZED) {
if (ours) *ours = 0;
if (theirs) *theirs = 0;
if (age) *age = 0;
return -1;
}
if (ours) *ours = conn->our_hist.get_value();
if (theirs) *theirs = conn->their_hist.get_value();
if (age) *age = conn->ctx->current_ms - conn->last_measured_delay;
return 0;
}
*/
// Close the UTP socket.
// It is not valid for the upper layer to refer to socket after it is closed.
// Data will keep to try being delivered after the close.
void utp_close(UTPSocketImpl conn)
{
if(ASSERTS)_assert(conn);
if (conn==null) return;
assert(conn.state != CS_UNINITIALIZED
&& conn.state != CS_DESTROY_DELAY
&& conn.state != CS_FIN_SENT
&& conn.state != CS_DESTROY);
//#if UTP_DEBUG_LOGGING
//conn->log(UTP_LOG_DEBUG, "UTP_Close in state:%s", statenames[conn->state]);
//#endif
switch(conn.state) {
case CS_CONNECTED:
case CS_CONNECTED_FULL:
conn.state = CS_FIN_SENT;
conn.write_outgoing_packet(0, ST_FIN, null, 0);
break;
case CS_SYN_SENT:
conn.rto_timeout = utp_call_get_milliseconds(conn.ctx, conn) + Math.min(conn.rto * 2, 60);
// fall through
case CS_GOT_FIN:
conn.state = CS_DESTROY_DELAY;
break;
default:
conn.state = CS_DESTROY;
break;
}
}
/*
utp_context* utp_get_context(utp_socket *socket) {
assert(socket);
return socket ? socket->ctx : NULL;
}
void* utp_set_userdata(utp_socket *socket, void *userdata) {
assert(socket);
if (socket) socket->userdata = userdata;
return socket ? socket->userdata : NULL;
}
void* utp_get_userdata(utp_socket *socket) {
assert(socket);
return socket ? socket->userdata : NULL;
}
void struct_utp_context::log(int level, utp_socket *socket, char const *fmt, ...)
{
switch (level) {
case UTP_LOG_NORMAL: if (!log_normal) return;
case UTP_LOG_MTU: if (!log_mtu) return;
case UTP_LOG_DEBUG: if (!log_debug) return;
}
va_list va;
char buf[4096];
va_start(va, fmt);
vsnprintf(buf, 4096, fmt, va);
buf[4095] = '\0';
va_end(va);
utp_call_log(this, socket, (const byte *)buf);
}
utp_socket_stats* utp_get_stats(utp_socket *socket)
{
#ifdef _DEBUG
assert(socket);
if (!socket) return NULL;
socket->_stats.mtu_guess = socket->mtu_last ? socket->mtu_last : socket->mtu_ceiling;
return &socket->_stats;
#else
return NULL;
#endif
}
*/
public void
UTP_CheckTimeouts()
{
utp_check_timeouts( global_ctx );
}
public void
UTP_IncomingIdle()
{
utp_issue_deferred_acks( global_ctx );
}
public boolean
isValidPacket(
byte[] buffer,
int len )
{
PacketFormatDeserialised deserialised = deserialise( buffer, len, true );
if ( deserialised == null ){
return( false );
}
return( true );
}
public UTPSocket
UTP_Create()
throws UTPProviderException
{
return( utp_create_socket( global_ctx ));
}
public void
UTP_SetUserData(
UTPSocket conn,
Object user_data )
throws UTPProviderException
{
((UTPSocketImpl)conn).userdata = user_data;
}
public void
UTP_Connect(
UTPSocket conn,
InetSocketAddress address )
throws UTPProviderException
{
UTPSocketImpl socket = (UTPSocketImpl)conn;
if ( utp_connect( socket, address ) != 0 ){
utp_close( socket );
throw( new UTPProviderException( "Connect failed" ));
}
}
public boolean
UTP_IsIncomingUTP(
UTPGotIncomingConnection incoming_proc,
SendToProc send_to_proc,
Object send_to_userdata,
byte[] buffer,
int len,
InetSocketAddress addr )
{
return( utp_process_udp( global_ctx, buffer, len, addr) != 0 );
}
public void
UTP_GetPeerName(
UTPSocket conn,
InetSocketAddress[] addr_out )
{
if (ASSERTS)_assert(conn!=null);
addr_out[0] = ((UTPSocketImpl)conn).addr;
}
public int
UTP_GetSocketConnectionID( UTPSocket _conn )
{
UTPSocketImpl conn = (UTPSocketImpl)_conn;
return( conn.conn_id_recv );
}
public boolean
UTP_Write(
UTPSocket conn,
int bytes )
{
Debug.out( "Not Supported" );
return( false );
}
public boolean
UTP_Write(
UTPSocket conn,
ByteBuffer[] buffers,
int start,
int len )
throws UTPProviderException
{
ByteBuffer[] b;
if ( start == 0 ){
b = buffers;
}else{
b = new ByteBuffer[len];
System.arraycopy( buffers, start, b, 0, len );
}
int res = utp_writev( (UTPSocketImpl)conn, b, len );
if ( res < 0 ){
throw( new UTPProviderException( "Write failed" ));
}
return( res > 0 );
}
public void
UTP_RBDrained(
UTPSocket conn )
{
utp_read_drained((UTPSocketImpl)conn);
}
public void
UTP_Close(
UTPSocket conn )
{
utp_close((UTPSocketImpl)conn);
}
// unsupported V1 methods
public UTPSocket
UTP_Create(
SendToProc send_to_proc,
Object send_to_userdata,
InetSocketAddress addr )
throws UTPProviderException
{
throw( new UTPProviderException( "Not Supported" ));
}
public void
UTP_Connect(
UTPSocket conn )
throws UTPProviderException
{
throw( new UTPProviderException( "Not Supported" ));
}
public void
UTP_SetCallbacks(
UTPSocket conn,
UTPFunctionTable funcs,
Object userdata )
throws UTPProviderException
{
throw( new UTPProviderException( "Not Supported" ));
}
private int
convertOption(
int po )
{
if ( po == UTPProvider.OPT_RECEIVE_BUFFER ){
return( UTP_RCVBUF );
}else if ( po == UTPProvider.OPT_SEND_BUFFER ){
return( UTP_SNDBUF );
}else{
Debug.out( "derp" );
return( 0 );
}
}
public void
UTP_SetOption(
int provider_option,
int value )
{
int option = convertOption( provider_option );
boolean is_buff_opt = option == UTP_SNDBUF || option == UTP_RCVBUF;
if ( is_buff_opt ){
value *= 1024;
}
int existing = utp_context_get_option( global_ctx, option );
if ( existing == value ){
return;
}
utp_context_set_option( global_ctx, option, value );
if ( is_buff_opt ){
for ( UTPSocketKeyData data: global_ctx.utp_sockets.values()){
UTPSocketImpl socket = data.socket;
if ( utp_getsockopt(socket, option ) == existing ){
utp_setsockopt( socket, option , value );
}
}
}
}
public int
UTP_GetOption(
int provider_option )
{
int option = convertOption( provider_option );
int value = utp_context_get_option( global_ctx, option );
if ( option == UTP_SNDBUF || option == UTP_RCVBUF ){
value /= 1024;
}
return( value );
}
}