components/open-fabrics/rds-tools/patches/base.patch
changeset 369 cc8c00719da9
child 715 eed3ed08f692
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/components/open-fabrics/rds-tools/patches/base.patch	Tue Jul 05 19:16:33 2011 -0700
@@ -0,0 +1,1466 @@
+diff -r -u /tmp/rds-tools-2.0.4/rds-stress.c rds-tools-2.0.7/rds-stress.c
+--- /tmp/rds-tools-2.0.4/rds-stress.c	Wed Aug  4 15:25:10 2010
++++ rds-tools-2.0.7/rds-stress.c	Thu Feb 24 13:27:52 2011
+@@ -15,7 +15,13 @@
+ #include <sys/time.h>
+ #include <time.h>
+ #include <inttypes.h>
++#if defined(__SVR4) && defined(__sun)
++#include <sys/syscall.h>
++#include <signal.h>
++#include <sys/lgrp_user.h>
++#else
+ #include <syscall.h>
++#endif
+ #include <sys/stat.h>
+ #include <sys/poll.h>
+ #include <ctype.h>
+@@ -22,8 +28,13 @@
+ #include <fcntl.h>
+ #include <sched.h>
+ #include <getopt.h>
++#if !(defined(__SVR4) && defined(__sun))
+ #include <byteswap.h>
+ #include "rds.h"
++#else
++#include <infiniband/ofa_solaris.h>
++#include <sys/rds.h>
++#endif
+ 
+ #include "pfhack.h"
+ 
+@@ -110,6 +121,7 @@
+ struct child_control {
+ 	pid_t pid;
+ 	int ready;
++	int stopping;
+ 	struct timeval start;
+ 	struct counter cur[NR_STATS];
+ 	struct counter last[NR_STATS];
+@@ -254,7 +266,20 @@
+ 
+ 	die("invalid host name or dotted quad '%s'\n", ptr);
+ }
++#if defined(__SVR4) && defined(__sun)
++static lgrp_id_t lgrp_id = -1;
+ 
++static void
++set_my_lgrp(void)
++{
++	if (lgrp_id != -1) {
++		lgrp_affinity_set(P_LWPID, P_MYID, lgrp_id,
++			LGRP_AFF_STRONG);
++		yield(); /* force a context switch */
++	}
++}
++#endif
++
+ static void usage(void)
+ {
+         fprintf(stderr, "rds-stress version %s\n", RDS_VERSION);
+@@ -281,6 +306,9 @@
+ 	" -c                measure cpu use with per-cpu soak processes\n"
+ 	" -V                trace execution\n"
+ 	" -z                print a summary at end of test only\n"
++#if defined(__SVR4) && defined(__sun)
++	" -g [lgrpid]       bind the process to the specified lgrp\n"
++#endif
+ 	"\n"
+ 	"Example:\n"
+ 	"  recv$ rds-stress\n"
+@@ -310,7 +338,7 @@
+ static void check_parent(pid_t pid)
+ {
+ 	if (pid != getppid())
+-		die("parent %u exited\n", pid);
++		die("parent %u exited\n", (int)pid);
+ }
+ 
+ /*
+@@ -334,6 +362,7 @@
+ 		msg_pattern[i] = k;
+ }
+ 
++#if !(defined(__SVR4) && defined(__sun))
+ #if __BYTE_ORDER == __LITTLE_ENDIAN
+ #define htonll(x)	bswap_64(x)
+ #define ntohll(x)	bswap_64(x)
+@@ -341,6 +370,7 @@
+ #define htonll(x)	(x)
+ #define ntohll(x)	(x)
+ #endif
++#endif /* Not sun */
+ 
+ static void encode_hdr(struct header *dst, const struct header *hdr)
+ {
+@@ -584,7 +614,11 @@
+ 	if (opts->receive_addr == 0)
+ 		return 1;
+ 
++#if defined(__SVR4) && defined(__sun)
++	sin.sin_family = AF_INET_OFFLOAD;
++#else
+ 	sin.sin_family = AF_INET;
++#endif
+ 	sin.sin_port = htons(opts->starting_port);
+ 	sin.sin_addr.s_addr = htonl(opts->receive_addr);
+ 
+@@ -677,7 +711,11 @@
+ 	size = sizeof(struct rdma_key_o_meter)
+ 			+ 2 * nr_tasks * sizeof(*kt)
+ 			+ 2 * RDMA_MAX_TRACKED_KEYS * sizeof(*ks);
++#if defined(__SVR4) && defined(__sun)
++	base = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_SHARED, -1, 0);
++#else
+ 	base = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_SHARED, 0, 0);
++#endif
+ 	if (base == MAP_FAILED)
+ 		die_errno("alloc_rdma_buffers: mmap failed");
+ 
+@@ -828,7 +866,7 @@
+ 	}
+ 
+ 	if (!failed)
+-		trace("compare pass pattern %Lx addr %p\n",
++		trace("compare pass pattern 0x%Lx addr %p\n",
+ 			(unsigned long long) pattern, addr);
+ }
+ 
+@@ -865,7 +903,11 @@
+ 	/* We use mmap here rather than malloc, because it is always
+ 	 * page aligned. */
+ 	len = 2 * opts->nr_tasks * opts->req_depth * (opts->rdma_vector * opts->rdma_size) + sys_page_size;
++#if defined(__SVR4) && defined(__sun)
++	base = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
++#else	
+ 	base = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE, 0, 0);
++#endif
+ 	if (base == MAP_FAILED)
+ 		die_errno("alloc_rdma_buffers: mmap failed");
+ 	memset(base, 0x2f, len);
+@@ -915,17 +957,16 @@
+ 	if (RDMA_OP_READ == hdr->rdma_op) {
+ 		if (opt.verify)
+ 			rds_fill_buffer(rdma_addr, rdma_size, hdr->rdma_pattern);
+-		trace("Requesting RDMA read for pattern %Lx "
+-				"local addr to rdma read %p\n",
+-				(unsigned long long) hdr->rdma_pattern,
++		trace("Requesting RDMA read for pattern 0x%Lx "
++				"local addr to rdma read 0x%p\n",
++				(unsigned long long) hdr->rdma_pattern,
+ 				rdma_addr);
+ 	} else {
+ 		if (opt.verify)
+ 			rds_fill_buffer(rdma_addr, rdma_size, 0);
+-		trace("Requesting RDMA write for pattern %Lx "
+-				"local addr to rdma write %p\n",
+-				(unsigned long long) hdr->rdma_pattern,
+-				rdma_addr);
++
++		trace("Requesting RDMA write for pattern 0x%Lx\n",
++				(unsigned long long) hdr->rdma_pattern);
+ 	}
+ }
+ 
+@@ -947,7 +988,7 @@
+ 		die("Unexpected RDMA op %u in request\n", in_hdr->rdma_op);
+ 
+ 
+-	trace("RDS received request to issue rdma %s len %lu rva %Lx key %Lx pattern %Lx\n",
++	trace("RDS received request to issue rdma %s len %lu rva 0x%Lx key 0x%Lx pattern 0x%Lx\n",
+ 		in_hdr->rdma_op == RDMA_OP_WRITE? "write to" : "read from",
+ 		rdma_size,
+ 		(unsigned long long) in_hdr->rdma_addr,
+@@ -1007,6 +1048,9 @@
+ 	t->drain_rdmas = 0;
+ }
+ 
++#if defined(__SVR4) && defined(__sun)
++#undef MSG_MAXIOVLEN
++#endif
+ #define MSG_MAXIOVLEN 2
+ 
+ /*
+@@ -1560,7 +1604,12 @@
+ 	struct timeval start;
+         int do_work = opts->simplex ? active : 1;
+ 
++#if defined(__SVR4) && defined(__sun)
++	set_my_lgrp();
++	sin.sin_family = AF_INET_OFFLOAD;
++#else
+ 	sin.sin_family = AF_INET;
++#endif
+ 	sin.sin_port = htons(opts->starting_port + 1 + id);
+ 	sin.sin_addr.s_addr = htonl(opts->receive_addr);
+ 
+@@ -1572,7 +1621,11 @@
+ 	for (i = 0; i < opts->nr_tasks; i++) {
+ 		tasks[i].nr = i;
+ 		tasks[i].src_addr = sin;
++#if defined(__SVR4) && defined(__sun)
++		tasks[i].dst_addr.sin_family = AF_INET_OFFLOAD;
++#else
+ 		tasks[i].dst_addr.sin_family = AF_INET;
++#endif
+ 		tasks[i].dst_addr.sin_addr.s_addr = htonl(opts->send_addr);
+ 		tasks[i].dst_addr.sin_port = htons(opts->starting_port + 1 + i);
+ 		tasks[i].send_time = alloca(opts->req_depth * sizeof(struct timeval));
+@@ -1625,6 +1678,10 @@
+ 				;
+ 		}
+ 
++		/* stop sending if in shutdown phase */
++		if (ctl->stopping)
++			continue;
++
+ 		/* keep the pipeline full */
+ 		can_send = !!(pfd.revents & POLLOUT);
+ 		for (i = 0, t = tasks; i < opts->nr_tasks; i++, t++) {
+@@ -1665,8 +1722,12 @@
+ 	uint32_t i;
+ 
+ 	len = opts->nr_tasks * sizeof(*ctl);
++#if defined(__SVR4) && defined(__sun)
++	ctl = (struct child_control *)mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_SHARED, -1, 0);
++#else
+ 	ctl = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_SHARED,
+ 		   0, 0);
++#endif
+ 	if (ctl == MAP_FAILED)
+ 		die("mmap of %u child control structs failed", opts->nr_tasks);
+ 
+@@ -1699,7 +1760,7 @@
+ 			continue;
+ 		pid = waitpid(-1, NULL, WNOHANG);
+ 		if (pid)
+-			die("child %u (pid %u) exited\n", i, pid);
++			die("child %u (pid %u) exited\n", i, (int)pid);
+ 		sleep(1);
+ 		i--; /* try this child again */
+ 	}
+@@ -1967,7 +2028,7 @@
+ 
+ 	pid = waitpid(-1, &status, wflags);
+ 	if (pid < 0)
+-		die("waitpid returned %u", pid);
++		die("waitpid returned %d", (int)pid);
+ 	if (pid == 0)
+ 		return 0;
+ 
+@@ -1975,15 +2036,15 @@
+ 		if (WEXITSTATUS(status) == 0)
+ 			return 1;
+ 		die("child pid %u exited with status %d\n",
+-				pid, WEXITSTATUS(status));
++				(int)pid, WEXITSTATUS(status));
+ 	}
+ 	if (WIFSIGNALED(status)) {
+ 		if (WTERMSIG(status) == SIGTERM)
+ 			return 1;
+ 		die("child pid %u exited with signal %d\n",
+-				pid, WTERMSIG(status));
++				(int)pid, WTERMSIG(status));
+ 	}
+-	die("child pid %u wait status %d\n", pid, status);
++	die("child pid %u wait status %d\n", (int)pid, status);
+ }
+ 
+ static void release_children_and_wait(struct options *opts,
+@@ -2139,7 +2200,12 @@
+ 	control_fd = -1;
+ 
+ 	if (nr_running) {
++		/* let everything gracefully stop before we kill the chillins */
+ 		for (i = 0; i < opts->nr_tasks; i++)
++			ctl[i].stopping = 1;
++		sleep(1);
++
++		for (i = 0; i < opts->nr_tasks; i++)
+ 			kill(ctl[i].pid, SIGTERM);
+ 		stop_soakers(soak_arr);
+ 	}
+@@ -2517,7 +2583,11 @@
+ 	/* an extra terminating entry which will be all 0s */
+ 	len = (nr_soak + 1) * sizeof(struct soak_control);
+ 	soak_arr = mmap(NULL, len, PROT_READ|PROT_WRITE,
++#if defined(__SVR4) && defined(__sun)
++			MAP_ANONYMOUS|MAP_SHARED, -1, 0);
++#else
+ 			MAP_ANONYMOUS|MAP_SHARED, 0, 0);
++#endif
+ 	if (soak_arr == MAP_FAILED)
+ 		die("mmap of %ld soak control structs failed", nr_soak);
+ 
+@@ -2589,6 +2659,7 @@
+ { "rtprio",		no_argument,		NULL,	'R'	},
+ { "verify",		no_argument,		NULL,	'v'	},
+ { "trace",		no_argument,		NULL,	'V'	},
++{ "lgrpid",		required_argument,	NULL,	'g'	},
+ 
+ { "rdma-use-once",	required_argument,	NULL,	OPT_RDMA_USE_ONCE },
+ { "rdma-use-get-mr",	required_argument,	NULL,	OPT_RDMA_USE_GET_MR },
+@@ -2652,7 +2723,7 @@
+ 	while(1) {
+ 		int c, index;
+ 
+-		c = getopt_long(argc, argv, "+a:cD:d:hI:M:op:q:Rr:s:t:T:vVz",
++		c = getopt_long(argc, argv, "+a:cD:d:hI:M:op:q:Rr:s:t:T:vVg:z",
+ 				long_options, &index);
+ 		if (c == -1)
+ 			break;
+@@ -2711,6 +2782,10 @@
+ 			case 'V':
+ 				opts.tracing = 1;
+ 				break;
++			case 'g':
++				lgrp_id = (lgrp_id_t)parse_ull(optarg,
++				    (uint32_t)~0);
++				break;
+ 			case OPT_USE_CONG_MONITOR:
+ 				opts.use_cong_monitor = parse_ull(optarg, 1);
+ 				break;
+@@ -2786,6 +2861,7 @@
+ 	if (opts.rdma_size && 0)
+ 		opts.rdma_size = (opts.rdma_size + 4095) & ~4095;
+ 
++	set_my_lgrp();
+ 	opt = opts;
+ 	return active_parent(&opts, soak_arr);
+ }
+diff -r -u /tmp/rds-tools-2.0.4/pfhack.h rds-tools-2.0.7/pfhack.h
+--- /tmp/rds-tools-2.0.4/pfhack.h	Wed Aug  4 15:25:11 2010
++++ rds-tools-2.0.7/pfhack.h	Thu Feb 24 13:27:51 2011
+@@ -44,9 +44,11 @@
+ #ifndef __PF_HACK_H
+ #define __PF_HACK_H
+ 
++#if !((defined(__SVR4) && defined(__sun)))
+ #define PF_RDS		21
+ #define AF_RDS		21
+ #define SOL_RDS		276
++#endif
+ 
+ extern int discover_pf_rds();
+ extern int discover_sol_rds();
+diff -r -u /tmp/rds-tools-2.0.4/rds-info.c rds-tools-2.0.7/rds-info.c
+--- /tmp/rds-tools-2.0.4/rds-info.c	Wed Aug  4 15:25:10 2010
++++ rds-tools-2.0.7/rds-info.c	Thu Feb 24 13:27:51 2011
+@@ -42,16 +42,27 @@
+ #include <sys/types.h>
+ #include <sys/socket.h>
+ #include <errno.h>
++#if defined(__SVR4) && defined(__sun)
++#include <strings.h>
++#else
+ #include <string.h>
++#endif
+ #include <inttypes.h>
+ #include <netinet/in.h>
+ #include <arpa/inet.h>
+ 
++#if defined(__SVR4) && defined(__sun)
++#include <sys/rds.h>
++#else
+ #include "rds.h"
++#endif
+ #include "pfhack.h"
+ 
++/* The struct rds_info_connection layout changed between the RDS releases in OFED 1.4 and 1.5,
++   so both layouts are checked for now. TODO: drop the transport[15] check once pre-1.5 OFED is gone. */
+ #define rds_conn_flag(conn, flag, letter) \
+-	(conn.flags & RDS_INFO_CONNECTION_FLAG_##flag ? letter : '-')
++	(conn.flags & RDS_INFO_CONNECTION_FLAG_##flag \
++	|| conn.transport[15] & RDS_INFO_CONNECTION_FLAG_##flag ? letter : '-')
+ 
+ #define min(a, b) (a < b ? a : b)
+ #define array_size(foo) (sizeof(foo) / sizeof(foo[0]))
+@@ -234,8 +245,10 @@
+ 		print_msgs, "Send", 0 },
+ 	['t'] = { RDS_INFO_RETRANS_MESSAGES, "retransmit queue messages",
+ 		  print_msgs, "Retransmit", 0 },
++#if !(defined(__SVR4) && defined(__sun))
+ 	['T'] = { RDS_INFO_TCP_SOCKETS, "TCP transport sockets",
+ 		  print_tcp_socks, NULL, 0 },
++#endif
+ 	['I'] = { RDS_INFO_IB_CONNECTIONS, "IB transport connections",
+ 		  print_ib_conns, NULL, 0 },
+ };
+@@ -266,6 +279,10 @@
+ 	char optstring[258] = "v+";
+ 	int given_options = 0;
+ 	socklen_t len = 0;
++#if defined(__SVR4) && defined(__sun)
++	socklen_t ulen;
++	struct rds_info_arg arg;
++#endif
+ 	void *data = NULL;
+ 	int fd;
+ 	int each;
+@@ -322,6 +339,7 @@
+ 		    (given_options && !infos[i].option_given))
+ 			continue;
+ 
++#if !(defined(__SVR4) && defined(__sun))
+ 		/* read in the info until we get a full snapshot */
+ 		while ((each = getsockopt(fd, sol, infos[i].opt_val, data,
+ 				   &len)) < 0) {
+@@ -345,15 +363,60 @@
+ 				return 1;
+ 			}
+ 		}
++#else
++		/* 1st call gets the length of the data available */
++		ulen = 0;
++		bzero(&arg, sizeof (struct rds_info_arg));
++		arg.lenp = (uint64_t)(uintptr_t)&ulen;
++		arg.datap = NULL;
++		each = ioctl(fd, infos[i].opt_val, &arg);
++		if ((each < 0) && (errno != ENOSPC)) {
++			verbosef(0, stderr, "%s: Unable get statistics: %s\n",
++			    progname, strerror(errno));
++			return 1;
++		}
+ 
++		/* No data at the driver */
++		if (ulen == 0)
++			invalid_opt = 1;
++#endif
++
+ 		if (invalid_opt)
+ 			continue;
+ 
++#if !(defined(__SVR4) && defined(__sun))
+ 		infos[i].print(data, each, len, infos[i].extra);
++#else
++		do {
++			arg.datap = (uint64_t)(uintptr_t)realloc(
++			    (char *)(uintptr_t)arg.datap, ulen);
++			if (arg.datap == NULL) {
++			    verbosef(0, stderr, "%s: Unable to allocate memory "
++				"for %u bytes of info: %s\n",
++				progname, ulen, strerror(errno));
++				return 1;
++			}
+ 
++			/* 2nd call gets the data */
++			len = ulen;
++			each = ioctl(fd, infos[i].opt_val, &arg);
++			if ((each < 0) && (errno != ENOSPC)) {
++				verbosef(0, stderr,
++				    "%s: Unable get statistics: %s\n",
++				    progname, strerror(errno));
++				return 1;
++			}
++		} while (ulen > len);
++
++		infos[i].print((void *)(uintptr_t)arg.datap, each, ulen,
++		    infos[i].extra);
++#endif
+ 		if (given_options && --given_options == 0)
+ 			break;
+ 	}
++#if defined(__SVR4) && defined(__sun)
++	free((void *)(uintptr_t)arg.datap);
++#endif
+ 
+ 	return 0;
+ }
+diff -r -u /tmp/rds-tools-2.0.4/rds.7 rds-tools-2.0.7/rds.7
+--- /tmp/rds-tools-2.0.4/rds.7	Wed Aug  4 15:25:11 2010
++++ rds-tools-2.0.7/rds.7	Thu Feb 24 13:27:52 2011
+@@ -6,6 +6,7 @@
+ .nf
+ .B #include <sys/socket.h>
+ .B #include <netinet/in.h>
++.B #include <sys/rds.h>
+ .fi
+ .SH DESCRIPTION
+ This is an implementation of the RDS socket API. It provides reliable,
+@@ -14,18 +15,13 @@
+ Currently, RDS can be transported over Infiniband, and loopback.
+ RDS over TCP is disabled, but will be re-enabled in the near future.
+ .PP
+-RDS uses standard
+-.B AF_INET
+-addresses as described in
+-.BR ip (7)
++RDS uses the
++.BR AF_INET_OFFLOAD " address family"
+ to identify end points.
+ .\"------------------------------------------------------------------
+ .SS Socket Creation
+ RDS is still in development and as such does not have a reserved protocol
+-family constant. Applications must read the string representation of the
+-protocol family value from the
+-.B pf_rds
+-sysctl parameter file described below.
++family constant.  Applications should use AF_INET_OFFLOAD.
+ .PP
+ .nf
+ .B rds_socket = socket(pf_rds, SOCK_SEQPACKET, 0);
+@@ -58,9 +54,6 @@
+ .BR SOL_RDS ).
+ Just as with the RDS protocol family, an official value has not been
+ assigned yet, so the kernel will assign a value dynamically.
+-The assigned value can be retrieved from the
+-.B sol_rds
+-sysctl parameter file.
+ .PP
+ RDS specific socket options will be described in a separate section
+ below.
+@@ -77,7 +70,7 @@
+ .PP
+ For instance, when binding to the address of an Infiniband interface
+ such as
+-.BR ib0 ,
++.BR ibd0 ,
+ the socket will use the Infiniband transport. If RDS is not able
+ to associate a transport with the given address, it will return
+ .BR EADDRNOTAVAIL .
+@@ -394,47 +387,6 @@
+ be delivered in the order they're sent. Messages sent from different
+ sockets, or to different destinations, may be delivered in any order.
+ .\"------------------------------------------------------------------
+-.SH SYSCTL VALUES
+-These parameteres may only be accessed through their files in
+-.BR /proc/sys/net/rds .
+-Access through
+-.BR sysctl (2)
+-is not supported.
+-.TP
+-.B pf_rds
+-This file contains the string representation of the protocol family
+-constant passed to
+-.BR socket (2)
+-to create a new RDS socket.
+-.TP
+-.B sol_rds
+-This file contains the string representation of the socket level parameter
+-that is passed to
+-.BR getsockopt (2)
+-and
+-.BR setsockopt (2)
+-to manipulate RDS socket options.
+-.TP
+-.BR max_unacked_bytes " and " max_unacked_packets
+-These parameters are used to tune the generation of acknowledgements. By
+-default, the system receiving RDS messages does not send back explicit
+-acknowledgements unless it transmits a message of its own (in which
+-case the ACK is piggybacked onto the outgoing message), or when the sending
+-system requests an ACK.
+-.IP
+-However, the sender needs to see an ACK from time to time so that it
+-can purge old messages from the send queue. The unacked bytes and
+-packet counters are used to keep track of how much data has been
+-sent without requesting an ACK. The default is to request an acknowledgement
+-every 16 packets, or every 16 MB, whichever comes first.
+-.TP
+-.BR reconnect_delay_min_ms " and " reconnect_delay_max_ms
+-RDS uses host-to-host connections to transport RDS messages (both for the TCP
+-and the Infiniband transport). If this connection breaks, RDS will try to
+-re-establish the connection. Because this reconnect may be triggered by
+-both hosts at the same time and fail, RDS uses a random backoff before attempting
+-a reconnect. These two parameters specify the minimum and maximum delay in
+-milliseconds. The default values are 1 and 1000, respectively.
+ .SH SEE ALSO
+ .BR rds-rdma (7),
+ .BR socket (2),
+diff -r -u /tmp/rds-tools-2.0.4/rds-info.1 rds-tools-2.0.7/rds-info.1
+--- /tmp/rds-tools-2.0.4/rds-info.1	Wed Aug  4 15:25:11 2010
++++ rds-tools-2.0.7/rds-info.1	Thu Feb 24 13:27:51 2011
+@@ -1,162 +1,150 @@
+-.Dd October 30, 2006
+-.Dt RDS-INFO 1
+-.Os
+-.Sh NAME
+-.Nm rds-info
+-.Nd display information from the RDS kernel module
+-.Pp
+-.Sh SYNOPSIS
+-.Nm rds-info
+-.Op Fl v
+-.Bk -words
+-.Op Fl cknrstIT
++.TH RDS-INFO 1 "October 30, 2006"
++.SH "NAME"
++rds-info - display information from the RDS kernel module
++.SH SYNOPSIS
++.B rds-info [-v] [-cknrstIT]
+ 
+-.Sh DESCRIPTION
+-The
+-.Nm
+-utility presents various sources of information that
++.SH DESCRIPTION
++.PP
++The rds-info utility presents various sources of information that
+ the RDS kernel module maintains.  When run without any optional arguments
+-.Nm
+ will output all the information it knows of.  When options are specified then
+ only the information associated with those options is displayed.
+ 
+ The options are as follows:
+-.Bl -tag -width Ds
+-.It Fl v
++.SH OPTIONS
++.PP
++.TP 7
++\fB\-v\fR
+ Requests verbose output. When this option is given, some classes of information
+ will display additional data.
+ 
+-.It Fl c
++.TP
++\fB\-c\fR
+ Display global counters.  Each counter increments as its event
+ occurs.  The counters may not be reset.  The set of supported counters
+ may change over time.
+ 
+-.Bl -tag -width 4
+-.It CounterName
++.IP	CounterName
+ The name of the counter.  These names come from the kernel and can change
+ depending on the capability of the kernel module.
+-.It Value
++.IP 	Value
+ The number of times that the counter has been incremented since the kernel
+ module was loaded.
+-.El
+ 
+-.It Fl k
++.TP
++\fB\-k\fR
+ Display all the RDS sockets in the system.  There will always be one socket
+ listed that is neither bound to nor connected to any addresses because
+-.Nm
+ itself uses an unbound socket to collect information.
+ 
+-.Bl -tag -width 4
+-.It BoundAddr, BPort
++.IP	BoundAddr, BPort
+ The IP address and port that the socket is bound to.  0.0.0.0 0 indicates that
+ the socket has not been bound.
+-.It ConnAddr, CPort
++.IP	ConnAddr, CPort
+ The IP address and port that the socket is connected to.  0.0.0.0 0 indicates
+ that the socket has not been connected.
+-.It SndBuf, RcvBuf
++.IP	SndBuf, RcvBuf
+ The number of bytes of message payload which can be queued for sending or
+ receiving on the socket, respectively.
+-.It Inode
++.IP	Inode
+ The number of the inode object associated with the socket. Can be used to
+ locate the process owning a given socket by searching /proc/*/fd for
+ open files referencing a socket with this inode number.
+-.El
+ 
+-.It Fl n
++.TP
++\fB\-n\fR
+ Display all RDS connections.  RDS connections are maintained between
+ nodes by transports.  
+ 
+-.Bl -tag -width 4
+-.It LocalAddr
++.IP	LocalAddr
+ The IP address of this node.  For connections that originate and terminate on
+ the same node the local address indicates which address initiated the
+ connection establishment.
+-.It RemoteAddr
++.IP	RemoteAddr
+ The IP address of the remote end of the connection.  
+-.It NextTX
++.IP	NextTX
+ The sequence number that will be given to the next message that is sent
+ over the connection.
+-.It NextRX
++.IP	NextRX
+ The sequence number that is expected from the next message to arrive over
+ the connection.  Any incoming messages with sequence numbers less than this
+ will be dropped.
+-.It Flg
++.IP	Flg
+ Flags which indicate the state of the connection. 
+-.Bl -tag -width 4
+-.It s
+-A process is currently sending a message down the connection.
+-.It c
+-The transport is attempting to connect to the remote address.
+-.It C
+-The connection to the remote host is connected and active.
+-.El
+-.El
+ 
+-.It Fl r, Fl s, Fl t
++.IP 		s
++	A process is currently sending a message down
++	the connection.
++.IP 		c
++	The transport is attempting to connect to the
++	remote address.
++.IP 		C
++	The connection to the remote host is connected
++	and active.
++
++.TP
++\fB\-r\fR, \fB\-s\fR, \fB\-t\fR
+ Display the messages in the receive, send, or retransmit queues respectively.
+-.Bl -tag -width 4
+-.It LocalAddr, LPort
++
++.IP	LocalAddr, LPort
+ The local IP address and port on this node associated with the message. For
+ sent messages this is the source address, for receive messages it is the
+ destination address.
+-.It RemoteAddr, RPort
++.IP	RemoteAddr, RPort
+ The remote IP address and port associated with the message. For sent messages
+ this is the destination address, for receive messages it is the source address.
+-.It Seq
++.IP	Seq
+ The sequence number of the message.
+-.It Bytes
++.IP	Bytes
+ The number of bytes in the message payload.
+-.El
+ 
++
++.PP
+ The following information sources are dependent on specific transports which
+ may not always be available. 
+ 
+-.It Fl I
++.TP 7
++\fB\-I\fR
+ Display the IB connections which the IB transport is using to provide
+ RDS connections.
+ 
+-.Bl -tag -width 4
+-.It LocalAddr
++.IP	LocalAddr
+ The local IP address of this connection.
+-.It RemoteAddr
++.IP	RemoteAddr
+ The remote IP address of this connection.
+-.It LocalDev
++.IP	LocalDev
+ The local IB Global Identifier, printed in IPv6 address syntax.
+-.It RemoteDev
++.IP	RemoteDev
+ The remote IB Global Identifier, printed in IPv6 address syntax.
+-.El
+ 
+ If verbose output is requested, per-connection settings such as the
+ maximum number of send and receive work requests will be displayed
+ in addition.
+ 
+-.It Fl T
++.TP 7
++\fB\-T\fR
+ Display the TCP sockets which the TCP transport is using to provide
+ RDS connections.
+ 
+-.Bl -tag -width 4
+-.It LocalAddr, LPort
++.IP	LocalAddr, LPort
+ The local IP address and port of this socket.
+-.It RemoteAddr, RPort
++.IP	RemoteAddr, RPort
+ The remote IP address and port that this socket is connected to.
+-.It HdrRemain
++.IP	HdrRemain
+ The number of bytes that must be read off the socket to complete the next
+ full RDS header.
+-.It DataRemain
++.IP	DataRemain
+ The number of bytes that must be read off the socket to complete the data
+ payload of the message which is being received.
+-.It SentNxt
++.IP	SentNxt
+ The TCP sequence number of the first byte of the last message that we sent
+ down the connection.
+-.It ExpectedUna
++.IP	ExpectedUna
+ The TCP sequence number of the byte past the last byte of the last message
+ that we sent down the connection.  When we see that the remote side has
+ acked up to this byte then we know that the remote side has received all
+ our RDS messages.
+-.It SeenUna
++.IP	SeenUna
+ The TCP sequence number of the byte past the last byte which has been
+ acked by the remote host.
+-.El
+-
+-.El
+-.Pp
+diff -r -u /tmp/rds-tools-2.0.4/rds-ping.1 rds-tools-2.0.7/rds-ping.1
+--- /tmp/rds-tools-2.0.4/rds-ping.1	Wed Aug  4 15:25:11 2010
++++ rds-tools-2.0.7/rds-ping.1	Thu Feb 24 13:27:52 2011
+@@ -1,69 +1,54 @@
+-.Dd Apr 22, 2008
+-.Dt RDS-PING 1
+-.Os
+-.Sh NAME
+-.Nm rds-ping
+-.Nd test reachability of remote node over RDS
+-.Pp
+-.Sh SYNOPSIS
+-.Nm rds-ping
+-.Bk -words
+-.Op Fl c Ar count
+-.Op Fl i Ar interval
+-.Op Fl I Ar local_addr
+-.Ar remote_addr
++.TH RDS-PING 1 "Apr 22, 2008"
++.SH NAME
++rds-ping - test reachability of remote node over RDS
+ 
+-.Sh DESCRIPTION
+-.Nm rds-ping
+-is used to test whether a remote node is reachable over RDS.
+-Its interface is designed to operate pretty much the standard
+-.Xr ping 8
++.SH SYNOPSIS
++.B rds-ping [-c count] [-i interval] [-I local_addr] remote_addr
++
++.SH DESCRIPTION
++.PP
++rds-ping is used to test whether a remote node is reachable over RDS.
++Its interface is designed to operate pretty much the standard ping(1M) 
+ utility, even though the way it works is pretty different.
+-.Pp
+-.Nm rds-ping
+-opens several RDS sockets and sends packets to port 0 on
++.PP
++rds-ping opens several RDS sockets and sends packets to port 0 on
+ the indicated host. This is a special port number to which
+ no socket is bound; instead, the kernel processes incoming
+ packets and responds to them.
+-.Sh OPTIONS
++.SH OPTIONS
+ The following options are available for use on the command line:
+-.Bl -tag -width Ds
+-.It Fl c Ar count
+-Causes
+-.Nm rds-ping
+-to exit after sending (and receiving) the specified number of
++.PP
++.TP 7
++\fB\-c\fR \fIcount\fR
++Causes rds-ping to exit after sending (and receiving) the specified number of
+ packets.
+-.It Fl I Ar address
+-By default,
+-.Nm rds-ping
+-will pick the local source address for the RDS socket based
++.TP
++\fB\-I\fR \fIaddress\fR
++By default, rds-ping will pick the local source address for the RDS socket based
+ on routing information for the destination address (i.e. if
+ packets to the given destination would be routed through interface
+-.Nm ib0 ,
++ib0 ,
+ then it will use the IP address of
+-.Nm ib0
++ib0
+ as source address).
+ Using the
+ .Fl I
+ option, you can override this choice.
+-.It Fl i Ar timeout
+-By default,
+-.Nm rds-ping
+-will wait for one second between sending packets. Use this option
++.TP
++\fB\-i\fR \fItimeout\fR
++By default, rds-ping will wait for one second between sending packets. Use this option
+ to specified a different interval. The timeout value is given in
+ seconds, and can be a floating point number. Optionally, append
+-.Nm msec
++msec
+ or
+-.Nm usec
++usec
+ to specify a timeout in milliseconds or microseconds, respectively.
+-.It
++.IP
+ Specifying a timeout considerably smaller than the packet round-trip
+ time will produce unexpected results.
+-.El
+-.Sh AUTHORS
+-.Nm rds-ping
++
++.SH AUTHORS
++rds-ping
+ was written by Olaf Kirch <[email protected]>.
+-.Sh SEE ALSO
+-.Xr rds 7 ,
+-.Xr rds-info 1 ,
+-.Xr rds-stress 1 .
++.SH SEE ALSO
++rds(7), rds-info(1), rds-stress(1).
+diff -r -u /tmp/rds-tools-2.0.4/configure.in rds-tools-2.0.7/configure.in
+--- /tmp/rds-tools-2.0.4/configure.in	Wed Aug  4 15:25:11 2010
++++ rds-tools-2.0.7/configure.in	Thu Feb 24 13:27:51 2011
+@@ -1,7 +1,7 @@
+ AC_PREREQ(2.55)
+ AC_INIT()
+ 
+-VERSION=2.0.4
++VERSION=2.0.7
+ RELEASE=1
+ 
+ AC_SUBST(VERSION)
+diff -r -u /tmp/rds-tools-2.0.4/rds-ping.c rds-tools-2.0.7/rds-ping.c
+--- /tmp/rds-tools-2.0.4/rds-ping.c	Wed Aug  4 15:25:10 2010
++++ rds-tools-2.0.7/rds-ping.c	Thu Feb 24 13:27:52 2011
+@@ -48,7 +48,11 @@
+ #include <sys/poll.h>
+ #include <fcntl.h>
+ #include <getopt.h>
++#if defined(__SVR4) && defined(__sun)
++#include <sys/rds.h>
++#else
+ #include "rds.h"
++#endif
+ 
+ #include "pfhack.h"
+ 
+@@ -155,7 +159,12 @@
+ 	}
+ 
+ 	memset(&sin, 0, sizeof(sin));
++#if defined(__SVR4) && defined(__sun)
++	sin.sin_family = AF_INET_OFFLOAD;
++#else
+ 	sin.sin_family = AF_INET;
++#endif
++
+ 	sin.sin_addr = opt_dstaddr;
+ 
+ 	gettimeofday(&next_ts, NULL);
+@@ -181,7 +190,7 @@
+ 				break;
+ 
+ 			timeradd(&next_ts, &opt_wait, &next_ts);
+-			if (sendto(sp->fd, NULL, 0, 0, (struct sockaddr *) &sin, sizeof(sin)))
++			if (sendto(sp->fd, NULL, 0, 0, (struct sockaddr *) &sin, sizeof(sin)) < 0)
+ 				err = errno;
+ 			sp->sent_id = ++sent;
+ 			sp->sent_ts = now;
+@@ -258,7 +267,11 @@
+ 	int pf;
+ 
+ 	memset(&sin, 0, sizeof(sin));
++#if defined(__SVR4) && defined(__sun)
++	sin.sin_family = AF_INET_OFFLOAD;
++#else
+ 	sin.sin_family = AF_INET;
++#endif
+ 
+ #ifdef DYNAMIC_PF_RDS
+         pf = discover_pf_rds();
+@@ -278,6 +291,9 @@
+ 		if (ufd < 0)
+ 			die_errno("unable to create UDP socket");
+ 		sin.sin_addr = *dst;
++#if defined(__SVR4) && defined(__sun)
++		sin.sin_family = AF_INET;
++#endif
+ 		sin.sin_port = htons(1);
+ 		if (connect(ufd, (struct sockaddr *) &sin, sizeof(sin)) < 0)
+ 			die_errno("unable to connect to %s",
+@@ -289,6 +305,9 @@
+ 
+ 		*src = sin.sin_addr;
+ 		close(ufd);
++#if defined(__SVR4) && defined(__sun)
++		sin.sin_family = AF_INET_OFFLOAD;
++#endif
+ 	}
+ 
+ 	sin.sin_addr = *src;
+diff -r -u /tmp/rds-tools-2.0.4/Makefile.in rds-tools-2.0.7/Makefile.in
+--- /tmp/rds-tools-2.0.4/Makefile.in	Wed Aug  4 15:25:11 2010
++++ rds-tools-2.0.7/Makefile.in	Thu Feb 24 13:27:51 2011
+@@ -4,10 +4,14 @@
+ mandir		= $(DESTDIR)@mandir@
+ incdir		= $(DESTDIR)@includedir@
+ 
++CC=gcc
++
+ all: all-programs
+ 
+-CFLAGS = -O2 -Wall -Iinclude
+-CPPFLAGS = -DDEBUG_EXE -DRDS_VERSION=\"@VERSION@\" -MD -MP -MF $(@D)/.$(basename $(@F)).d
++CFLAGS += -O2 -Wall -Iinclude
++CPPFLAGS += -D_XOPEN_SOURCE=500 -D__EXTENSIONS__ \
++	-DDEBUG_EXE -DRDS_VERSION=\"@VERSION@\" -MD -MP -MF $(@D)/.$(basename $(@F)).d
++LDFLAGS += -lsocket -lnsl -llgrp
+ 
+ HEADERS = kernel-list.h pfhack.h include/rds.h
+ COMMON_SOURCES = pfhack.c
+@@ -15,7 +19,7 @@
+ CLEAN_OBJECTS = $(addsuffix .o,$(PROGRAMS)) $(subst .c,.o,$(COMMON_SOURCES))
+ 
+ # This is the default
+-DYNAMIC_PF_RDS = true
++#DYNAMIC_PF_RDS = true
+ 
+ ifneq ($(DYNAMIC_PF_RDS),)
+ CPPFLAGS += -DDYNAMIC_PF_RDS
+@@ -29,14 +33,14 @@
+ all-programs: $(PROGRAMS)
+ 
+ install: $(PROGRAMS)
+-	install -d $(bindir)
+-	install -m 555 -s $(PROGRAMS) $(bindir)
+-	install -d $(mandir)/man1
+-	install -d $(mandir)/man7
+-	install -m 644 *.1 $(mandir)/man1
+-	install -m 644 *.7 $(mandir)/man7
+-	install -d $(incdir)/net
+-	install -m 444 include/rds.h $(incdir)/net
++	$(INSTALL) -d $(bindir)
++	$(INSTALL) -m 755 -s $(PROGRAMS) $(bindir)
++	$(INSTALL) -d $(mandir)/man1
++	$(INSTALL) -d $(mandir)/man7
++	$(INSTALL) -m 644 *.1 $(mandir)/man1
++	$(INSTALL) -m 644 *.7 $(mandir)/man7
++	$(INSTALL) -d $(incdir)/net
++	$(INSTALL) -m 444 include/rds.h $(incdir)/net
+ 
+ clean:
+ 	rm -f $(PROGRAMS) $(CLEAN_OBJECTS)
+@@ -47,7 +51,7 @@
+ 
+ 
+ $(PROGRAMS) : % : %.o $(COMMON_OBJECTS)
+-	gcc $(CFLAGS) $(LDFLAGS) -o $@ $^
++	$(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) -o $@ $^
+ 
+ LOCAL_DFILES := $(wildcard .*.d)
+ ifneq ($(LOCAL_DFILES),)
+@@ -72,8 +76,6 @@
+ 		configure \
+ 		README \
+ 		rds-tools.txt \
+-		stap/rds.stp \
+-		stap/README \
+ 		docs/rds-architecture.txt \
+ 		examples/Makefile \
+ 		examples/rds-sample.c \
+diff -r -u /tmp/rds-tools-2.0.4/examples/rds-sample.c rds-tools-2.0.7/examples/rds-sample.c
+--- /tmp/rds-tools-2.0.4/examples/rds-sample.c	Wed Aug  4 15:25:11 2010
++++ rds-tools-2.0.7/examples/rds-sample.c	Thu Feb 24 13:27:53 2011
+@@ -26,6 +26,7 @@
+ #include <string.h>
+ #include <stdlib.h>
+ 
++#if !(defined(__SVR4) && defined(__sun))
+ /* FIXME - this is a hack to getaround RDS not exporting any header files.
+  * This is a local copy of the file found at net/rds/
+  */
+@@ -33,6 +34,9 @@
+ /* These are defined in rds.h....but that file is not happily included */
+ #define SOL_RDS		272
+ #define PF_RDS		28
++#else
++#include <sys/rds.h>
++#endif
+ 
+ 
+ #define TESTPORT	4000
+@@ -107,12 +111,12 @@
+ 	cmsg->cmsg_type = RDS_CMSG_RDMA_ARGS;
+ 	cmsg->cmsg_len = CMSG_LEN(sizeof(struct rds_rdma_args));
+ 
+-	iov.addr = (uint64_t) buf;
++	iov.addr = (uint64_t)(uintptr_t)buf;
+ 	iov.bytes = sizeof(struct rdss_message);
+ 
+ 	args->remote_vec.addr = 0;
+ 	args->remote_vec.bytes = sizeof(struct rdss_message);
+-	args->local_vec_addr = (uint64_t) &iov;
++	args->local_vec_addr = (uint64_t)(uintptr_t)&iov;
+ 	args->nr_local = 1;
+ 	args->flags = remote_flags ? (RDS_RDMA_READWRITE | RDS_RDMA_FENCE) : 0;
+ 	args->flags |= RDS_RDMA_NOTIFY_ME;
+@@ -244,9 +248,9 @@
+ 	void *ctlbuf;
+ 	struct iovec *iov;
+ 
+-	mr_args.vec.addr = (uint64_t) buf;
++	mr_args.vec.addr = (uint64_t)(uintptr_t)buf;
+ 	mr_args.vec.bytes = sizeof(struct rdss_message);
+-	mr_args.cookie_addr = (uint64_t) cookie;
++	mr_args.cookie_addr = (uint64_t)(uintptr_t)cookie;
+ 	mr_args.flags = RDS_RDMA_USE_ONCE;
+ 
+ 	ctlbuf = calloc(1, CMSG_SPACE(sizeof(mr_args)));
+diff -r -u /tmp/rds-tools-2.0.4/examples/Makefile rds-tools-2.0.7/examples/Makefile
+--- /tmp/rds-tools-2.0.4/examples/Makefile	Wed Aug  4 15:25:11 2010
++++ rds-tools-2.0.7/examples/Makefile	Thu Feb 24 13:27:52 2011
+@@ -1,6 +1,12 @@
++CC=gcc
++LIBS = -lsocket -lnsl
++CPPFLAGS += -D_XOPEN_SOURCE=500 -D__EXTENSIONS__
+ 
+ all: rds-sample
+ 
+-rds-sample: rds-sample.o
++rds-sample: rds-sample.c
++	$(CC) $(CPPFLAGS) $(CFLAGS) -o rds-sample rds-sample.c $(LIBS)
+ 
+-CFLAGS = -I ../include
++clean:
++	rm -rf rds-sample
++
+diff -r -u /tmp/rds-tools-2.0.4/configure rds-tools-2.0.7/configure
+--- /tmp/rds-tools-2.0.4/configure	Wed Aug  4 15:25:11 2010
++++ rds-tools-2.0.7/configure	Thu Feb 24 13:27:51 2011
+@@ -1215,7 +1215,7 @@
+ 
+ 
+ 
+-VERSION=2.0.4
++VERSION=2.0.7
+ RELEASE=1
+ 
+ 
+diff -r -u /tmp/rds-tools-2.0.4/rds-stress.1 rds-tools-2.0.7/rds-stress.1
+--- /tmp/rds-tools-2.0.4/rds-stress.1	Wed Aug  4 15:25:11 2010
++++ rds-tools-2.0.7/rds-stress.1	Thu Feb 24 13:27:52 2011
+@@ -1,99 +1,103 @@
+-.Dd May 15, 2007
+-.Dt RDS-STRESS 1
+-.Os
+-.Sh NAME
+-.Nm rds-stress
+-.Nd send messages between processes over RDS sockets
+-.Pp
+-.Sh SYNOPSIS
+-.Nm rds-stress
+-.Bk -words
+-.Op Fl p Ar port_number
+-.Op Fl r Ar receive_address
+-.Op Fl s Ar send_address
+-.Op Fl a Ar ack_bytes
+-.Op Fl q Ar request_bytes
+-.Op Fl D Ar rdma_bytes
+-.Op Fl d Ar queue_depth
+-.Op Fl t Ar nr_tasks
+-.Op Fl c
+-.Op Fl R
+-.Op Fl V
+-.Op Fl v
++.TH RDS-STRESS 1 "May 15, 2007"
++.SH "NAME"
++rds-stress - send messages between processes over RDS sockets
++.PP
++.SH SYNOPSIS
++.HP
++.nf
++rds-stress [-p port_number] [-r receive_address] [-s send_address]
++      [-a ack_bytes] [-q request_bytes] [-D rdma_bytes]
++      [-d queue_depth] [-t nr_tasks] [-c] [-R] [-V] [-v] [-o]
++      [-I iovecs] [-M nr] [-z] [-g lgrpid]
++.fi
+ 
+-.Sh DESCRIPTION
+-.Nm rds-stress
++
++.SH DESCRIPTION
++.PP
++.Nm
++rds-stress
+ sends messages between groups tasks, usually running on seperate
+ machines.
+-.Pp
++.PP
+ First a passive receiving instance is started.
+-.Pp
+-.Dl $ rds-stress
+-.Pp
++.RS 12
++
++	$ rds-stress
++.RE
++.PP
+ Then an active sending instance is started, giving it
+ the address and port at which it will find a listening
+ passive receiver.  In addition, it is given configuration options which
+ both instances will use.
+-.Pp
+-.Dl $ rds-stress -s recvhost -p 4000 -t 1 -d 1
+-.Pp
++.PP
++.RS 12
++	$ rds-stress -s recvhost -p 4000 -t 1 -d 1
++.RE
++.PP
+ The active sender will parse the options, connect to the passive receiver, and
+ send the options over this connection.  From this point on both instances
+ exhibit the exact same behaviour.
+-.Pp
++.PP
+ They will create a number of child tasks as specified by the -t option.  Once
+ the children are created the parent sleeps for a second at a time, printing a
+ summary of statistics at each interval. 
+-.Pp
++.PP
+ Each child will open an RDS socket, each binding to a port number in order
+ after the port number given on the command line.  The first child would bind to
+ port 4001 in our example.  Each child sets the send and receive buffers to
+ exactly fit the number of messages, requests and acks, that will be in flight
+ as determind by the command line arguments.
+-.Pp
++.PP
+ The children then enter their loop.  They will keep a number of sent messages
+ outstanding as specified by the -d option.  When they reach this limit they
+ will wait to receive acks which will allow them to send again.  As they receive
+ messages from their peers they immediately send acks.
+-.Pp
++.PP
+ Every second, the parent process will display statistics of the ongoing
+ stress test. The output is described in section OUTPUT below.
+-.Pp
++.PP
+ If the -T option is given, the test will terminate after the specified time,
+ and a summary is printed.
+-.Pp
++.PP
+ Each child maintains outstanding messages to all other children of the other instance.
+ They do not send to their siblings.
+-.Sh OPTIONS
++.SH OPTIONS
++.PP
+ The following options are available for use on the command line:
+-.Bl -tag -width Ds
+-.It Fl p Ar port_number
++
++.TP 7
++\fB\-p  port_number
+ Each parent binds a TCP socket to this port number and their respective
+ address.  They will trade the negotiated options over this socket.  Each
+ child will bind an RDS socket to the range of ports immediately following
+ this port number, for as many children as there are.
+-.It Fl s Ar send_address
++.TP
++\fB\-s send_address
+ A connection attempt is made to this address.  Once its complete and the
+ options are sent over it then children will be created and work will proceed.
+-.It Fl r Ar receive_address
++.TP
++\fB\-r receive_address
+ This specifies the address that messages will be sent from.  If -s is not
+ specified then rds-stress waits for a connection on this address before
+ proceeding.
+-.Pp
++
+ If this option is not given, rds-stress will choose an appropriate address.
+ The passive process will accept connections on all local interfaces, and
+ obtain the address once the control connection is established.
+ The active process will choose a local address based on the interface through
+ which it connects to the destination address.
+-.It Fl a Ar ack_bytes
++.TP
++\fB\-a ack_bytes
+ This specifies the size of the ack messages, in bytes. There is a minimum size
+ which depends on the format of the ack messages, which may change over time.
+ See section "Message Sizes" below.
+-.It Fl q Ar request_bytes
++.TP
++\fB\-q request_bytes
+ This specifies the size of the request messages, in bytes.
+ It also has a minimum size which may change over time.
+ See section "Message Sizes" below.
+-.It Fl D Ar rdma_bytes
++.TP
++\fB\-D rdma_bytes
+ RDSv3 is capable of transmitting part of a message via RDMA directly from
+ application buffer to application buffer. This option enables RDMA support
+ in rds-stress: request packets include parameters for an RDMA READ or WRITE
+@@ -100,20 +104,25 @@
+ operation, which the receiving process executes at the time the ACK packet
+ is sent.
+ See section "Message Sizes" below.
+-.It Fl d Ar queue_depth
++.TP
++\fB\-d queue_depth
+ Each child will try to maintain this many sent messages outstanding to each
+ of its peers on the remote address.
+-.It Fl t Ar nr_tasks
++.TP
++\fB\-t nr_tasks
+ Each parent will create this many children tasks.
+-.It Fl T Ar seconds
++.TP
++\fB\-T seconds
+ Specify the duration of the test run. After the specified number of seconds,
+ all processes on both ends of the connection will terminate, and the
+ active instance will print a summary. By default, rds-stress will keep
+ on sending and receiving messages.
+-.It Fl z
++.TP
++\fB\-z
+ This flag can be used in conjunction with -T. It suppresses the ongoing
+ display of statistics, and prints a summary only.
+-.It Fl c
++.TP
++\fB\-c
+ This causes rds-stress to create child tasks which just consume CPU cycles.
+ One task is created for each CPU in the system.  First each child observes the
+ maximum rate at which it can consume cycles.  This means that this option
+@@ -121,50 +130,78 @@
+ use of the system by observing the lesser rate at which the children consume
+ cycles.  This option is *not* shared between the active and passive instances.
+ It must be specified on each rds-stress command line.
+-.It Fl R
++.TP
++\fB\-R
+ This tells the rds-stress parent process to run with SCHED_RR priority,
+ giving it precedence over the child processes. This is useful when running
+ with lots of tasks, where there is a risk of the child processes starving
+ the parent, and skewing the results.
+-.It Fl v
++.TP
++\fB\-v
+ With this option enabled, packets are filled with a pattern that is
+ verified by the receiver. This check can help detect data corruption
+ occuring under high load.
+-.El
+-.Pp
++.TP
++\fB\-o
++Datagrams sent one way only (default is both)
++.TP
++\fB\-I iovecs
++RDMA: number of user buffers to target (default is 1, max is 512)
++.TP
++\fB\-M nr
++RDMA: mode (0=readwrite,1=readonly,2=writeonly)
++.TP
++\fB\-g lgrpid
++bind the process to the specified lgrp
++.PP
+ 
+-.Ss Message Sizes
++.SS Message Sizes
+ Options which set a message size (such as -a) specify a number of bytes
+ by default. By appending \fBK\fP, \fBM\fP, or \fBG\fP, you can specify the size
+ in kilobytes, megabytes or gigabytes, respectively. For instance,
+ the following will run rds-stress with a message and ACK size of 1024
+ bytes, and an RDMA message size of 1048576 bytes:
+-.Pp
+-.Dl rds-stress ... -q 1K -a 1K -D 1M
+-.Pp
+-.Pp
+-.Sh OUTPUT
++.PP
++.RS 12
++rds-stress ... -q 1K -a 1K -D 1M
++.RE
++.PP
++.PP
++.SH OUTPUT
+ Each parent outputs columns of statistics at a regular interval:
+-.Bl -tag -width Ds
+-.It tsks
++.TP 8
++tsks
+ The number of child tasks which are running.
+-.It tx/s
++.TP
++tx/s
+ The number of sendmsg() calls that all children are executing, per second. 
+-.It tx+rx K/s
++.TP
++rx/s
++The number of recvmsg() calls that all children are executing, per second. 
++.TP
++tx+rx K/s
+ The total number of bytes that are flowing through sendmsg() and recvmsg() for all children.
+ This includes both request and ack messages.
+-.It rw+rr K/s
+-The total number of bytes that are being transferred via RDMA READs and
++.TP
++mbi K/s
++The total number of bytes that are being received via RDMA READs and
+ WRITEs for all children.
+-.It tx us/c
++.TP
++mbo K/s
++The total number of bytes that are being transmitted via RDMA READs and
++WRITEs for all children.
++.TP
++tx us/c
+ The average number of microseconds spent in sendmsg() calls.
+-.It rtt us
++.TP
++rtt us
+ The average round trip time for a request and ack message pair.  This measures
+ the total time between when a task sends a request and when it finally receives
+ the ack for that message.  Because it includes the time it takes for the
+ receiver to wake up, receive the message, and send an ack, it can grow to be
+ quite large under load. 
+-.It cpu %
++.TP
++cpu %
+ This is the percentage of available CPU resources on this machine that are being
+ consumed since rds-stress started running.  It will show -1.00 if -c is not
+ given.  It is calculated based on the amount of CPU resources that CPU soaking
+@@ -171,4 +208,3 @@
+ tasks are able to consume.  This lets it measure CPU use by the system, say in
+ interrupt handlers, that task-based CPU accounting does not include.
+ For this to work rds-stress must be started with -c on an idle system.
+-.El
+diff -r -u /tmp/rds-tools-2.0.4/include/rds.h rds-tools-2.0.7/include/rds.h
+--- /tmp/rds-tools-2.0.4/include/rds.h	Wed Aug  4 15:25:11 2010
++++ rds-tools-2.0.7/include/rds.h	Thu Feb 24 13:30:23 2011
+@@ -84,6 +84,8 @@
+ #define RDS_CMSG_CONG_UPDATE		5
+ #define RDS_CMSG_ATOMIC_FADD		6
+ #define RDS_CMSG_ATOMIC_CSWP		7
++#define	RDS_CMSG_MASKED_ATOMIC_FADD	8
++#define	RDS_CMSG_MASKED_ATOMIC_CSWP	9
+ 
+ #define RDS_INFO_FIRST			10000
+ #define RDS_INFO_COUNTERS		10000
+@@ -252,8 +254,25 @@
+ 	rds_rdma_cookie_t cookie;
+ 	uint64_t 	local_addr;
+ 	uint64_t 	remote_addr;
+-	uint64_t	swap_add;
+-	uint64_t	compare;
++	union {
++		struct {
++			uint64_t	compare;
++			uint64_t	swap;
++		} cswp;
++		struct {
++			uint64_t	add;
++		} fadd;
++		struct {
++			uint64_t	compare;
++			uint64_t	swap;
++			uint64_t	compare_mask;
++			uint64_t	swap_mask;
++		} m_cswp;
++		struct {
++			uint64_t	add;
++			uint64_t	nocarry_mask;
++		} m_fadd;
++	};
+ 	u_int64_t	flags;
+ 	u_int64_t	user_token;
+ };
+@@ -278,5 +297,6 @@
+ #define RDS_RDMA_USE_ONCE	0x0008	/* free MR after use */
+ #define RDS_RDMA_DONTWAIT	0x0010	/* Don't wait in SET_BARRIER */
+ #define RDS_RDMA_NOTIFY_ME	0x0020	/* Notify when operation completes */
++#define RDS_RDMA_SILENT		0x0040	/* Do not interrupt remote */
+ 
+ #endif /* IB_RDS_H */