components/open-fabrics/rds-tools/patches/base.patch
changeset 369 cc8c00719da9
child 715 eed3ed08f692
equal deleted inserted replaced
368:9a01d3a61f01 369:cc8c00719da9
       
     1 diff -r -u /tmp/rds-tools-2.0.4/rds-stress.c rds-tools-2.0.7/rds-stress.c
       
     2 --- /tmp/rds-tools-2.0.4/rds-stress.c	Wed Aug  4 15:25:10 2010
       
     3 +++ rds-tools-2.0.7/rds-stress.c	Thu Feb 24 13:27:52 2011
       
     4 @@ -15,7 +15,13 @@
       
     5  #include <sys/time.h>
       
     6  #include <time.h>
       
     7  #include <inttypes.h>
       
     8 +#if defined(__SVR4) && defined(__sun)
       
     9 +#include <sys/syscall.h>
       
    10 +#include <signal.h>
       
    11 +#include <sys/lgrp_user.h>
       
    12 +#else
       
    13  #include <syscall.h>
       
    14 +#endif
       
    15  #include <sys/stat.h>
       
    16  #include <sys/poll.h>
       
    17  #include <ctype.h>
       
    18 @@ -22,8 +28,13 @@
       
    19  #include <fcntl.h>
       
    20  #include <sched.h>
       
    21  #include <getopt.h>
       
    22 +#if !(defined(__SVR4) && defined(__sun))
       
    23  #include <byteswap.h>
       
    24  #include "rds.h"
       
    25 +#else
       
    26 +#include <infiniband/ofa_solaris.h>
       
    27 +#include <sys/rds.h>
       
    28 +#endif
       
    29  
       
    30  #include "pfhack.h"
       
    31  
       
    32 @@ -110,6 +121,7 @@
       
    33  struct child_control {
       
    34  	pid_t pid;
       
    35  	int ready;
       
    36 +	int stopping;
       
    37  	struct timeval start;
       
    38  	struct counter cur[NR_STATS];
       
    39  	struct counter last[NR_STATS];
       
    40 @@ -254,7 +266,20 @@
       
    41  
       
    42  	die("invalid host name or dotted quad '%s'\n", ptr);
       
    43  }
       
     44 +#if defined(__SVR4) && defined(__sun)
       
     45 +static lgrp_id_t lgrp_id = -1;	/* lgrp id given with -g; -1 means none was requested */
       
     46  
       
     47 +static void
       
     48 +set_my_lgrp(void)	/* no-op unless lgrp_id was set; otherwise bind the caller to it */
       
     49 +{
       
     50 +	if (lgrp_id != -1) {
       
     51 +		lgrp_affinity_set(P_LWPID, P_MYID, lgrp_id,
       
     52 +			LGRP_AFF_STRONG);	/* NOTE(review): return value is not checked */
       
     53 +		yield(); /* force a context switch */
       
     54 +	}
       
     55 +}
       
     56 +#endif
       
    57 +
       
    58  static void usage(void)
       
    59  {
       
    60          fprintf(stderr, "rds-stress version %s\n", RDS_VERSION);
       
    61 @@ -281,6 +306,9 @@
       
    62  	" -c                measure cpu use with per-cpu soak processes\n"
       
    63  	" -V                trace execution\n"
       
    64  	" -z                print a summary at end of test only\n"
       
    65 +#if defined(__SVR4) && defined(__sun)
       
    66 +	" -g [lgrpid]       bind the process to the specified lgrp\n"
       
    67 +#endif
       
    68  	"\n"
       
    69  	"Example:\n"
       
    70  	"  recv$ rds-stress\n"
       
    71 @@ -310,7 +338,7 @@
       
    72  static void check_parent(pid_t pid)
       
    73  {
       
    74  	if (pid != getppid())
       
    75 -		die("parent %u exited\n", pid);
       
    76 +		die("parent %u exited\n", (int)pid);
       
    77  }
       
    78  
       
    79  /*
       
    80 @@ -334,6 +362,7 @@
       
    81  		msg_pattern[i] = k;
       
    82  }
       
    83  
       
    84 +#if !(defined(__SVR4) && defined(__sun))
       
    85  #if __BYTE_ORDER == __LITTLE_ENDIAN
       
    86  #define htonll(x)	bswap_64(x)
       
    87  #define ntohll(x)	bswap_64(x)
       
    88 @@ -341,6 +370,7 @@
       
    89  #define htonll(x)	(x)
       
    90  #define ntohll(x)	(x)
       
    91  #endif
       
    92 +#endif /* Not sun */
       
    93  
       
    94  static void encode_hdr(struct header *dst, const struct header *hdr)
       
    95  {
       
    96 @@ -584,7 +614,11 @@
       
    97  	if (opts->receive_addr == 0)
       
    98  		return 1;
       
    99  
       
   100 +#if defined(__SVR4) && defined(__sun)
       
   101 +	sin.sin_family = AF_INET_OFFLOAD;
       
   102 +#else
       
   103  	sin.sin_family = AF_INET;
       
   104 +#endif
       
   105  	sin.sin_port = htons(opts->starting_port);
       
   106  	sin.sin_addr.s_addr = htonl(opts->receive_addr);
       
   107  
       
   108 @@ -677,7 +711,11 @@
       
   109  	size = sizeof(struct rdma_key_o_meter)
       
   110  			+ 2 * nr_tasks * sizeof(*kt)
       
   111  			+ 2 * RDMA_MAX_TRACKED_KEYS * sizeof(*ks);
       
   112 +#if defined(__SVR4) && defined(__sun)
       
   113 +	base = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_SHARED, -1, 0);
       
   114 +#else
       
   115  	base = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_SHARED, 0, 0);
       
   116 +#endif
       
   117  	if (base == MAP_FAILED)
       
   118  		die_errno("alloc_rdma_buffers: mmap failed");
       
   119  
       
   120 @@ -828,7 +866,7 @@
       
   121  	}
       
   122  
       
   123  	if (!failed)
       
   124 -		trace("compare pass pattern %Lx addr %p\n",
       
   125 +		trace("compare pass pattern 0x%Lx addr %p\n",
       
   126  			(unsigned long long) pattern, addr);
       
   127  }
       
   128  
       
   129 @@ -865,7 +903,11 @@
       
   130  	/* We use mmap here rather than malloc, because it is always
       
   131  	 * page aligned. */
       
   132  	len = 2 * opts->nr_tasks * opts->req_depth * (opts->rdma_vector * opts->rdma_size) + sys_page_size;
       
   133 +#if defined(__SVR4) && defined(__sun)
       
   134 +	base = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
       
   135 +#else	
       
   136  	base = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE, 0, 0);
       
   137 +#endif
       
   138  	if (base == MAP_FAILED)
       
   139  		die_errno("alloc_rdma_buffers: mmap failed");
       
   140  	memset(base, 0x2f, len);
       
   141 @@ -915,17 +957,16 @@
       
   142  	if (RDMA_OP_READ == hdr->rdma_op) {
       
   143  		if (opt.verify)
       
   144  			rds_fill_buffer(rdma_addr, rdma_size, hdr->rdma_pattern);
       
   145 -		trace("Requesting RDMA read for pattern %Lx "
       
   146 -				"local addr to rdma read %p\n",
       
   147 -				(unsigned long long) hdr->rdma_pattern,
       
   148 +		trace("Requesting RDMA read for pattern 0x%Lx"
       
   149 +				"local addr to rdma read 0x%p\n",
       
   150 +				hdr->rdma_pattern,
       
   151  				rdma_addr);
       
   152  	} else {
       
   153  		if (opt.verify)
       
   154  			rds_fill_buffer(rdma_addr, rdma_size, 0);
       
   155 -		trace("Requesting RDMA write for pattern %Lx "
       
   156 -				"local addr to rdma write %p\n",
       
   157 -				(unsigned long long) hdr->rdma_pattern,
       
   158 -				rdma_addr);
       
   159 +
       
   160 +		trace("Requesting RDMA write for pattern 0x%Lx",
       
   161 +				hdr->rdma_pattern);
       
   162  	}
       
   163  }
       
   164  
       
   165 @@ -947,7 +988,7 @@
       
   166  		die("Unexpected RDMA op %u in request\n", in_hdr->rdma_op);
       
   167  
       
   168  
       
   169 -	trace("RDS received request to issue rdma %s len %lu rva %Lx key %Lx pattern %Lx\n",
       
   170 +	trace("RDS received request to issue rdma %s len %lu rva 0x%Lx key 0x%Lx pattern 0x%Lx\n",
       
   171  		in_hdr->rdma_op == RDMA_OP_WRITE? "write to" : "read from",
       
   172  		rdma_size,
       
   173  		(unsigned long long) in_hdr->rdma_addr,
       
   174 @@ -1007,6 +1048,9 @@
       
   175  	t->drain_rdmas = 0;
       
   176  }
       
   177  
       
   178 +#if defined(__SVR4) && defined(__sun)
       
   179 +#undef MSG_MAXIOVLEN
       
   180 +#endif
       
   181  #define MSG_MAXIOVLEN 2
       
   182  
       
   183  /*
       
   184 @@ -1560,7 +1604,12 @@
       
   185  	struct timeval start;
       
   186          int do_work = opts->simplex ? active : 1;
       
   187  
       
   188 +#if defined(__SVR4) && defined(__sun)
       
   189 +	set_my_lgrp();
       
   190 +	sin.sin_family = AF_INET_OFFLOAD;
       
   191 +#else
       
   192  	sin.sin_family = AF_INET;
       
   193 +#endif
       
   194  	sin.sin_port = htons(opts->starting_port + 1 + id);
       
   195  	sin.sin_addr.s_addr = htonl(opts->receive_addr);
       
   196  
       
   197 @@ -1572,7 +1621,11 @@
       
   198  	for (i = 0; i < opts->nr_tasks; i++) {
       
   199  		tasks[i].nr = i;
       
   200  		tasks[i].src_addr = sin;
       
   201 +#if defined(__SVR4) && defined(__sun)
       
   202 +		tasks[i].dst_addr.sin_family = AF_INET_OFFLOAD;
       
   203 +#else
       
   204  		tasks[i].dst_addr.sin_family = AF_INET;
       
   205 +#endif
       
   206  		tasks[i].dst_addr.sin_addr.s_addr = htonl(opts->send_addr);
       
   207  		tasks[i].dst_addr.sin_port = htons(opts->starting_port + 1 + i);
       
   208  		tasks[i].send_time = alloca(opts->req_depth * sizeof(struct timeval));
       
   209 @@ -1625,6 +1678,10 @@
       
   210  				;
       
   211  		}
       
   212  
       
   213 +		/* stop sending if in shutdown phase */
       
   214 +		if (ctl->stopping)
       
   215 +			continue;
       
   216 +
       
   217  		/* keep the pipeline full */
       
   218  		can_send = !!(pfd.revents & POLLOUT);
       
   219  		for (i = 0, t = tasks; i < opts->nr_tasks; i++, t++) {
       
   220 @@ -1665,8 +1722,12 @@
       
   221  	uint32_t i;
       
   222  
       
   223  	len = opts->nr_tasks * sizeof(*ctl);
       
   224 +#if defined(__SVR4) && defined(__sun)
       
   225 +	ctl = (struct child_control *)mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_SHARED, -1, 0);
       
   226 +#else
       
   227  	ctl = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_SHARED,
       
   228  		   0, 0);
       
   229 +#endif
       
   230  	if (ctl == MAP_FAILED)
       
   231  		die("mmap of %u child control structs failed", opts->nr_tasks);
       
   232  
       
   233 @@ -1699,7 +1760,7 @@
       
   234  			continue;
       
   235  		pid = waitpid(-1, NULL, WNOHANG);
       
   236  		if (pid)
       
   237 -			die("child %u (pid %u) exited\n", i, pid);
       
   238 +			die("child %u (pid %u) exited\n", i, (int)pid);
       
   239  		sleep(1);
       
   240  		i--; /* try this child again */
       
   241  	}
       
   242 @@ -1967,7 +2028,7 @@
       
   243  
       
   244  	pid = waitpid(-1, &status, wflags);
       
   245  	if (pid < 0)
       
   246 -		die("waitpid returned %u", pid);
       
   247 +		die("waitpid returned %u", (int)pid);
       
   248  	if (pid == 0)
       
   249  		return 0;
       
   250  
       
   251 @@ -1975,15 +2036,15 @@
       
   252  		if (WEXITSTATUS(status) == 0)
       
   253  			return 1;
       
   254  		die("child pid %u exited with status %d\n",
       
   255 -				pid, WEXITSTATUS(status));
       
   256 +				(int)pid, WEXITSTATUS(status));
       
   257  	}
       
   258  	if (WIFSIGNALED(status)) {
       
   259  		if (WTERMSIG(status) == SIGTERM)
       
   260  			return 1;
       
   261  		die("child pid %u exited with signal %d\n",
       
   262 -				pid, WTERMSIG(status));
       
   263 +				(int)pid, WTERMSIG(status));
       
   264  	}
       
   265 -	die("child pid %u wait status %d\n", pid, status);
       
   266 +	die("child pid %u wait status %d\n", (int)pid, status);
       
   267  }
       
   268  
       
   269  static void release_children_and_wait(struct options *opts,
       
   270 @@ -2139,7 +2200,12 @@
       
   271  	control_fd = -1;
       
   272  
       
   273  	if (nr_running) {
       
    274 +		/* let everything gracefully stop before we kill the children */
       
   275  		for (i = 0; i < opts->nr_tasks; i++)
       
   276 +			ctl[i].stopping = 1;
       
   277 +		sleep(1);
       
   278 +
       
   279 +		for (i = 0; i < opts->nr_tasks; i++)
       
   280  			kill(ctl[i].pid, SIGTERM);
       
   281  		stop_soakers(soak_arr);
       
   282  	}
       
   283 @@ -2517,7 +2583,11 @@
       
   284  	/* an extra terminating entry which will be all 0s */
       
   285  	len = (nr_soak + 1) * sizeof(struct soak_control);
       
   286  	soak_arr = mmap(NULL, len, PROT_READ|PROT_WRITE,
       
   287 +#if defined(__SVR4) && defined(__sun)
       
   288 +			MAP_ANONYMOUS|MAP_SHARED, -1, 0);
       
   289 +#else
       
   290  			MAP_ANONYMOUS|MAP_SHARED, 0, 0);
       
   291 +#endif
       
   292  	if (soak_arr == MAP_FAILED)
       
   293  		die("mmap of %ld soak control structs failed", nr_soak);
       
   294  
       
   295 @@ -2589,6 +2659,7 @@
       
   296  { "rtprio",		no_argument,		NULL,	'R'	},
       
   297  { "verify",		no_argument,		NULL,	'v'	},
       
   298  { "trace",		no_argument,		NULL,	'V'	},
       
   299 +{ "lgrpid",		required_argument,	NULL,	'g'	},
       
   300  
       
   301  { "rdma-use-once",	required_argument,	NULL,	OPT_RDMA_USE_ONCE },
       
   302  { "rdma-use-get-mr",	required_argument,	NULL,	OPT_RDMA_USE_GET_MR },
       
   303 @@ -2652,7 +2723,7 @@
       
   304  	while(1) {
       
   305  		int c, index;
       
   306  
       
   307 -		c = getopt_long(argc, argv, "+a:cD:d:hI:M:op:q:Rr:s:t:T:vVz",
       
   308 +		c = getopt_long(argc, argv, "+a:cD:d:hI:M:op:q:Rr:s:t:T:vVg:z",
       
   309  				long_options, &index);
       
   310  		if (c == -1)
       
   311  			break;
       
   312 @@ -2711,6 +2782,10 @@
       
   313  			case 'V':
       
   314  				opts.tracing = 1;
       
   315  				break;
       
   316 +			case 'g':
       
   317 +				lgrp_id = (lgrp_id_t)parse_ull(optarg,
       
   318 +				    (uint32_t)~0);
       
   319 +				break;
       
   320  			case OPT_USE_CONG_MONITOR:
       
   321  				opts.use_cong_monitor = parse_ull(optarg, 1);
       
   322  				break;
       
   323 @@ -2786,6 +2861,7 @@
       
   324  	if (opts.rdma_size && 0)
       
   325  		opts.rdma_size = (opts.rdma_size + 4095) & ~4095;
       
   326  
       
   327 +	set_my_lgrp();
       
   328  	opt = opts;
       
   329  	return active_parent(&opts, soak_arr);
       
   330  }
       
   331 diff -r -u /tmp/rds-tools-2.0.4/pfhack.h rds-tools-2.0.7/pfhack.h
       
   332 --- /tmp/rds-tools-2.0.4/pfhack.h	Wed Aug  4 15:25:11 2010
       
   333 +++ rds-tools-2.0.7/pfhack.h	Thu Feb 24 13:27:51 2011
       
   334 @@ -44,9 +44,11 @@
       
   335  #ifndef __PF_HACK_H
       
   336  #define __PF_HACK_H
       
   337  
       
   338 +#if !((defined(__SVR4) && defined(__sun)))
       
   339  #define PF_RDS		21
       
   340  #define AF_RDS		21
       
   341  #define SOL_RDS		276
       
   342 +#endif
       
   343  
       
   344  extern int discover_pf_rds();
       
   345  extern int discover_sol_rds();
       
   346 diff -r -u /tmp/rds-tools-2.0.4/rds-info.c rds-tools-2.0.7/rds-info.c
       
   347 --- /tmp/rds-tools-2.0.4/rds-info.c	Wed Aug  4 15:25:10 2010
       
   348 +++ rds-tools-2.0.7/rds-info.c	Thu Feb 24 13:27:51 2011
       
   349 @@ -42,16 +42,27 @@
       
   350  #include <sys/types.h>
       
   351  #include <sys/socket.h>
       
   352  #include <errno.h>
       
   353 +#if defined(__SVR4) && defined(__sun)
       
   354 +#include <strings.h>
       
   355 +#else
       
   356  #include <string.h>
       
   357 +#endif
       
   358  #include <inttypes.h>
       
   359  #include <netinet/in.h>
       
   360  #include <arpa/inet.h>
       
   361  
       
   362 +#if defined(__SVR4) && defined(__sun)
       
   363 +#include <sys/rds.h>
       
   364 +#else
       
   365  #include "rds.h"
       
   366 +#endif
       
   367  #include "pfhack.h"
       
   368  
       
    369 +/* The struct rds_info_connection definition changed between rds in 1.4 and 1.5, so both
       
    370 +   layouts must be supported for now. TODO: remove the transport[15] check once pre-1.5 ofed is extinct. */
       
   371  #define rds_conn_flag(conn, flag, letter) \
       
   372 -	(conn.flags & RDS_INFO_CONNECTION_FLAG_##flag ? letter : '-')
       
   373 +	(conn.flags & RDS_INFO_CONNECTION_FLAG_##flag \
       
   374 +	|| conn.transport[15] & RDS_INFO_CONNECTION_FLAG_##flag ? letter : '-')
       
   375  
       
   376  #define min(a, b) (a < b ? a : b)
       
   377  #define array_size(foo) (sizeof(foo) / sizeof(foo[0]))
       
   378 @@ -234,8 +245,10 @@
       
   379  		print_msgs, "Send", 0 },
       
   380  	['t'] = { RDS_INFO_RETRANS_MESSAGES, "retransmit queue messages",
       
   381  		  print_msgs, "Retransmit", 0 },
       
   382 +#if !(defined(__SVR4) && defined(__sun))
       
   383  	['T'] = { RDS_INFO_TCP_SOCKETS, "TCP transport sockets",
       
   384  		  print_tcp_socks, NULL, 0 },
       
   385 +#endif
       
   386  	['I'] = { RDS_INFO_IB_CONNECTIONS, "IB transport connections",
       
   387  		  print_ib_conns, NULL, 0 },
       
   388  };
       
   389 @@ -266,6 +279,10 @@
       
   390  	char optstring[258] = "v+";
       
   391  	int given_options = 0;
       
   392  	socklen_t len = 0;
       
   393 +#if defined(__SVR4) && defined(__sun)
       
   394 +	socklen_t ulen;
       
   395 +	struct rds_info_arg arg;
       
   396 +#endif
       
   397  	void *data = NULL;
       
   398  	int fd;
       
   399  	int each;
       
   400 @@ -322,6 +339,7 @@
       
   401  		    (given_options && !infos[i].option_given))
       
   402  			continue;
       
   403  
       
   404 +#if !(defined(__SVR4) && defined(__sun))
       
   405  		/* read in the info until we get a full snapshot */
       
   406  		while ((each = getsockopt(fd, sol, infos[i].opt_val, data,
       
   407  				   &len)) < 0) {
       
   408 @@ -345,15 +363,60 @@
       
   409  				return 1;
       
   410  			}
       
   411  		}
       
   412 +#else
       
   413 +		/* 1st call gets the length of the data available */
       
   414 +		ulen = 0;
       
   415 +		bzero(&arg, sizeof (struct rds_info_arg));
       
   416 +		arg.lenp = (uint64_t)(uintptr_t)&ulen;
       
   417 +		arg.datap = NULL;
       
   418 +		each = ioctl(fd, infos[i].opt_val, &arg);
       
   419 +		if ((each < 0) && (errno != ENOSPC)) {
       
   420 +			verbosef(0, stderr, "%s: Unable get statistics: %s\n",
       
   421 +			    progname, strerror(errno));
       
   422 +			return 1;
       
   423 +		}
       
   424  
       
   425 +		/* No data at the driver */
       
   426 +		if (ulen == 0)
       
   427 +			invalid_opt = 1;;
       
   428 +#endif
       
   429 +
       
   430  		if (invalid_opt)
       
   431  			continue;
       
   432  
       
   433 +#if !(defined(__SVR4) && defined(__sun))
       
   434  		infos[i].print(data, each, len, infos[i].extra);
       
   435 +#else
       
   436 +		do {
       
   437 +			arg.datap = (uint64_t)(uintptr_t)realloc(
       
   438 +			    (char *)(uintptr_t)arg.datap, ulen);
       
   439 +			if (arg.datap == NULL) {
       
   440 +			    verbosef(0, stderr, "%s: Unable to allocate memory "
       
   441 +				"for %u bytes of info: %s\n",
       
   442 +				progname, ulen, strerror(errno));
       
   443 +				return 1;
       
   444 +			}
       
   445  
       
   446 +			/* 2nd call gets the data */
       
   447 +			len = ulen;
       
   448 +			each = ioctl(fd, infos[i].opt_val, &arg);
       
   449 +			if ((each < 0) && (errno != ENOSPC)) {
       
   450 +				verbosef(0, stderr,
       
   451 +				    "%s: Unable get statistics: %s\n",
       
   452 +				    progname, strerror(errno));
       
   453 +				return 1;
       
   454 +			}
       
   455 +		} while (ulen > len);
       
   456 +
       
   457 +		infos[i].print((void *)(uintptr_t)arg.datap, each, ulen,
       
   458 +		    infos[i].extra);
       
   459 +#endif
       
   460  		if (given_options && --given_options == 0)
       
   461  			break;
       
   462  	}
       
   463 +#if defined(__SVR4) && defined(__sun)
       
   464 +	free((void *)(uintptr_t)arg.datap);
       
   465 +#endif
       
   466  
       
   467  	return 0;
       
   468  }
       
   469 diff -r -u /tmp/rds-tools-2.0.4/rds.7 rds-tools-2.0.7/rds.7
       
   470 --- /tmp/rds-tools-2.0.4/rds.7	Wed Aug  4 15:25:11 2010
       
   471 +++ rds-tools-2.0.7/rds.7	Thu Feb 24 13:27:52 2011
       
   472 @@ -6,6 +6,7 @@
       
   473  .nf
       
   474  .B #include <sys/socket.h>
       
   475  .B #include <netinet/in.h>
       
   476 +.B #include <sys/rds.h>
       
   477  .fi
       
   478  .SH DESCRIPTION
       
   479  This is an implementation of the RDS socket API. It provides reliable,
       
   480 @@ -14,18 +15,13 @@
       
   481  Currently, RDS can be transported over Infiniband, and loopback.
       
   482  RDS over TCP is disabled, but will be re-enabled in the near future.
       
   483  .PP
       
   484 -RDS uses standard
       
   485 -.B AF_INET
       
   486 -addresses as described in
       
   487 -.BR ip (7)
       
    488 +RDS uses the
       
    489 +.BR AF_INET_OFFLOAD " address family"
       
   490  to identify end points.
       
   491  .\"------------------------------------------------------------------
       
   492  .SS Socket Creation
       
   493  RDS is still in development and as such does not have a reserved protocol
       
   494 -family constant. Applications must read the string representation of the
       
   495 -protocol family value from the
       
   496 -.B pf_rds
       
   497 -sysctl parameter file described below.
       
   498 +family constant.  Applications should use AF_INET_OFFLOAD.
       
   499  .PP
       
   500  .nf
       
   501  .B rds_socket = socket(pf_rds, SOCK_SEQPACKET, 0);
       
   502 @@ -58,9 +54,6 @@
       
   503  .BR SOL_RDS ).
       
   504  Just as with the RDS protocol family, an official value has not been
       
   505  assigned yet, so the kernel will assign a value dynamically.
       
   506 -The assigned value can be retrieved from the
       
   507 -.B sol_rds
       
   508 -sysctl parameter file.
       
   509  .PP
       
   510  RDS specific socket options will be described in a separate section
       
   511  below.
       
   512 @@ -77,7 +70,7 @@
       
   513  .PP
       
   514  For instance, when binding to the address of an Infiniband interface
       
   515  such as
       
   516 -.BR ib0 ,
       
   517 +.BR ibd0 ,
       
   518  the socket will use the Infiniband transport. If RDS is not able
       
   519  to associate a transport with the given address, it will return
       
   520  .BR EADDRNOTAVAIL .
       
   521 @@ -394,47 +387,6 @@
       
   522  be delivered in the order they're sent. Messages sent from different
       
   523  sockets, or to different destinations, may be delivered in any order.
       
   524  .\"------------------------------------------------------------------
       
   525 -.SH SYSCTL VALUES
       
   526 -These parameteres may only be accessed through their files in
       
   527 -.BR /proc/sys/net/rds .
       
   528 -Access through
       
   529 -.BR sysctl (2)
       
   530 -is not supported.
       
   531 -.TP
       
   532 -.B pf_rds
       
   533 -This file contains the string representation of the protocol family
       
   534 -constant passed to
       
   535 -.BR socket (2)
       
   536 -to create a new RDS socket.
       
   537 -.TP
       
   538 -.B sol_rds
       
   539 -This file contains the string representation of the socket level parameter
       
   540 -that is passed to
       
   541 -.BR getsockopt (2)
       
   542 -and
       
   543 -.BR setsockopt (2)
       
   544 -to manipulate RDS socket options.
       
   545 -.TP
       
   546 -.BR max_unacked_bytes " and " max_unacked_packets
       
   547 -These parameters are used to tune the generation of acknowledgements. By
       
   548 -default, the system receiving RDS messages does not send back explicit
       
   549 -acknowledgements unless it transmits a message of its own (in which
       
   550 -case the ACK is piggybacked onto the outgoing message), or when the sending
       
   551 -system requests an ACK.
       
   552 -.IP
       
   553 -However, the sender needs to see an ACK from time to time so that it
       
   554 -can purge old messages from the send queue. The unacked bytes and
       
   555 -packet counters are used to keep track of how much data has been
       
   556 -sent without requesting an ACK. The default is to request an acknowledgement
       
   557 -every 16 packets, or every 16 MB, whichever comes first.
       
   558 -.TP
       
   559 -.BR reconnect_delay_min_ms " and " reconnect_delay_max_ms
       
   560 -RDS uses host-to-host connections to transport RDS messages (both for the TCP
       
   561 -and the Infiniband transport). If this connection breaks, RDS will try to
       
   562 -re-establish the connection. Because this reconnect may be triggered by
       
   563 -both hosts at the same time and fail, RDS uses a random backoff before attempting
       
   564 -a reconnect. These two parameters specify the minimum and maximum delay in
       
   565 -milliseconds. The default values are 1 and 1000, respectively.
       
   566  .SH SEE ALSO
       
   567  .BR rds-rdma (7),
       
   568  .BR socket (2),
       
   569 diff -r -u /tmp/rds-tools-2.0.4/rds-info.1 rds-tools-2.0.7/rds-info.1
       
   570 --- /tmp/rds-tools-2.0.4/rds-info.1	Wed Aug  4 15:25:11 2010
       
   571 +++ rds-tools-2.0.7/rds-info.1	Thu Feb 24 13:27:51 2011
       
   572 @@ -1,162 +1,150 @@
       
   573 -.Dd October 30, 2006
       
   574 -.Dt RDS-INFO 1
       
   575 -.Os
       
   576 -.Sh NAME
       
   577 -.Nm rds-info
       
   578 -.Nd display information from the RDS kernel module
       
   579 -.Pp
       
   580 -.Sh SYNOPSIS
       
   581 -.Nm rds-info
       
   582 -.Op Fl v
       
   583 -.Bk -words
       
   584 -.Op Fl cknrstIT
       
   585 +.TH RDS-INFO 1 "October 30, 2006"
       
   586 +.SH "NAME"
       
   587 +rds-info - display information from the RDS kernel module
       
   588 +.SH SYNOPSIS
       
    589 +.B rds-info [-v] [-cknrstIT]
       
   590  
       
   591 -.Sh DESCRIPTION
       
   592 -The
       
   593 -.Nm
       
   594 -utility presents various sources of information that
       
   595 +.SH DESCRIPTION
       
   596 +.PP
       
    597 +The \fBrds-info\fR utility presents various sources of information that
       
   598  the RDS kernel module maintains.  When run without any optional arguments
       
   599 -.Nm
       
   600  will output all the information it knows of.  When options are specified then
       
   601  only the information associated with those options is displayed.
       
   602  
       
   603  The options are as follows:
       
   604 -.Bl -tag -width Ds
       
   605 -.It Fl v
       
   606 +.SH OPTIONS
       
   607 +.PP
       
   608 +.TP 7
       
    609 +\fB\-v\fR
       
   610  Requests verbose output. When this option is given, some classes of information
       
   611  will display additional data.
       
   612  
       
   613 -.It Fl c
       
   614 +.TP
       
    615 +\fB\-c\fR
       
   616  Display global counters.  Each counter increments as its event
       
   617  occurs.  The counters may not be reset.  The set of supported counters
       
   618  may change over time.
       
   619  
       
   620 -.Bl -tag -width 4
       
   621 -.It CounterName
       
   622 +.IP	CounterName
       
   623  The name of the counter.  These names come from the kernel and can change
       
   624  depending on the capability of the kernel module.
       
   625 -.It Value
       
   626 +.IP 	Value
       
   627  The number of times that the counter has been incremented since the kernel
       
   628  module was loaded.
       
   629 -.El
       
   630  
       
   631 -.It Fl k
       
   632 +.TP
       
   633 +\fB\-k\fR
       
   634  Display all the RDS sockets in the system.  There will always be one socket
       
   635  listed that is neither bound to nor connected to any addresses because
       
   636 -.Nm
       
   637  itself uses an unbound socket to collect information.
       
   638  
       
   639 -.Bl -tag -width 4
       
   640 -.It BoundAddr, BPort
       
   641 +.IP	BoundAddr, BPort
       
   642  The IP address and port that the socket is bound to.  0.0.0.0 0 indicates that
       
   643  the socket has not been bound.
       
   644 -.It ConnAddr, CPort
       
   645 +.IP	ConnAddr, CPort
       
   646  The IP address and port that the socket is connected to.  0.0.0.0 0 indicates
       
   647  that the socket has not been connected.
       
   648 -.It SndBuf, RcvBuf
       
   649 +.IP	SndBuf, RcvBuf
       
   650  The number of bytes of message payload which can be queued for sending or
       
   651  receiving on the socket, respectively.
       
   652 -.It Inode
       
   653 +.IP	Inode
       
   654  The number of the inode object associated with the socket. Can be used to
       
   655  locate the process owning a given socket by searching /proc/*/fd for
       
   656  open files referencing a socket with this inode number.
       
   657 -.El
       
   658  
       
   659 -.It Fl n
       
   660 +.TP
       
   661 +\fB\-n\fR
       
   662  Display all RDS connections.  RDS connections are maintained between
       
   663  nodes by transports.  
       
   664  
       
   665 -.Bl -tag -width 4
       
   666 -.It LocalAddr
       
   667 +.IP	LocalAddr
       
   668  The IP address of this node.  For connections that originate and terminate on
       
   669  the same node the local address indicates which address initiated the
       
   670  connection establishment.
       
   671 -.It RemoteAddr
       
   672 +.IP	RemoteAddr
       
   673  The IP address of the remote end of the connection.  
       
   674 -.It NextTX
       
   675 +.IP	NextTX
       
   676  The sequence number that will be given to the next message that is sent
       
   677  over the connection.
       
   678 -.It NextRX
       
   679 +.IP	NextRX
       
   680  The sequence number that is expected from the next message to arrive over
       
   681  the connection.  Any incoming messages with sequence numbers less than this
       
   682  will be dropped.
       
   683 -.It Flg
       
   684 +.IP	Flg
       
   685  Flags which indicate the state of the connection. 
       
   686 -.Bl -tag -width 4
       
   687 -.It s
       
   688 -A process is currently sending a message down the connection.
       
   689 -.It c
       
   690 -The transport is attempting to connect to the remote address.
       
   691 -.It C
       
   692 -The connection to the remote host is connected and active.
       
   693 -.El
       
   694 -.El
       
   695  
       
   696 -.It Fl r, Fl s, Fl t
       
   697 +.IP 		s
       
   698 +	A process is currently sending a message down
       
   699 +	the connection.
       
   700 +.IP 		c
       
   701 +	The transport is attempting to connect to the
       
   702 +	remote address.
       
   703 +.IP 		C
       
   704 +	The connection to the remote host is connected
       
   705 +	and active.
       
   706 +
       
   707 +.TP
       
   708 +\fB\-r\fR, \fB\-s\fR, \fB\-t\fR
       
   709  Display the messages in the receive, send, or retransmit queues respectively.
       
   710 -.Bl -tag -width 4
       
   711 -.It LocalAddr, LPort
       
   712 +
       
   713 +.IP	LocalAddr, LPort
       
   714  The local IP address and port on this node associated with the message. For
       
   715  sent messages this is the source address, for receive messages it is the
       
   716  destination address.
       
   717 -.It RemoteAddr, RPort
       
   718 +.IP	RemoteAddr, RPort
       
   719  The remote IP address and port associated with the message. For sent messages
       
   720  this is the destination address, for receive messages it is the source address.
       
   721 -.It Seq
       
   722 +.IP	Seq
       
   723  The sequence number of the message.
       
   724 -.It Bytes
       
   725 +.IP	Bytes
       
   726  The number of bytes in the message payload.
       
   727 -.El
       
   728  
       
   729 +
       
   730 +.PP
       
   731  The following information sources are dependent on specific transports which
       
   732  may not always be available. 
       
   733  
       
   734 -.It Fl I
       
   735 +.TP 7
       
   736 +\fB\-I\fR
       
   737  Display the IB connections which the IB transport is using to provide
       
   738  RDS connections.
       
   739  
       
   740 -.Bl -tag -width 4
       
   741 -.It LocalAddr
       
   742 +.IP	LocalAddr
       
   743  The local IP address of this connection.
       
   744 -.It RemoteAddr
       
   745 +.IP	RemoteAddr
       
   746  The remote IP address of this connection.
       
   747 -.It LocalDev
       
   748 +.IP	LocalDev
       
   749  The local IB Global Identifier, printed in IPv6 address syntax.
       
   750 -.It RemoteDev
       
   751 +.IP	RemoteDev
       
   752  The remote IB Global Identifier, printed in IPv6 address syntax.
       
   753 -.El
       
   754  
       
   755  If verbose output is requested, per-connection settings such as the
       
   756  maximum number of send and receive work requests will be displayed
       
   757  in addition.
       
   758  
       
   759 -.It Fl T
       
   760 +.TP 7
       
   761 +\fB\-T\fR
       
   762  Display the TCP sockets which the TCP transport is using to provide
       
   763  RDS connections.
       
   764  
       
   765 -.Bl -tag -width 4
       
   766 -.It LocalAddr, LPort
       
   767 +.IP	LocalAddr, LPort
       
   768  The local IP address and port of this socket.
       
   769 -.It RemoteAddr, RPort
       
   770 +.IP	RemoteAddr, RPort
       
   771  The remote IP address and port that this socket is connected to.
       
   772 -.It HdrRemain
       
   773 +.IP	HdrRemain
       
   774  The number of bytes that must be read off the socket to complete the next
       
   775  full RDS header.
       
   776 -.It DataRemain
       
   777 +.IP	DataRemain
       
   778  The number of bytes that must be read off the socket to complete the data
       
   779  payload of the message which is being received.
       
   780 -.It SentNxt
       
   781 +.IP	SentNxt
       
   782  The TCP sequence number of the first byte of the last message that we sent
       
   783  down the connection.
       
   784 -.It ExpectedUna
       
   785 +.IP	ExpectedUna
       
   786  The TCP sequence number of the byte past the last byte of the last message
       
   787  that we sent down the connection.  When we see that the remote side has
       
   788  acked up to this byte then we know that the remote side has received all
       
   789  our RDS messages.
       
   790 -.It SeenUna
       
   791 +.IP	SeenUna
       
   792  The TCP sequence number of the byte past the last byte which has been
       
   793  acked by the remote host.
       
   794 -.El
       
   795 -
       
   796 -.El
       
   797 -.Pp
       
   798 diff -r -u /tmp/rds-tools-2.0.4/rds-ping.1 rds-tools-2.0.7/rds-ping.1
       
   799 --- /tmp/rds-tools-2.0.4/rds-ping.1	Wed Aug  4 15:25:11 2010
       
   800 +++ rds-tools-2.0.7/rds-ping.1	Thu Feb 24 13:27:52 2011
       
   801 @@ -1,69 +1,54 @@
       
   802 -.Dd Apr 22, 2008
       
   803 -.Dt RDS-PING 1
       
   804 -.Os
       
   805 -.Sh NAME
       
   806 -.Nm rds-ping
       
   807 -.Nd test reachability of remote node over RDS
       
   808 -.Pp
       
   809 -.Sh SYNOPSIS
       
   810 -.Nm rds-ping
       
   811 -.Bk -words
       
   812 -.Op Fl c Ar count
       
   813 -.Op Fl i Ar interval
       
   814 -.Op Fl I Ar local_addr
       
   815 -.Ar remote_addr
       
   816 +.TH RDS-PING 1 "Apr 22, 2008"
       
   817 +.SH NAME
       
   818 +rds-ping - test reachability of remote node over RDS
       
   819  
       
   820 -.Sh DESCRIPTION
       
   821 -.Nm rds-ping
       
   822 -is used to test whether a remote node is reachable over RDS.
       
   823 -Its interface is designed to operate pretty much the standard
       
   824 -.Xr ping 8
       
   825 +.SH SYNOPSIS
       
   826 +.B rds-ping [-c count] [-i interval] [-I local_addr] remote_addr
       
   827 +
       
   828 +.SH DESCRIPTION
       
   829 +.PP
       
   830 +rds-ping is used to test whether a remote node is reachable over RDS.
       
   831 +Its interface is designed to operate pretty much the standard ping(1M) 
       
   832  utility, even though the way it works is pretty different.
       
   833 -.Pp
       
   834 -.Nm rds-ping
       
   835 -opens several RDS sockets and sends packets to port 0 on
       
   836 +.PP
       
   837 +rds-ping opens several RDS sockets and sends packets to port 0 on
       
   838  the indicated host. This is a special port number to which
       
   839  no socket is bound; instead, the kernel processes incoming
       
   840  packets and responds to them.
       
   841 -.Sh OPTIONS
       
   842 +.SH OPTIONS
       
   843  The following options are available for use on the command line:
       
   844 -.Bl -tag -width Ds
       
   845 -.It Fl c Ar count
       
   846 -Causes
       
   847 -.Nm rds-ping
       
   848 -to exit after sending (and receiving) the specified number of
       
   849 +.PP
       
   850 +.TP 7
       
   851 +\fB\-c count
       
   852 +Causes rds-ping to exit after sending (and receiving) the specified number of
       
   853  packets.
       
   854 -.It Fl I Ar address
       
   855 -By default,
       
   856 -.Nm rds-ping
       
   857 -will pick the local source address for the RDS socket based
       
   858 +.TP
       
   859 +\fB\-I address
       
   860 +By default, rds-ping will pick the local source address for the RDS socket based
       
   861  on routing information for the destination address (i.e. if
       
   862  packets to the given destination would be routed through interface
       
   863 -.Nm ib0 ,
       
   864 +ib0,
       
   865  then it will use the IP address of
       
   866 -.Nm ib0
       
   867 +ib0
       
   868  as source address).
       
   869  Using the
       
   870  .Fl I
       
   871  option, you can override this choice.
       
   872 -.It Fl i Ar timeout
       
   873 -By default,
       
   874 -.Nm rds-ping
       
   875 -will wait for one second between sending packets. Use this option
       
   876 +.TP
       
   877 +\fB\-i timeout
       
   878 +By default, rds-ping will wait for one second between sending packets. Use this option
       
   879  to specified a different interval. The timeout value is given in
       
   880  seconds, and can be a floating point number. Optionally, append
       
   881 -.Nm msec
       
   882 +msec
       
   883  or
       
   884 -.Nm usec
       
   885 +usec
       
   886  to specify a timeout in milliseconds or microseconds, respectively.
       
   887 -.It
       
   888 +.IP
       
   889  Specifying a timeout considerably smaller than the packet round-trip
       
   890  time will produce unexpected results.
       
   891 -.El
       
   892 -.Sh AUTHORS
       
   893 -.Nm rds-ping
       
   894 +
       
   895 +.SH AUTHORS
       
   896 +rds-ping
       
   897  was written by Olaf Kirch <[email protected]>.
       
   898 -.Sh SEE ALSO
       
   899 -.Xr rds 7 ,
       
   900 -.Xr rds-info 1 ,
       
   901 -.Xr rds-stress 1 .
       
   902 +.SH SEE ALSO
       
   903 +rds(7), rds-info(1), rds-stress(1).
       
   904 diff -r -u /tmp/rds-tools-2.0.4/configure.in rds-tools-2.0.7/configure.in
       
   905 --- /tmp/rds-tools-2.0.4/configure.in	Wed Aug  4 15:25:11 2010
       
   906 +++ rds-tools-2.0.7/configure.in	Thu Feb 24 13:27:51 2011
       
   907 @@ -1,7 +1,7 @@
       
   908  AC_PREREQ(2.55)
       
   909  AC_INIT()
       
   910  
       
   911 -VERSION=2.0.4
       
   912 +VERSION=2.0.7
       
   913  RELEASE=1
       
   914  
       
   915  AC_SUBST(VERSION)
       
   916 diff -r -u /tmp/rds-tools-2.0.4/rds-ping.c rds-tools-2.0.7/rds-ping.c
       
   917 --- /tmp/rds-tools-2.0.4/rds-ping.c	Wed Aug  4 15:25:10 2010
       
   918 +++ rds-tools-2.0.7/rds-ping.c	Thu Feb 24 13:27:52 2011
       
   919 @@ -48,7 +48,11 @@
       
   920  #include <sys/poll.h>
       
   921  #include <fcntl.h>
       
   922  #include <getopt.h>
       
   923 +#if defined(__SVR4) && defined(__sun)
       
   924 +#include <sys/rds.h>
       
   925 +#else
       
   926  #include "rds.h"
       
   927 +#endif
       
   928  
       
   929  #include "pfhack.h"
       
   930  
       
   931 @@ -155,7 +159,12 @@
       
   932  	}
       
   933  
       
   934  	memset(&sin, 0, sizeof(sin));
       
   935 +#if defined(__SVR4) && defined(__sun)
       
   936 +	sin.sin_family = AF_INET_OFFLOAD;
       
   937 +#else
       
   938  	sin.sin_family = AF_INET;
       
   939 +#endif
       
   940 +
       
   941  	sin.sin_addr = opt_dstaddr;
       
   942  
       
   943  	gettimeofday(&next_ts, NULL);
       
   944 @@ -181,7 +190,7 @@
       
   945  				break;
       
   946  
       
   947  			timeradd(&next_ts, &opt_wait, &next_ts);
       
   948 -			if (sendto(sp->fd, NULL, 0, 0, (struct sockaddr *) &sin, sizeof(sin)))
       
   949 +			if (sendto(sp->fd, NULL, 0, 0, (struct sockaddr *) &sin, sizeof(sin)) < 0)
       
   950  				err = errno;
       
   951  			sp->sent_id = ++sent;
       
   952  			sp->sent_ts = now;
       
   953 @@ -258,7 +267,11 @@
       
   954  	int pf;
       
   955  
       
   956  	memset(&sin, 0, sizeof(sin));
       
   957 +#if defined(__SVR4) && defined(__sun)
       
   958 +	sin.sin_family = AF_INET_OFFLOAD;
       
   959 +#else
       
   960  	sin.sin_family = AF_INET;
       
   961 +#endif
       
   962  
       
   963  #ifdef DYNAMIC_PF_RDS
       
   964          pf = discover_pf_rds();
       
   965 @@ -278,6 +291,9 @@
       
   966  		if (ufd < 0)
       
   967  			die_errno("unable to create UDP socket");
       
   968  		sin.sin_addr = *dst;
       
   969 +#if defined(__SVR4) && defined(__sun)
       
   970 +		sin.sin_family = AF_INET;
       
   971 +#endif
       
   972  		sin.sin_port = htons(1);
       
   973  		if (connect(ufd, (struct sockaddr *) &sin, sizeof(sin)) < 0)
       
   974  			die_errno("unable to connect to %s",
       
   975 @@ -289,6 +305,9 @@
       
   976  
       
   977  		*src = sin.sin_addr;
       
   978  		close(ufd);
       
   979 +#if defined(__SVR4) && defined(__sun)
       
   980 +		sin.sin_family = AF_INET_OFFLOAD;
       
   981 +#endif
       
   982  	}
       
   983  
       
   984  	sin.sin_addr = *src;
       
   985 diff -r -u /tmp/rds-tools-2.0.4/Makefile.in rds-tools-2.0.7/Makefile.in
       
   986 --- /tmp/rds-tools-2.0.4/Makefile.in	Wed Aug  4 15:25:11 2010
       
   987 +++ rds-tools-2.0.7/Makefile.in	Thu Feb 24 13:27:51 2011
       
   988 @@ -4,10 +4,14 @@
       
   989  mandir		= $(DESTDIR)@mandir@
       
   990  incdir		= $(DESTDIR)@includedir@
       
   991  
       
   992 +CC=gcc
       
   993 +
       
   994  all: all-programs
       
   995  
       
   996 -CFLAGS = -O2 -Wall -Iinclude
       
   997 -CPPFLAGS = -DDEBUG_EXE -DRDS_VERSION=\"@VERSION@\" -MD -MP -MF $(@D)/.$(basename $(@F)).d
       
   998 +CFLAGS += -O2 -Wall -Iinclude
       
   999 +CPPFLAGS += -D_XOPEN_SOURCE=500 -D__EXTENSIONS__ \
       
  1000 +	-DDEBUG_EXE -DRDS_VERSION=\"@VERSION@\" -MD -MP -MF $(@D)/.$(basename $(@F)).d
       
  1001 +LDFLAGS += -lsocket -lnsl -llgrp
       
  1002  
       
  1003  HEADERS = kernel-list.h pfhack.h include/rds.h
       
  1004  COMMON_SOURCES = pfhack.c
       
  1005 @@ -15,7 +19,7 @@
       
  1006  CLEAN_OBJECTS = $(addsuffix .o,$(PROGRAMS)) $(subst .c,.o,$(COMMON_SOURCES))
       
  1007  
       
  1008  # This is the default
       
  1009 -DYNAMIC_PF_RDS = true
       
  1010 +#DYNAMIC_PF_RDS = true
       
  1011  
       
  1012  ifneq ($(DYNAMIC_PF_RDS),)
       
  1013  CPPFLAGS += -DDYNAMIC_PF_RDS
       
  1014 @@ -29,14 +33,14 @@
       
  1015  all-programs: $(PROGRAMS)
       
  1016  
       
  1017  install: $(PROGRAMS)
       
  1018 -	install -d $(bindir)
       
  1019 -	install -m 555 -s $(PROGRAMS) $(bindir)
       
  1020 -	install -d $(mandir)/man1
       
  1021 -	install -d $(mandir)/man7
       
  1022 -	install -m 644 *.1 $(mandir)/man1
       
  1023 -	install -m 644 *.7 $(mandir)/man7
       
  1024 -	install -d $(incdir)/net
       
  1025 -	install -m 444 include/rds.h $(incdir)/net
       
  1026 +	$(INSTALL) -d $(bindir)
       
  1027 +	$(INSTALL) -m 755 -s $(PROGRAMS) $(bindir)
       
  1028 +	$(INSTALL) -d $(mandir)/man1
       
  1029 +	$(INSTALL) -d $(mandir)/man7
       
  1030 +	$(INSTALL) -m 644 *.1 $(mandir)/man1
       
  1031 +	$(INSTALL) -m 644 *.7 $(mandir)/man7
       
  1032 +	$(INSTALL) -d $(incdir)/net
       
  1033 +	$(INSTALL) -m 444 include/rds.h $(incdir)/net
       
  1034  
       
  1035  clean:
       
  1036  	rm -f $(PROGRAMS) $(CLEAN_OBJECTS)
       
  1037 @@ -47,7 +51,7 @@
       
  1038  
       
  1039  
       
  1040  $(PROGRAMS) : % : %.o $(COMMON_OBJECTS)
       
  1041 -	gcc $(CFLAGS) $(LDFLAGS) -o $@ $^
       
  1042 +	$(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) -o $@ $^
       
  1043  
       
  1044  LOCAL_DFILES := $(wildcard .*.d)
       
  1045  ifneq ($(LOCAL_DFILES),)
       
  1046 @@ -72,8 +76,6 @@
       
  1047  		configure \
       
  1048  		README \
       
  1049  		rds-tools.txt \
       
  1050 -		stap/rds.stp \
       
  1051 -		stap/README \
       
  1052  		docs/rds-architecture.txt \
       
  1053  		examples/Makefile \
       
  1054  		examples/rds-sample.c \
       
  1055 diff -r -u /tmp/rds-tools-2.0.4/examples/rds-sample.c rds-tools-2.0.7/examples/rds-sample.c
       
  1056 --- /tmp/rds-tools-2.0.4/examples/rds-sample.c	Wed Aug  4 15:25:11 2010
       
  1057 +++ rds-tools-2.0.7/examples/rds-sample.c	Thu Feb 24 13:27:53 2011
       
  1058 @@ -26,6 +26,7 @@
       
  1059  #include <string.h>
       
  1060  #include <stdlib.h>
       
  1061  
       
  1062 +#if !(defined(__SVR4) && defined(__sun))
       
  1063  /* FIXME - this is a hack to getaround RDS not exporting any header files.
       
  1064   * This is a local copy of the file found at net/rds/
       
  1065   */
       
  1066 @@ -33,6 +34,9 @@
       
  1067  /* These are defined in rds.h....but that file is not happily included */
       
  1068  #define SOL_RDS		272
       
  1069  #define PF_RDS		28
       
  1070 +#else
       
  1071 +#include <sys/rds.h>
       
  1072 +#endif
       
  1073  
       
  1074  
       
  1075  #define TESTPORT	4000
       
  1076 @@ -107,12 +111,12 @@
       
  1077  	cmsg->cmsg_type = RDS_CMSG_RDMA_ARGS;
       
  1078  	cmsg->cmsg_len = CMSG_LEN(sizeof(struct rds_rdma_args));
       
  1079  
       
  1080 -	iov.addr = (uint64_t) buf;
       
  1081 +	iov.addr = (uint64_t)(uintptr_t)buf;
       
  1082  	iov.bytes = sizeof(struct rdss_message);
       
  1083  
       
  1084  	args->remote_vec.addr = 0;
       
  1085  	args->remote_vec.bytes = sizeof(struct rdss_message);
       
  1086 -	args->local_vec_addr = (uint64_t) &iov;
       
  1087 +	args->local_vec_addr = (uint64_t)(uintptr_t)&iov;
       
  1088  	args->nr_local = 1;
       
  1089  	args->flags = remote_flags ? (RDS_RDMA_READWRITE | RDS_RDMA_FENCE) : 0;
       
  1090  	args->flags |= RDS_RDMA_NOTIFY_ME;
       
  1091 @@ -244,9 +248,9 @@
       
  1092  	void *ctlbuf;
       
  1093  	struct iovec *iov;
       
  1094  
       
  1095 -	mr_args.vec.addr = (uint64_t) buf;
       
  1096 +	mr_args.vec.addr = (uint64_t)(uintptr_t)buf;
       
  1097  	mr_args.vec.bytes = sizeof(struct rdss_message);
       
  1098 -	mr_args.cookie_addr = (uint64_t) cookie;
       
  1099 +	mr_args.cookie_addr = (uint64_t)(uintptr_t)cookie;
       
  1100  	mr_args.flags = RDS_RDMA_USE_ONCE;
       
  1101  
       
  1102  	ctlbuf = calloc(1, CMSG_SPACE(sizeof(mr_args)));
       
  1103 diff -r -u /tmp/rds-tools-2.0.4/examples/Makefile rds-tools-2.0.7/examples/Makefile
       
  1104 --- /tmp/rds-tools-2.0.4/examples/Makefile	Wed Aug  4 15:25:11 2010
       
  1105 +++ rds-tools-2.0.7/examples/Makefile	Thu Feb 24 13:27:52 2011
       
  1106 @@ -1,6 +1,12 @@
       
  1107 +CC=gcc
       
  1108 +LIBS = -lsocket -lnsl
       
  1109 +CPPFLAGS += -D_XOPEN_SOURCE=500 -D__EXTENSIONS__
       
  1110  
       
  1111  all: rds-sample
       
  1112  
       
  1113 -rds-sample: rds-sample.o
       
  1114 +rds-sample: rds-sample.c
       
  1115 +	$(CC) $(CPPFLAGS) $(CFLAGS) -o rds-sample rds-sample.c $(LIBS)
       
  1116  
       
  1117 -CFLAGS = -I ../include
       
  1118 +clean:
       
  1119 +	rm -rf rds-sample
       
  1120 +
       
  1121 diff -r -u /tmp/rds-tools-2.0.4/configure rds-tools-2.0.7/configure
       
  1122 --- /tmp/rds-tools-2.0.4/configure	Wed Aug  4 15:25:11 2010
       
  1123 +++ rds-tools-2.0.7/configure	Thu Feb 24 13:27:51 2011
       
  1124 @@ -1215,7 +1215,7 @@
       
  1125  
       
  1126  
       
  1127  
       
  1128 -VERSION=2.0.4
       
  1129 +VERSION=2.0.7
       
  1130  RELEASE=1
       
  1131  
       
  1132  
       
  1133 diff -r -u /tmp/rds-tools-2.0.4/rds-stress.1 rds-tools-2.0.7/rds-stress.1
       
  1134 --- /tmp/rds-tools-2.0.4/rds-stress.1	Wed Aug  4 15:25:11 2010
       
  1135 +++ rds-tools-2.0.7/rds-stress.1	Thu Feb 24 13:27:52 2011
       
  1136 @@ -1,99 +1,103 @@
       
  1137 -.Dd May 15, 2007
       
  1138 -.Dt RDS-STRESS 1
       
  1139 -.Os
       
  1140 -.Sh NAME
       
  1141 -.Nm rds-stress
       
  1142 -.Nd send messages between processes over RDS sockets
       
  1143 -.Pp
       
  1144 -.Sh SYNOPSIS
       
  1145 -.Nm rds-stress
       
  1146 -.Bk -words
       
  1147 -.Op Fl p Ar port_number
       
  1148 -.Op Fl r Ar receive_address
       
  1149 -.Op Fl s Ar send_address
       
  1150 -.Op Fl a Ar ack_bytes
       
  1151 -.Op Fl q Ar request_bytes
       
  1152 -.Op Fl D Ar rdma_bytes
       
  1153 -.Op Fl d Ar queue_depth
       
  1154 -.Op Fl t Ar nr_tasks
       
  1155 -.Op Fl c
       
  1156 -.Op Fl R
       
  1157 -.Op Fl V
       
  1158 -.Op Fl v
       
  1159 +.TH RDS-STRESS 1 " May 15, 2007"
       
  1160 +.SH "NAME"
       
  1161 +rds-stress - send messages between processes over RDS sockets
       
  1162 +.PP
       
  1163 +.SH SYNOPSIS
       
  1164 +.HP
       
  1165 +.nf
       
  1166 +rds-stress [-p port_number] [-r receive_address] [-s send_address]
       
  1167 +      [-a ack_bytes] [-q request_bytes] [-D rdma_bytes]
       
  1168 +      [-d queue_depth] [-t nr_tasks] [-c] [-R] [-V] [-v] [-o]
       
  1169 +      [-I iovecs] [-M nr] [-z] [-g lgrpid]
       
  1170 +.fi
       
  1171  
       
  1172 -.Sh DESCRIPTION
       
  1173 -.Nm rds-stress
       
  1174 +
       
  1175 +.SH DESCRIPTION
       
  1176 +.PP
       
  1177 +.B
       
  1178 +rds-stress
       
  1179  sends messages between groups tasks, usually running on seperate
       
  1180  machines.
       
  1181 -.Pp
       
  1182 +.PP
       
  1183  First a passive receiving instance is started.
       
  1184 -.Pp
       
  1185 -.Dl $ rds-stress
       
  1186 -.Pp
       
  1187 +.RS 12
       
  1188 +
       
  1189 +	$ rds-stress
       
  1190 +.RE
       
  1191 +.PP
       
  1192  Then an active sending instance is started, giving it
       
  1193  the address and port at which it will find a listening
       
  1194  passive receiver.  In addition, it is given configuration options which
       
  1195  both instances will use.
       
  1196 -.Pp
       
  1197 -.Dl $ rds-stress -s recvhost -p 4000 -t 1 -d 1
       
  1198 -.Pp
       
  1199 +.PP
       
  1200 +.RS 12
       
  1201 +	$ rds-stress -s recvhost -p 4000 -t 1 -d 1
       
  1202 +.RE
       
  1203 +.PP
       
  1204  The active sender will parse the options, connect to the passive receiver, and
       
  1205  send the options over this connection.  From this point on both instances
       
  1206  exhibit the exact same behaviour.
       
  1207 -.Pp
       
  1208 +.PP
       
  1209  They will create a number of child tasks as specified by the -t option.  Once
       
  1210  the children are created the parent sleeps for a second at a time, printing a
       
  1211  summary of statistics at each interval. 
       
  1212 -.Pp
       
  1213 +.PP
       
  1214  Each child will open an RDS socket, each binding to a port number in order
       
  1215  after the port number given on the command line.  The first child would bind to
       
  1216  port 4001 in our example.  Each child sets the send and receive buffers to
       
  1217  exactly fit the number of messages, requests and acks, that will be in flight
       
  1218  as determind by the command line arguments.
       
  1219 -.Pp
       
  1220 +.PP
       
  1221  The children then enter their loop.  They will keep a number of sent messages
       
  1222  outstanding as specified by the -d option.  When they reach this limit they
       
  1223  will wait to receive acks which will allow them to send again.  As they receive
       
  1224  messages from their peers they immediately send acks.
       
  1225 -.Pp
       
  1226 +.PP
       
  1227  Every second, the parent process will display statistics of the ongoing
       
  1228  stress test. The output is described in section OUTPUT below.
       
  1229 -.Pp
       
  1230 +.PP
       
  1231  If the -T option is given, the test will terminate after the specified time,
       
  1232  and a summary is printed.
       
  1233 -.Pp
       
  1234 +.PP
       
  1235  Each child maintains outstanding messages to all other children of the other instance.
       
  1236  They do not send to their siblings.
       
  1237 -.Sh OPTIONS
       
  1238 +.SH OPTIONS
       
  1239 +.PP
       
  1240  The following options are available for use on the command line:
       
  1241 -.Bl -tag -width Ds
       
  1242 -.It Fl p Ar port_number
       
  1243 +
       
  1244 +.TP 7
       
  1245 +\fB\-p  port_number
       
  1246  Each parent binds a TCP socket to this port number and their respective
       
  1247  address.  They will trade the negotiated options over this socket.  Each
       
  1248  child will bind an RDS socket to the range of ports immediately following
       
  1249  this port number, for as many children as there are.
       
  1250 -.It Fl s Ar send_address
       
  1251 +.TP
       
  1252 +\fB\-s send_address
       
  1253  A connection attempt is made to this address.  Once its complete and the
       
  1254  options are sent over it then children will be created and work will proceed.
       
  1255 -.It Fl r Ar receive_address
       
  1256 +.TP
       
  1257 +\fB\-r receive_address
       
  1258  This specifies the address that messages will be sent from.  If -s is not
       
  1259  specified then rds-stress waits for a connection on this address before
       
  1260  proceeding.
       
  1261 -.Pp
       
  1262 +
       
  1263  If this option is not given, rds-stress will choose an appropriate address.
       
  1264  The passive process will accept connections on all local interfaces, and
       
  1265  obtain the address once the control connection is established.
       
  1266  The active process will choose a local address based on the interface through
       
  1267  which it connects to the destination address.
       
  1268 -.It Fl a Ar ack_bytes
       
  1269 +.TP
       
  1270 +\fB\-a ack_bytes
       
  1271  This specifies the size of the ack messages, in bytes. There is a minimum size
       
  1272  which depends on the format of the ack messages, which may change over time.
       
  1273  See section "Message Sizes" below.
       
  1274 -.It Fl q Ar request_bytes
       
  1275 +.TP
       
  1276 +\fB\-q request_bytes
       
  1277  This specifies the size of the request messages, in bytes.
       
  1278  It also has a minimum size which may change over time.
       
  1279  See section "Message Sizes" below.
       
  1280 -.It Fl D Ar rdma_bytes
       
  1281 +.TP
       
  1282 +\fB\-D rdma_bytes
       
  1283  RDSv3 is capable of transmitting part of a message via RDMA directly from
       
  1284  application buffer to application buffer. This option enables RDMA support
       
  1285  in rds-stress: request packets include parameters for an RDMA READ or WRITE
       
  1286 @@ -100,20 +104,25 @@
       
  1287  operation, which the receiving process executes at the time the ACK packet
       
  1288  is sent.
       
  1289  See section "Message Sizes" below.
       
  1290 -.It Fl d Ar queue_depth
       
  1291 +.TP
       
  1292 +\fB\-d queue_depth
       
  1293  Each child will try to maintain this many sent messages outstanding to each
       
  1294  of its peers on the remote address.
       
  1295 -.It Fl t Ar nr_tasks
       
  1296 +.TP
       
  1297 +\fB\-t nr_tasks
       
  1298  Each parent will create this many children tasks.
       
  1299 -.It Fl T Ar seconds
       
  1300 +.TP
       
  1301 +\fB\-T seconds
       
  1302  Specify the duration of the test run. After the specified number of seconds,
       
  1303  all processes on both ends of the connection will terminate, and the
       
  1304  active instance will print a summary. By default, rds-stress will keep
       
  1305  on sending and receiving messages.
       
  1306 -.It Fl z
       
  1307 +.TP
       
  1308 +\fB\-z
       
  1309  This flag can be used in conjunction with -T. It suppresses the ongoing
       
  1310  display of statistics, and prints a summary only.
       
  1311 -.It Fl c
       
  1312 +.TP
       
  1313 +\fB\-c
       
  1314  This causes rds-stress to create child tasks which just consume CPU cycles.
       
  1315  One task is created for each CPU in the system.  First each child observes the
       
  1316  maximum rate at which it can consume cycles.  This means that this option
       
  1317 @@ -121,50 +130,78 @@
       
  1318  use of the system by observing the lesser rate at which the children consume
       
  1319  cycles.  This option is *not* shared between the active and passive instances.
       
  1320  It must be specified on each rds-stress command line.
       
  1321 -.It Fl R
       
  1322 +.TP
       
  1323 +\fB\-R
       
  1324  This tells the rds-stress parent process to run with SCHED_RR priority,
       
  1325  giving it precedence over the child processes. This is useful when running
       
  1326  with lots of tasks, where there is a risk of the child processes starving
       
  1327  the parent, and skewing the results.
       
  1328 -.It Fl v
       
  1329 +.TP
       
  1330 +\fB\-v
       
  1331  With this option enabled, packets are filled with a pattern that is
       
  1332  verified by the receiver. This check can help detect data corruption
       
  1333  occuring under high load.
       
  1334 -.El
       
  1335 -.Pp
       
  1336 +.TP
       
  1337 +\fB\-o
       
  1338 +Datagrams sent one way only (default is both)
       
  1339 +.TP
       
  1340 +\fB\-I iovecs
       
  1341 +RDMA: number of user buffers to target (default is 1, max is 512)
       
  1342 +.TP
       
  1343 +\fB\-M nr
       
  1344 +RDMA: mode (0=readwrite,1=readonly,2=writeonly)
       
  1345 +.TP
       
  1346 +\fB\-g lgrpid
       
  1347 +bind the process to the specified lgrp
       
  1348 +.PP
       
  1349  
       
  1350 -.Ss Message Sizes
       
  1351 +.SS Message Sizes
       
  1352  Options which set a message size (such as -a) specify a number of bytes
       
  1353  by default. By appending \fBK\fP, \fBM\fP, or \fBG\fP, you can specify the size
       
  1354  in kilobytes, megabytes or gigabytes, respectively. For instance,
       
  1355  the following will run rds-stress with a message and ACK size of 1024
       
  1356  bytes, and an RDMA message size of 1048576 bytes:
       
  1357 -.Pp
       
  1358 -.Dl rds-stress ... -q 1K -a 1K -D 1M
       
  1359 -.Pp
       
  1360 -.Pp
       
  1361 -.Sh OUTPUT
       
  1362 +.PP
       
  1363 +.RS 12
       
  1364 +rds-stress ... -q 1K -a 1K -D 1M
       
  1365 +.RE
       
  1366 +.PP
       
  1367 +.PP
       
  1368 +.SH OUTPUT
       
  1369  Each parent outputs columns of statistics at a regular interval:
       
  1370 -.Bl -tag -width Ds
       
  1371 -.It tsks
       
  1372 +.TP 8
       
  1373 +tsks
       
  1374  The number of child tasks which are running.
       
  1375 -.It tx/s
       
  1376 +.TP
       
  1377 +tx/s
       
  1378  The number of sendmsg() calls that all children are executing, per second. 
       
  1379 -.It tx+rx K/s
       
  1380 +.TP
       
  1381 +rx/s
       
  1382 +The number of recvmsg() calls that all children are executing, per second. 
       
  1383 +.TP
       
  1384 +tx+rx K/s
       
  1385  The total number of bytes that are flowing through sendmsg() and recvmsg() for all children.
       
  1386  This includes both request and ack messages.
       
  1387 -.It rw+rr K/s
       
  1388 -The total number of bytes that are being transferred via RDMA READs and
       
  1389 +.TP
       
  1390 +mbi K/s
       
  1391 +The total number of bytes that are being received via RDMA READs and
       
  1392  WRITEs for all children.
       
  1393 -.It tx us/c
       
  1394 +.TP
       
  1395 +mbo K/s
       
  1396 +The total number of bytes that are being transmitted via RDMA READs and
       
  1397 +WRITEs for all children.
       
  1398 +.TP
       
  1399 +tx us/c
       
  1400  The average number of microseconds spent in sendmsg() calls.
       
  1401 -.It rtt us
       
  1402 +.TP
       
  1403 +rtt us
       
  1404  The average round trip time for a request and ack message pair.  This measures
       
  1405  the total time between when a task sends a request and when it finally receives
       
  1406  the ack for that message.  Because it includes the time it takes for the
       
  1407  receiver to wake up, receive the message, and send an ack, it can grow to be
       
  1408  quite large under load. 
       
  1409 -.It cpu %
       
  1410 +.TP
       
  1411 +cpu %
       
  1412  This is the percentage of available CPU resources on this machine that are being
       
  1413  consumed since rds-stress started running.  It will show -1.00 if -c is not
       
  1414  given.  It is calculated based on the amount of CPU resources that CPU soaking
       
  1415 @@ -171,4 +208,3 @@
       
  1416  tasks are able to consume.  This lets it measure CPU use by the system, say in
       
  1417  interrupt handlers, that task-based CPU accounting does not include.
       
  1418  For this to work rds-stress must be started with -c on an idle system.
       
  1419 -.El
       
  1420 diff -r -u /tmp/rds-tools-2.0.4/include/rds.h rds-tools-2.0.7/include/rds.h
       
  1421 --- /tmp/rds-tools-2.0.4/include/rds.h	Wed Aug  4 15:25:11 2010
       
  1422 +++ rds-tools-2.0.7/include/rds.h	Thu Feb 24 13:30:23 2011
       
  1423 @@ -84,6 +84,8 @@
       
  1424  #define RDS_CMSG_CONG_UPDATE		5
       
  1425  #define RDS_CMSG_ATOMIC_FADD		6
       
  1426  #define RDS_CMSG_ATOMIC_CSWP		7
       
  1427 +#define	RDS_CMSG_MASKED_ATOMIC_FADD	8
       
  1428 +#define	RDS_CMSG_MASKED_ATOMIC_CSWP	9
       
  1429  
       
  1430  #define RDS_INFO_FIRST			10000
       
  1431  #define RDS_INFO_COUNTERS		10000
       
  1432 @@ -252,8 +254,25 @@
       
  1433  	rds_rdma_cookie_t cookie;
       
  1434  	uint64_t 	local_addr;
       
  1435  	uint64_t 	remote_addr;
       
  1436 -	uint64_t	swap_add;
       
  1437 -	uint64_t	compare;
       
  1438 +	union {
       
  1439 +		struct {
       
  1440 +			uint64_t	compare;
       
  1441 +			uint64_t	swap;
       
  1442 +		} cswp;
       
  1443 +		struct {
       
  1444 +			uint64_t	add;
       
  1445 +		} fadd;
       
  1446 +		struct {
       
  1447 +			uint64_t	compare;
       
  1448 +			uint64_t	swap;
       
  1449 +			uint64_t	compare_mask;
       
  1450 +			uint64_t	swap_mask;
       
  1451 +		} m_cswp;
       
  1452 +		struct {
       
  1453 +			uint64_t	add;
       
  1454 +			uint64_t	nocarry_mask;
       
  1455 +		} m_fadd;
       
  1456 +	};
       
  1457  	u_int64_t	flags;
       
  1458  	u_int64_t	user_token;
       
  1459  };
       
  1460 @@ -278,5 +297,6 @@
       
  1461  #define RDS_RDMA_USE_ONCE	0x0008	/* free MR after use */
       
  1462  #define RDS_RDMA_DONTWAIT	0x0010	/* Don't wait in SET_BARRIER */
       
  1463  #define RDS_RDMA_NOTIFY_ME	0x0020	/* Notify when operation completes */
       
  1464 +#define RDS_RDMA_SILENT		0x0040	/* Do not interrupt remote */
       
  1465  
       
  1466  #endif /* IB_RDS_H */