|
1 diff -r -u /tmp/rds-tools-2.0.4/rds-stress.c rds-tools-2.0.7/rds-stress.c |
|
2 --- /tmp/rds-tools-2.0.4/rds-stress.c Wed Aug 4 15:25:10 2010 |
|
3 +++ rds-tools-2.0.7/rds-stress.c Thu Feb 24 13:27:52 2011 |
|
4 @@ -15,7 +15,13 @@ |
|
5 #include <sys/time.h> |
|
6 #include <time.h> |
|
7 #include <inttypes.h> |
|
8 +#if defined(__SVR4) && defined(__sun) |
|
9 +#include <sys/syscall.h> |
|
10 +#include <signal.h> |
|
11 +#include <sys/lgrp_user.h> |
|
12 +#else |
|
13 #include <syscall.h> |
|
14 +#endif |
|
15 #include <sys/stat.h> |
|
16 #include <sys/poll.h> |
|
17 #include <ctype.h> |
|
18 @@ -22,8 +28,13 @@ |
|
19 #include <fcntl.h> |
|
20 #include <sched.h> |
|
21 #include <getopt.h> |
|
22 +#if !(defined(__SVR4) && defined(__sun)) |
|
23 #include <byteswap.h> |
|
24 #include "rds.h" |
|
25 +#else |
|
26 +#include <infiniband/ofa_solaris.h> |
|
27 +#include <sys/rds.h> |
|
28 +#endif |
|
29 |
|
30 #include "pfhack.h" |
|
31 |
|
32 @@ -110,6 +121,7 @@ |
|
33 struct child_control { |
|
34 pid_t pid; |
|
35 int ready; |
|
36 + int stopping; |
|
37 struct timeval start; |
|
38 struct counter cur[NR_STATS]; |
|
39 struct counter last[NR_STATS]; |
|
40 @@ -254,7 +266,20 @@ |
|
41 |
|
42 die("invalid host name or dotted quad '%s'\n", ptr); |
|
43 } |
|
44 +#if defined(__SVR4) && defined(__sun) |
|
45 +static lgrp_id_t lgrp_id = -1; |
|
46 |
|
47 +static void |
|
48 +set_my_lgrp(void) |
|
49 +{ |
|
50 + if (lgrp_id != -1) { |
|
51 + lgrp_affinity_set(P_LWPID, P_MYID, lgrp_id, |
|
52 + LGRP_AFF_STRONG); |
|
53 + yield(); /* force a context switch */ |
|
54 + } |
|
55 +} |
|
56 +#endif |
|
57 + |
|
58 static void usage(void) |
|
59 { |
|
60 fprintf(stderr, "rds-stress version %s\n", RDS_VERSION); |
|
61 @@ -281,6 +306,9 @@ |
|
62 " -c measure cpu use with per-cpu soak processes\n" |
|
63 " -V trace execution\n" |
|
64 " -z print a summary at end of test only\n" |
|
65 +#if defined(__SVR4) && defined(__sun) |
|
66 + " -g [lgrpid] bind the process to the specified lgrp\n" |
|
67 +#endif |
|
68 "\n" |
|
69 "Example:\n" |
|
70 " recv$ rds-stress\n" |
|
71 @@ -310,7 +338,7 @@ |
|
72 static void check_parent(pid_t pid) |
|
73 { |
|
74 if (pid != getppid()) |
|
75 - die("parent %u exited\n", pid); |
|
76 + die("parent %u exited\n", (int)pid); |
|
77 } |
|
78 |
|
79 /* |
|
80 @@ -334,6 +362,7 @@ |
|
81 msg_pattern[i] = k; |
|
82 } |
|
83 |
|
84 +#if !(defined(__SVR4) && defined(__sun)) |
|
85 #if __BYTE_ORDER == __LITTLE_ENDIAN |
|
86 #define htonll(x) bswap_64(x) |
|
87 #define ntohll(x) bswap_64(x) |
|
88 @@ -341,6 +370,7 @@ |
|
89 #define htonll(x) (x) |
|
90 #define ntohll(x) (x) |
|
91 #endif |
|
92 +#endif /* Not sun */ |
|
93 |
|
94 static void encode_hdr(struct header *dst, const struct header *hdr) |
|
95 { |
|
96 @@ -584,7 +614,11 @@ |
|
97 if (opts->receive_addr == 0) |
|
98 return 1; |
|
99 |
|
100 +#if defined(__SVR4) && defined(__sun) |
|
101 + sin.sin_family = AF_INET_OFFLOAD; |
|
102 +#else |
|
103 sin.sin_family = AF_INET; |
|
104 +#endif |
|
105 sin.sin_port = htons(opts->starting_port); |
|
106 sin.sin_addr.s_addr = htonl(opts->receive_addr); |
|
107 |
|
108 @@ -677,7 +711,11 @@ |
|
109 size = sizeof(struct rdma_key_o_meter) |
|
110 + 2 * nr_tasks * sizeof(*kt) |
|
111 + 2 * RDMA_MAX_TRACKED_KEYS * sizeof(*ks); |
|
112 +#if defined(__SVR4) && defined(__sun) |
|
113 + base = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_SHARED, -1, 0); |
|
114 +#else |
|
115 base = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_SHARED, 0, 0); |
|
116 +#endif |
|
117 if (base == MAP_FAILED) |
|
118 die_errno("alloc_rdma_buffers: mmap failed"); |
|
119 |
|
120 @@ -828,7 +866,7 @@ |
|
121 } |
|
122 |
|
123 if (!failed) |
|
124 - trace("compare pass pattern %Lx addr %p\n", |
|
125 + trace("compare pass pattern 0x%Lx addr %p\n", |
|
126 (unsigned long long) pattern, addr); |
|
127 } |
|
128 |
|
129 @@ -865,7 +903,11 @@ |
|
130 /* We use mmap here rather than malloc, because it is always |
|
131 * page aligned. */ |
|
132 len = 2 * opts->nr_tasks * opts->req_depth * (opts->rdma_vector * opts->rdma_size) + sys_page_size; |
|
133 +#if defined(__SVR4) && defined(__sun) |
|
134 + base = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); |
|
135 +#else |
|
136 base = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE, 0, 0); |
|
137 +#endif |
|
138 if (base == MAP_FAILED) |
|
139 die_errno("alloc_rdma_buffers: mmap failed"); |
|
140 memset(base, 0x2f, len); |
|
141 @@ -915,17 +957,16 @@ |
|
142 if (RDMA_OP_READ == hdr->rdma_op) { |
|
143 if (opt.verify) |
|
144 rds_fill_buffer(rdma_addr, rdma_size, hdr->rdma_pattern); |
|
145 - trace("Requesting RDMA read for pattern %Lx " |
|
146 - "local addr to rdma read %p\n", |
|
147 - (unsigned long long) hdr->rdma_pattern, |
|
148 + trace("Requesting RDMA read for pattern 0x%Lx " |
|
149 + "local addr to rdma read 0x%p\n", |
|
150 + (unsigned long long) hdr->rdma_pattern, |
|
151 rdma_addr); |
|
152 } else { |
|
153 if (opt.verify) |
|
154 rds_fill_buffer(rdma_addr, rdma_size, 0); |
|
155 - trace("Requesting RDMA write for pattern %Lx " |
|
156 - "local addr to rdma write %p\n", |
|
157 - (unsigned long long) hdr->rdma_pattern, |
|
158 - rdma_addr); |
|
159 + |
|
160 + trace("Requesting RDMA write for pattern 0x%Lx\n", |
|
161 + (unsigned long long) hdr->rdma_pattern); |
|
162 } |
|
163 } |
|
164 |
|
165 @@ -947,7 +988,7 @@ |
|
166 die("Unexpected RDMA op %u in request\n", in_hdr->rdma_op); |
|
167 |
|
168 |
|
169 - trace("RDS received request to issue rdma %s len %lu rva %Lx key %Lx pattern %Lx\n", |
|
170 + trace("RDS received request to issue rdma %s len %lu rva 0x%Lx key 0x%Lx pattern 0x%Lx\n", |
|
171 in_hdr->rdma_op == RDMA_OP_WRITE? "write to" : "read from", |
|
172 rdma_size, |
|
173 (unsigned long long) in_hdr->rdma_addr, |
|
174 @@ -1007,6 +1048,9 @@ |
|
175 t->drain_rdmas = 0; |
|
176 } |
|
177 |
|
178 +#if defined(__SVR4) && defined(__sun) |
|
179 +#undef MSG_MAXIOVLEN |
|
180 +#endif |
|
181 #define MSG_MAXIOVLEN 2 |
|
182 |
|
183 /* |
|
184 @@ -1560,7 +1604,12 @@ |
|
185 struct timeval start; |
|
186 int do_work = opts->simplex ? active : 1; |
|
187 |
|
188 +#if defined(__SVR4) && defined(__sun) |
|
189 + set_my_lgrp(); |
|
190 + sin.sin_family = AF_INET_OFFLOAD; |
|
191 +#else |
|
192 sin.sin_family = AF_INET; |
|
193 +#endif |
|
194 sin.sin_port = htons(opts->starting_port + 1 + id); |
|
195 sin.sin_addr.s_addr = htonl(opts->receive_addr); |
|
196 |
|
197 @@ -1572,7 +1621,11 @@ |
|
198 for (i = 0; i < opts->nr_tasks; i++) { |
|
199 tasks[i].nr = i; |
|
200 tasks[i].src_addr = sin; |
|
201 +#if defined(__SVR4) && defined(__sun) |
|
202 + tasks[i].dst_addr.sin_family = AF_INET_OFFLOAD; |
|
203 +#else |
|
204 tasks[i].dst_addr.sin_family = AF_INET; |
|
205 +#endif |
|
206 tasks[i].dst_addr.sin_addr.s_addr = htonl(opts->send_addr); |
|
207 tasks[i].dst_addr.sin_port = htons(opts->starting_port + 1 + i); |
|
208 tasks[i].send_time = alloca(opts->req_depth * sizeof(struct timeval)); |
|
209 @@ -1625,6 +1678,10 @@ |
|
210 ; |
|
211 } |
|
212 |
|
213 + /* stop sending if in shutdown phase */ |
|
214 + if (ctl->stopping) |
|
215 + continue; |
|
216 + |
|
217 /* keep the pipeline full */ |
|
218 can_send = !!(pfd.revents & POLLOUT); |
|
219 for (i = 0, t = tasks; i < opts->nr_tasks; i++, t++) { |
|
220 @@ -1665,8 +1722,12 @@ |
|
221 uint32_t i; |
|
222 |
|
223 len = opts->nr_tasks * sizeof(*ctl); |
|
224 +#if defined(__SVR4) && defined(__sun) |
|
225 + ctl = (struct child_control *)mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_SHARED, -1, 0); |
|
226 +#else |
|
227 ctl = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_SHARED, |
|
228 0, 0); |
|
229 +#endif |
|
230 if (ctl == MAP_FAILED) |
|
231 die("mmap of %u child control structs failed", opts->nr_tasks); |
|
232 |
|
233 @@ -1699,7 +1760,7 @@ |
|
234 continue; |
|
235 pid = waitpid(-1, NULL, WNOHANG); |
|
236 if (pid) |
|
237 - die("child %u (pid %u) exited\n", i, pid); |
|
238 + die("child %u (pid %u) exited\n", i, (int)pid); |
|
239 sleep(1); |
|
240 i--; /* try this child again */ |
|
241 } |
|
242 @@ -1967,7 +2028,7 @@ |
|
243 |
|
244 pid = waitpid(-1, &status, wflags); |
|
245 if (pid < 0) |
|
246 - die("waitpid returned %u", pid); |
|
247 + die("waitpid returned %d", (int)pid); |
|
248 if (pid == 0) |
|
249 return 0; |
|
250 |
|
251 @@ -1975,15 +2036,15 @@ |
|
252 if (WEXITSTATUS(status) == 0) |
|
253 return 1; |
|
254 die("child pid %u exited with status %d\n", |
|
255 - pid, WEXITSTATUS(status)); |
|
256 + (int)pid, WEXITSTATUS(status)); |
|
257 } |
|
258 if (WIFSIGNALED(status)) { |
|
259 if (WTERMSIG(status) == SIGTERM) |
|
260 return 1; |
|
261 die("child pid %u exited with signal %d\n", |
|
262 - pid, WTERMSIG(status)); |
|
263 + (int)pid, WTERMSIG(status)); |
|
264 } |
|
265 - die("child pid %u wait status %d\n", pid, status); |
|
266 + die("child pid %u wait status %d\n", (int)pid, status); |
|
267 } |
|
268 |
|
269 static void release_children_and_wait(struct options *opts, |
|
270 @@ -2139,7 +2200,12 @@ |
|
271 control_fd = -1; |
|
272 |
|
273 if (nr_running) { |
|
274 + /* let everything stop gracefully before we kill the children */ |
|
275 for (i = 0; i < opts->nr_tasks; i++) |
|
276 + ctl[i].stopping = 1; |
|
277 + sleep(1); |
|
278 + |
|
279 + for (i = 0; i < opts->nr_tasks; i++) |
|
280 kill(ctl[i].pid, SIGTERM); |
|
281 stop_soakers(soak_arr); |
|
282 } |
|
283 @@ -2517,7 +2583,11 @@ |
|
284 /* an extra terminating entry which will be all 0s */ |
|
285 len = (nr_soak + 1) * sizeof(struct soak_control); |
|
286 soak_arr = mmap(NULL, len, PROT_READ|PROT_WRITE, |
|
287 +#if defined(__SVR4) && defined(__sun) |
|
288 + MAP_ANONYMOUS|MAP_SHARED, -1, 0); |
|
289 +#else |
|
290 MAP_ANONYMOUS|MAP_SHARED, 0, 0); |
|
291 +#endif |
|
292 if (soak_arr == MAP_FAILED) |
|
293 die("mmap of %ld soak control structs failed", nr_soak); |
|
294 |
|
295 @@ -2589,6 +2659,7 @@ |
|
296 { "rtprio", no_argument, NULL, 'R' }, |
|
297 { "verify", no_argument, NULL, 'v' }, |
|
298 { "trace", no_argument, NULL, 'V' }, |
|
299 +{ "lgrpid", required_argument, NULL, 'g' }, |
|
300 |
|
301 { "rdma-use-once", required_argument, NULL, OPT_RDMA_USE_ONCE }, |
|
302 { "rdma-use-get-mr", required_argument, NULL, OPT_RDMA_USE_GET_MR }, |
|
303 @@ -2652,7 +2723,7 @@ |
|
304 while(1) { |
|
305 int c, index; |
|
306 |
|
307 - c = getopt_long(argc, argv, "+a:cD:d:hI:M:op:q:Rr:s:t:T:vVz", |
|
308 + c = getopt_long(argc, argv, "+a:cD:d:hI:M:op:q:Rr:s:t:T:vVg:z", |
|
309 long_options, &index); |
|
310 if (c == -1) |
|
311 break; |
|
312 @@ -2711,6 +2782,10 @@ |
|
313 case 'V': |
|
314 opts.tracing = 1; |
|
315 break; |
|
316 + case 'g': |
|
317 + lgrp_id = (lgrp_id_t)parse_ull(optarg, |
|
318 + (uint32_t)~0); |
|
319 + break; |
|
320 case OPT_USE_CONG_MONITOR: |
|
321 opts.use_cong_monitor = parse_ull(optarg, 1); |
|
322 break; |
|
323 @@ -2786,6 +2861,7 @@ |
|
324 if (opts.rdma_size && 0) |
|
325 opts.rdma_size = (opts.rdma_size + 4095) & ~4095; |
|
326 |
|
327 + set_my_lgrp(); |
|
328 opt = opts; |
|
329 return active_parent(&opts, soak_arr); |
|
330 } |
|
331 diff -r -u /tmp/rds-tools-2.0.4/pfhack.h rds-tools-2.0.7/pfhack.h |
|
332 --- /tmp/rds-tools-2.0.4/pfhack.h Wed Aug 4 15:25:11 2010 |
|
333 +++ rds-tools-2.0.7/pfhack.h Thu Feb 24 13:27:51 2011 |
|
334 @@ -44,9 +44,11 @@ |
|
335 #ifndef __PF_HACK_H |
|
336 #define __PF_HACK_H |
|
337 |
|
338 +#if !((defined(__SVR4) && defined(__sun))) |
|
339 #define PF_RDS 21 |
|
340 #define AF_RDS 21 |
|
341 #define SOL_RDS 276 |
|
342 +#endif |
|
343 |
|
344 extern int discover_pf_rds(); |
|
345 extern int discover_sol_rds(); |
|
346 diff -r -u /tmp/rds-tools-2.0.4/rds-info.c rds-tools-2.0.7/rds-info.c |
|
347 --- /tmp/rds-tools-2.0.4/rds-info.c Wed Aug 4 15:25:10 2010 |
|
348 +++ rds-tools-2.0.7/rds-info.c Thu Feb 24 13:27:51 2011 |
|
349 @@ -42,16 +42,27 @@ |
|
350 #include <sys/types.h> |
|
351 #include <sys/socket.h> |
|
352 #include <errno.h> |
|
353 +#if defined(__SVR4) && defined(__sun) |
|
354 +#include <strings.h> |
|
355 +#else |
|
356 #include <string.h> |
|
357 +#endif |
|
358 #include <inttypes.h> |
|
359 #include <netinet/in.h> |
|
360 #include <arpa/inet.h> |
|
361 |
|
362 +#if defined(__SVR4) && defined(__sun) |
|
363 +#include <sys/rds.h> |
|
364 +#else |
|
365 #include "rds.h" |
|
366 +#endif |
|
367 #include "pfhack.h" |
|
368 |
|
369 +/* The struct rds_info_connection definition changed between rds in 1.4 and 1.5, so both must be supported |
|
370 + for now. TODO: remove the check of transport[15] once OFED pre-1.5 is extinct. */ |
|
371 #define rds_conn_flag(conn, flag, letter) \ |
|
372 - (conn.flags & RDS_INFO_CONNECTION_FLAG_##flag ? letter : '-') |
|
373 + (conn.flags & RDS_INFO_CONNECTION_FLAG_##flag \ |
|
374 + || conn.transport[15] & RDS_INFO_CONNECTION_FLAG_##flag ? letter : '-') |
|
375 |
|
376 #define min(a, b) (a < b ? a : b) |
|
377 #define array_size(foo) (sizeof(foo) / sizeof(foo[0])) |
|
378 @@ -234,8 +245,10 @@ |
|
379 print_msgs, "Send", 0 }, |
|
380 ['t'] = { RDS_INFO_RETRANS_MESSAGES, "retransmit queue messages", |
|
381 print_msgs, "Retransmit", 0 }, |
|
382 +#if !(defined(__SVR4) && defined(__sun)) |
|
383 ['T'] = { RDS_INFO_TCP_SOCKETS, "TCP transport sockets", |
|
384 print_tcp_socks, NULL, 0 }, |
|
385 +#endif |
|
386 ['I'] = { RDS_INFO_IB_CONNECTIONS, "IB transport connections", |
|
387 print_ib_conns, NULL, 0 }, |
|
388 }; |
|
389 @@ -266,6 +279,10 @@ |
|
390 char optstring[258] = "v+"; |
|
391 int given_options = 0; |
|
392 socklen_t len = 0; |
|
393 +#if defined(__SVR4) && defined(__sun) |
|
394 + socklen_t ulen; |
|
395 + struct rds_info_arg arg; |
|
396 +#endif |
|
397 void *data = NULL; |
|
398 int fd; |
|
399 int each; |
|
400 @@ -322,6 +339,7 @@ |
|
401 (given_options && !infos[i].option_given)) |
|
402 continue; |
|
403 |
|
404 +#if !(defined(__SVR4) && defined(__sun)) |
|
405 /* read in the info until we get a full snapshot */ |
|
406 while ((each = getsockopt(fd, sol, infos[i].opt_val, data, |
|
407 &len)) < 0) { |
|
408 @@ -345,15 +363,60 @@ |
|
409 return 1; |
|
410 } |
|
411 } |
|
412 +#else |
|
413 + /* 1st call gets the length of the data available */ |
|
414 + ulen = 0; |
|
415 + bzero(&arg, sizeof (struct rds_info_arg)); |
|
416 + arg.lenp = (uint64_t)(uintptr_t)&ulen; |
|
417 + arg.datap = NULL; |
|
418 + each = ioctl(fd, infos[i].opt_val, &arg); |
|
419 + if ((each < 0) && (errno != ENOSPC)) { |
|
420 + verbosef(0, stderr, "%s: Unable get statistics: %s\n", |
|
421 + progname, strerror(errno)); |
|
422 + return 1; |
|
423 + } |
|
424 |
|
425 + /* No data at the driver */ |
|
426 + if (ulen == 0) |
|
427 + invalid_opt = 1; |
|
428 +#endif |
|
429 + |
|
430 if (invalid_opt) |
|
431 continue; |
|
432 |
|
433 +#if !(defined(__SVR4) && defined(__sun)) |
|
434 infos[i].print(data, each, len, infos[i].extra); |
|
435 +#else |
|
436 + do { |
|
437 + arg.datap = (uint64_t)(uintptr_t)realloc( |
|
438 + (char *)(uintptr_t)arg.datap, ulen); |
|
439 + if (arg.datap == NULL) { |
|
440 + verbosef(0, stderr, "%s: Unable to allocate memory " |
|
441 + "for %u bytes of info: %s\n", |
|
442 + progname, ulen, strerror(errno)); |
|
443 + return 1; |
|
444 + } |
|
445 |
|
446 + /* 2nd call gets the data */ |
|
447 + len = ulen; |
|
448 + each = ioctl(fd, infos[i].opt_val, &arg); |
|
449 + if ((each < 0) && (errno != ENOSPC)) { |
|
450 + verbosef(0, stderr, |
|
451 + "%s: Unable get statistics: %s\n", |
|
452 + progname, strerror(errno)); |
|
453 + return 1; |
|
454 + } |
|
455 + } while (ulen > len); |
|
456 + |
|
457 + infos[i].print((void *)(uintptr_t)arg.datap, each, ulen, |
|
458 + infos[i].extra); |
|
459 +#endif |
|
460 if (given_options && --given_options == 0) |
|
461 break; |
|
462 } |
|
463 +#if defined(__SVR4) && defined(__sun) |
|
464 + free((void *)(uintptr_t)arg.datap); |
|
465 +#endif |
|
466 |
|
467 return 0; |
|
468 } |
|
469 diff -r -u /tmp/rds-tools-2.0.4/rds.7 rds-tools-2.0.7/rds.7 |
|
470 --- /tmp/rds-tools-2.0.4/rds.7 Wed Aug 4 15:25:11 2010 |
|
471 +++ rds-tools-2.0.7/rds.7 Thu Feb 24 13:27:52 2011 |
|
472 @@ -6,6 +6,7 @@ |
|
473 .nf |
|
474 .B #include <sys/socket.h> |
|
475 .B #include <netinet/in.h> |
|
476 +.B #include <sys/rds.h> |
|
477 .fi |
|
478 .SH DESCRIPTION |
|
479 This is an implementation of the RDS socket API. It provides reliable, |
|
480 @@ -14,18 +15,13 @@ |
|
481 Currently, RDS can be transported over Infiniband, and loopback. |
|
482 RDS over TCP is disabled, but will be re-enabled in the near future. |
|
483 .PP |
|
484 -RDS uses standard |
|
485 -.B AF_INET |
|
486 -addresses as described in |
|
487 -.BR ip (7) |
|
488 +RDS uses the |
|
489 +\fBAF_INET_OFFLOAD\fR address family |
|
490 to identify end points. |
|
491 .\"------------------------------------------------------------------ |
|
492 .SS Socket Creation |
|
493 RDS is still in development and as such does not have a reserved protocol |
|
494 -family constant. Applications must read the string representation of the |
|
495 -protocol family value from the |
|
496 -.B pf_rds |
|
497 -sysctl parameter file described below. |
|
498 +family constant. Applications should use AF_INET_OFFLOAD. |
|
499 .PP |
|
500 .nf |
|
501 .B rds_socket = socket(pf_rds, SOCK_SEQPACKET, 0); |
|
502 @@ -58,9 +54,6 @@ |
|
503 .BR SOL_RDS ). |
|
504 Just as with the RDS protocol family, an official value has not been |
|
505 assigned yet, so the kernel will assign a value dynamically. |
|
506 -The assigned value can be retrieved from the |
|
507 -.B sol_rds |
|
508 -sysctl parameter file. |
|
509 .PP |
|
510 RDS specific socket options will be described in a separate section |
|
511 below. |
|
512 @@ -77,7 +70,7 @@ |
|
513 .PP |
|
514 For instance, when binding to the address of an Infiniband interface |
|
515 such as |
|
516 -.BR ib0 , |
|
517 +.BR ibd0 , |
|
518 the socket will use the Infiniband transport. If RDS is not able |
|
519 to associate a transport with the given address, it will return |
|
520 .BR EADDRNOTAVAIL . |
|
521 @@ -394,47 +387,6 @@ |
|
522 be delivered in the order they're sent. Messages sent from different |
|
523 sockets, or to different destinations, may be delivered in any order. |
|
524 .\"------------------------------------------------------------------ |
|
525 -.SH SYSCTL VALUES |
|
526 -These parameteres may only be accessed through their files in |
|
527 -.BR /proc/sys/net/rds . |
|
528 -Access through |
|
529 -.BR sysctl (2) |
|
530 -is not supported. |
|
531 -.TP |
|
532 -.B pf_rds |
|
533 -This file contains the string representation of the protocol family |
|
534 -constant passed to |
|
535 -.BR socket (2) |
|
536 -to create a new RDS socket. |
|
537 -.TP |
|
538 -.B sol_rds |
|
539 -This file contains the string representation of the socket level parameter |
|
540 -that is passed to |
|
541 -.BR getsockopt (2) |
|
542 -and |
|
543 -.BR setsockopt (2) |
|
544 -to manipulate RDS socket options. |
|
545 -.TP |
|
546 -.BR max_unacked_bytes " and " max_unacked_packets |
|
547 -These parameters are used to tune the generation of acknowledgements. By |
|
548 -default, the system receiving RDS messages does not send back explicit |
|
549 -acknowledgements unless it transmits a message of its own (in which |
|
550 -case the ACK is piggybacked onto the outgoing message), or when the sending |
|
551 -system requests an ACK. |
|
552 -.IP |
|
553 -However, the sender needs to see an ACK from time to time so that it |
|
554 -can purge old messages from the send queue. The unacked bytes and |
|
555 -packet counters are used to keep track of how much data has been |
|
556 -sent without requesting an ACK. The default is to request an acknowledgement |
|
557 -every 16 packets, or every 16 MB, whichever comes first. |
|
558 -.TP |
|
559 -.BR reconnect_delay_min_ms " and " reconnect_delay_max_ms |
|
560 -RDS uses host-to-host connections to transport RDS messages (both for the TCP |
|
561 -and the Infiniband transport). If this connection breaks, RDS will try to |
|
562 -re-establish the connection. Because this reconnect may be triggered by |
|
563 -both hosts at the same time and fail, RDS uses a random backoff before attempting |
|
564 -a reconnect. These two parameters specify the minimum and maximum delay in |
|
565 -milliseconds. The default values are 1 and 1000, respectively. |
|
566 .SH SEE ALSO |
|
567 .BR rds-rdma (7), |
|
568 .BR socket (2), |
|
569 diff -r -u /tmp/rds-tools-2.0.4/rds-info.1 rds-tools-2.0.7/rds-info.1 |
|
570 --- /tmp/rds-tools-2.0.4/rds-info.1 Wed Aug 4 15:25:11 2010 |
|
571 +++ rds-tools-2.0.7/rds-info.1 Thu Feb 24 13:27:51 2011 |
|
572 @@ -1,162 +1,150 @@ |
|
573 -.Dd October 30, 2006 |
|
574 -.Dt RDS-INFO 1 |
|
575 -.Os |
|
576 -.Sh NAME |
|
577 -.Nm rds-info |
|
578 -.Nd display information from the RDS kernel module |
|
579 -.Pp |
|
580 -.Sh SYNOPSIS |
|
581 -.Nm rds-info |
|
582 -.Op Fl v |
|
583 -.Bk -words |
|
584 -.Op Fl cknrstIT |
|
585 +.TH RDS-INFO 1 "October 30, 2006" |
|
586 +.SH "NAME" |
|
587 +rds-info - display information from the RDS kernel module |
|
588 +.SH SYNOPSIS |
|
589 +.B rds-info [-v] [-cknrstIT] |
|
590 |
|
591 -.Sh DESCRIPTION |
|
592 -The |
|
593 -.Nm |
|
594 -utility presents various sources of information that |
|
595 +.SH DESCRIPTION |
|
596 +.PP |
|
597 +The rds-info utility presents various sources of information that |
|
598 the RDS kernel module maintains. When run without any optional arguments |
|
599 -.Nm |
|
600 will output all the information it knows of. When options are specified then |
|
601 only the information associated with those options is displayed. |
|
602 |
|
603 The options are as follows: |
|
604 -.Bl -tag -width Ds |
|
605 -.It Fl v |
|
606 +.SH OPTIONS |
|
607 +.PP |
|
608 +.TP 7 |
|
609 +\fB\-v\fR |
|
610 Requests verbose output. When this option is given, some classes of information |
|
611 will display additional data. |
|
612 |
|
613 -.It Fl c |
|
614 +.TP |
|
615 +\fB\-c\fR |
|
616 Display global counters. Each counter increments as its event |
|
617 occurs. The counters may not be reset. The set of supported counters |
|
618 may change over time. |
|
619 |
|
620 -.Bl -tag -width 4 |
|
621 -.It CounterName |
|
622 +.IP CounterName |
|
623 The name of the counter. These names come from the kernel and can change |
|
624 depending on the capability of the kernel module. |
|
625 -.It Value |
|
626 +.IP Value |
|
627 The number of times that the counter has been incremented since the kernel |
|
628 module was loaded. |
|
629 -.El |
|
630 |
|
631 -.It Fl k |
|
632 +.TP |
|
633 +\fB\-k\fR |
|
634 Display all the RDS sockets in the system. There will always be one socket |
|
635 listed that is neither bound to nor connected to any addresses because |
|
636 -.Nm |
|
637 itself uses an unbound socket to collect information. |
|
638 |
|
639 -.Bl -tag -width 4 |
|
640 -.It BoundAddr, BPort |
|
641 +.IP BoundAddr, BPort |
|
642 The IP address and port that the socket is bound to. 0.0.0.0 0 indicates that |
|
643 the socket has not been bound. |
|
644 -.It ConnAddr, CPort |
|
645 +.IP ConnAddr, CPort |
|
646 The IP address and port that the socket is connected to. 0.0.0.0 0 indicates |
|
647 that the socket has not been connected. |
|
648 -.It SndBuf, RcvBuf |
|
649 +.IP SndBuf, RcvBuf |
|
650 The number of bytes of message payload which can be queued for sending or |
|
651 receiving on the socket, respectively. |
|
652 -.It Inode |
|
653 +.IP Inode |
|
654 The number of the inode object associated with the socket. Can be used to |
|
655 locate the process owning a given socket by searching /proc/*/fd for |
|
656 open files referencing a socket with this inode number. |
|
657 -.El |
|
658 |
|
659 -.It Fl n |
|
660 +.TP |
|
661 +\fB\-n\fR |
|
662 Display all RDS connections. RDS connections are maintained between |
|
663 nodes by transports. |
|
664 |
|
665 -.Bl -tag -width 4 |
|
666 -.It LocalAddr |
|
667 +.IP LocalAddr |
|
668 The IP address of this node. For connections that originate and terminate on |
|
669 the same node the local address indicates which address initiated the |
|
670 connection establishment. |
|
671 -.It RemoteAddr |
|
672 +.IP RemoteAddr |
|
673 The IP address of the remote end of the connection. |
|
674 -.It NextTX |
|
675 +.IP NextTX |
|
676 The sequence number that will be given to the next message that is sent |
|
677 over the connection. |
|
678 -.It NextRX |
|
679 +.IP NextRX |
|
680 The sequence number that is expected from the next message to arrive over |
|
681 the connection. Any incoming messages with sequence numbers less than this |
|
682 will be dropped. |
|
683 -.It Flg |
|
684 +.IP Flg |
|
685 Flags which indicate the state of the connection. |
|
686 -.Bl -tag -width 4 |
|
687 -.It s |
|
688 -A process is currently sending a message down the connection. |
|
689 -.It c |
|
690 -The transport is attempting to connect to the remote address. |
|
691 -.It C |
|
692 -The connection to the remote host is connected and active. |
|
693 -.El |
|
694 -.El |
|
695 |
|
696 -.It Fl r, Fl s, Fl t |
|
697 +.IP s |
|
698 + A process is currently sending a message down |
|
699 + the connection. |
|
700 +.IP c |
|
701 + The transport is attempting to connect to the |
|
702 + remote address. |
|
703 +.IP C |
|
704 + The connection to the remote host is connected |
|
705 + and active. |
|
706 + |
|
707 +.TP |
|
708 +\fB\-r\fR, \fB\-s\fR, \fB\-t\fR |
|
709 Display the messages in the receive, send, or retransmit queues respectively. |
|
710 -.Bl -tag -width 4 |
|
711 -.It LocalAddr, LPort |
|
712 + |
|
713 +.IP LocalAddr, LPort |
|
714 The local IP address and port on this node associated with the message. For |
|
715 sent messages this is the source address, for receive messages it is the |
|
716 destination address. |
|
717 -.It RemoteAddr, RPort |
|
718 +.IP RemoteAddr, RPort |
|
719 The remote IP address and port associated with the message. For sent messages |
|
720 this is the destination address, for receive messages it is the source address. |
|
721 -.It Seq |
|
722 +.IP Seq |
|
723 The sequence number of the message. |
|
724 -.It Bytes |
|
725 +.IP Bytes |
|
726 The number of bytes in the message payload. |
|
727 -.El |
|
728 |
|
729 + |
|
730 +.PP |
|
731 The following information sources are dependent on specific transports which |
|
732 may not always be available. |
|
733 |
|
734 -.It Fl I |
|
735 +.TP 7 |
|
736 +\fB\-I\fR |
|
737 Display the IB connections which the IB transport is using to provide |
|
738 RDS connections. |
|
739 |
|
740 -.Bl -tag -width 4 |
|
741 -.It LocalAddr |
|
742 +.IP LocalAddr |
|
743 The local IP address of this connection. |
|
744 -.It RemoteAddr |
|
745 +.IP RemoteAddr |
|
746 The remote IP address of this connection. |
|
747 -.It LocalDev |
|
748 +.IP LocalDev |
|
749 The local IB Global Identifier, printed in IPv6 address syntax. |
|
750 -.It RemoteDev |
|
751 +.IP RemoteDev |
|
752 The remote IB Global Identifier, printed in IPv6 address syntax. |
|
753 -.El |
|
754 |
|
755 If verbose output is requested, per-connection settings such as the |
|
756 maximum number of send and receive work requests will be displayed |
|
757 in addition. |
|
758 |
|
759 -.It Fl T |
|
760 +.TP 7 |
|
761 +\fB\-T\fR |
|
762 Display the TCP sockets which the TCP transport is using to provide |
|
763 RDS connections. |
|
764 |
|
765 -.Bl -tag -width 4 |
|
766 -.It LocalAddr, LPort |
|
767 +.IP LocalAddr, LPort |
|
768 The local IP address and port of this socket. |
|
769 -.It RemoteAddr, RPort |
|
770 +.IP RemoteAddr, RPort |
|
771 The remote IP address and port that this socket is connected to. |
|
772 -.It HdrRemain |
|
773 +.IP HdrRemain |
|
774 The number of bytes that must be read off the socket to complete the next |
|
775 full RDS header. |
|
776 -.It DataRemain |
|
777 +.IP DataRemain |
|
778 The number of bytes that must be read off the socket to complete the data |
|
779 payload of the message which is being received. |
|
780 -.It SentNxt |
|
781 +.IP SentNxt |
|
782 The TCP sequence number of the first byte of the last message that we sent |
|
783 down the connection. |
|
784 -.It ExpectedUna |
|
785 +.IP ExpectedUna |
|
786 The TCP sequence number of the byte past the last byte of the last message |
|
787 that we sent down the connection. When we see that the remote side has |
|
788 acked up to this byte then we know that the remote side has received all |
|
789 our RDS messages. |
|
790 -.It SeenUna |
|
791 +.IP SeenUna |
|
792 The TCP sequence number of the byte past the last byte which has been |
|
793 acked by the remote host. |
|
794 -.El |
|
795 - |
|
796 -.El |
|
797 -.Pp |
|
798 diff -r -u /tmp/rds-tools-2.0.4/rds-ping.1 rds-tools-2.0.7/rds-ping.1 |
|
799 --- /tmp/rds-tools-2.0.4/rds-ping.1 Wed Aug 4 15:25:11 2010 |
|
800 +++ rds-tools-2.0.7/rds-ping.1 Thu Feb 24 13:27:52 2011 |
|
801 @@ -1,69 +1,54 @@ |
|
802 -.Dd Apr 22, 2008 |
|
803 -.Dt RDS-PING 1 |
|
804 -.Os |
|
805 -.Sh NAME |
|
806 -.Nm rds-ping |
|
807 -.Nd test reachability of remote node over RDS |
|
808 -.Pp |
|
809 -.Sh SYNOPSIS |
|
810 -.Nm rds-ping |
|
811 -.Bk -words |
|
812 -.Op Fl c Ar count |
|
813 -.Op Fl i Ar interval |
|
814 -.Op Fl I Ar local_addr |
|
815 -.Ar remote_addr |
|
816 +.TH RDS-PING 1 "Apr 22, 2008" |
|
817 +.SH NAME |
|
818 +rds-ping - test reachability of remote node over RDS |
|
819 |
|
820 -.Sh DESCRIPTION |
|
821 -.Nm rds-ping |
|
822 -is used to test whether a remote node is reachable over RDS. |
|
823 -Its interface is designed to operate pretty much the standard |
|
824 -.Xr ping 8 |
|
825 +.SH SYNOPSIS |
|
826 +.B rds-ping [-c count] [-i interval] [-I local_addr] remote_addr |
|
827 + |
|
828 +.SH DESCRIPTION |
|
829 +.PP |
|
830 +rds-ping is used to test whether a remote node is reachable over RDS. |
|
831 +Its interface is designed to operate pretty much like the standard ping(1M) |
|
832 utility, even though the way it works is pretty different. |
|
833 -.Pp |
|
834 -.Nm rds-ping |
|
835 -opens several RDS sockets and sends packets to port 0 on |
|
836 +.PP |
|
837 +rds-ping opens several RDS sockets and sends packets to port 0 on |
|
838 the indicated host. This is a special port number to which |
|
839 no socket is bound; instead, the kernel processes incoming |
|
840 packets and responds to them. |
|
841 -.Sh OPTIONS |
|
842 +.SH OPTIONS |
|
843 The following options are available for use on the command line: |
|
844 -.Bl -tag -width Ds |
|
845 -.It Fl c Ar count |
|
846 -Causes |
|
847 -.Nm rds-ping |
|
848 -to exit after sending (and receiving) the specified number of |
|
849 +.PP |
|
850 +.TP 7 |
|
851 +\fB\-c\fR \fIcount\fR |
|
852 +Causes rds-ping to exit after sending (and receiving) the specified number of |
|
853 packets. |
|
854 -.It Fl I Ar address |
|
855 -By default, |
|
856 -.Nm rds-ping |
|
857 -will pick the local source address for the RDS socket based |
|
858 +.TP |
|
859 +\fB\-I\fR \fIaddress\fR |
|
860 +By default, rds-ping will pick the local source address for the RDS socket based |
|
861 on routing information for the destination address (i.e. if |
|
862 packets to the given destination would be routed through interface |
|
863 -.Nm ib0 , |
|
864 +ib0, |
|
865 then it will use the IP address of |
|
866 -.Nm ib0 |
|
867 +ib0 |
|
868 as source address). |
|
869 Using the |
|
870 .Fl I |
|
871 option, you can override this choice. |
|
872 -.It Fl i Ar timeout |
|
873 -By default, |
|
874 -.Nm rds-ping |
|
875 -will wait for one second between sending packets. Use this option |
|
876 +.TP |
|
877 +\fB\-i\fR \fItimeout\fR |
|
878 +By default, rds-ping will wait for one second between sending packets. Use this option |
|
879 to specified a different interval. The timeout value is given in |
|
880 seconds, and can be a floating point number. Optionally, append |
|
881 -.Nm msec |
|
882 +msec |
|
883 or |
|
884 -.Nm usec |
|
885 +usec |
|
886 to specify a timeout in milliseconds or microseconds, respectively. |
|
887 -.It |
|
888 +.IP |
|
889 Specifying a timeout considerably smaller than the packet round-trip |
|
890 time will produce unexpected results. |
|
891 -.El |
|
892 -.Sh AUTHORS |
|
893 -.Nm rds-ping |
|
894 + |
|
895 +.SH AUTHORS |
|
896 +rds-ping |
|
897 was written by Olaf Kirch <[email protected]>. |
|
898 -.Sh SEE ALSO |
|
899 -.Xr rds 7 , |
|
900 -.Xr rds-info 1 , |
|
901 -.Xr rds-stress 1 . |
|
902 +.SH SEE ALSO |
|
903 +rds 7, rds-info 1, rds-stress 1. |
|
904 diff -r -u /tmp/rds-tools-2.0.4/configure.in rds-tools-2.0.7/configure.in |
|
905 --- /tmp/rds-tools-2.0.4/configure.in Wed Aug 4 15:25:11 2010 |
|
906 +++ rds-tools-2.0.7/configure.in Thu Feb 24 13:27:51 2011 |
|
907 @@ -1,7 +1,7 @@ |
|
908 AC_PREREQ(2.55) |
|
909 AC_INIT() |
|
910 |
|
911 -VERSION=2.0.4 |
|
912 +VERSION=2.0.7 |
|
913 RELEASE=1 |
|
914 |
|
915 AC_SUBST(VERSION) |
|
916 diff -r -u /tmp/rds-tools-2.0.4/rds-ping.c rds-tools-2.0.7/rds-ping.c |
|
917 --- /tmp/rds-tools-2.0.4/rds-ping.c Wed Aug 4 15:25:10 2010 |
|
918 +++ rds-tools-2.0.7/rds-ping.c Thu Feb 24 13:27:52 2011 |
|
919 @@ -48,7 +48,11 @@ |
|
920 #include <sys/poll.h> |
|
921 #include <fcntl.h> |
|
922 #include <getopt.h> |
|
923 +#if defined(__SVR4) && defined(__sun) |
|
924 +#include <sys/rds.h> |
|
925 +#else |
|
926 #include "rds.h" |
|
927 +#endif |
|
928 |
|
929 #include "pfhack.h" |
|
930 |
|
931 @@ -155,7 +159,12 @@ |
|
932 } |
|
933 |
|
934 memset(&sin, 0, sizeof(sin)); |
|
935 +#if defined(__SVR4) && defined(__sun) |
|
936 + sin.sin_family = AF_INET_OFFLOAD; |
|
937 +#else |
|
938 sin.sin_family = AF_INET; |
|
939 +#endif |
|
940 + |
|
941 sin.sin_addr = opt_dstaddr; |
|
942 |
|
943 gettimeofday(&next_ts, NULL); |
|
944 @@ -181,7 +190,7 @@ |
|
945 break; |
|
946 |
|
947 timeradd(&next_ts, &opt_wait, &next_ts); |
|
948 - if (sendto(sp->fd, NULL, 0, 0, (struct sockaddr *) &sin, sizeof(sin))) |
|
949 + if (sendto(sp->fd, NULL, 0, 0, (struct sockaddr *) &sin, sizeof(sin)) < 0) |
|
950 err = errno; |
|
951 sp->sent_id = ++sent; |
|
952 sp->sent_ts = now; |
|
953 @@ -258,7 +267,11 @@ |
|
954 int pf; |
|
955 |
|
956 memset(&sin, 0, sizeof(sin)); |
|
957 +#if defined(__SVR4) && defined(__sun) |
|
958 + sin.sin_family = AF_INET_OFFLOAD; |
|
959 +#else |
|
960 sin.sin_family = AF_INET; |
|
961 +#endif |
|
962 |
|
963 #ifdef DYNAMIC_PF_RDS |
|
964 pf = discover_pf_rds(); |
|
965 @@ -278,6 +291,9 @@ |
|
966 if (ufd < 0) |
|
967 die_errno("unable to create UDP socket"); |
|
968 sin.sin_addr = *dst; |
|
969 +#if defined(__SVR4) && defined(__sun) |
|
970 + sin.sin_family = AF_INET; |
|
971 +#endif |
|
972 sin.sin_port = htons(1); |
|
973 if (connect(ufd, (struct sockaddr *) &sin, sizeof(sin)) < 0) |
|
974 die_errno("unable to connect to %s", |
|
975 @@ -289,6 +305,9 @@ |
|
976 |
|
977 *src = sin.sin_addr; |
|
978 close(ufd); |
|
979 +#if defined(__SVR4) && defined(__sun) |
|
980 + sin.sin_family = AF_INET_OFFLOAD; |
|
981 +#endif |
|
982 } |
|
983 |
|
984 sin.sin_addr = *src; |
|
985 diff -r -u /tmp/rds-tools-2.0.4/Makefile.in rds-tools-2.0.7/Makefile.in |
|
986 --- /tmp/rds-tools-2.0.4/Makefile.in Wed Aug 4 15:25:11 2010 |
|
987 +++ rds-tools-2.0.7/Makefile.in Thu Feb 24 13:27:51 2011 |
|
988 @@ -4,10 +4,14 @@ |
|
989 mandir = $(DESTDIR)@mandir@ |
|
990 incdir = $(DESTDIR)@includedir@ |
|
991 |
|
992 +CC=gcc |
|
993 + |
|
994 all: all-programs |
|
995 |
|
996 -CFLAGS = -O2 -Wall -Iinclude |
|
997 -CPPFLAGS = -DDEBUG_EXE -DRDS_VERSION=\"@VERSION@\" -MD -MP -MF $(@D)/.$(basename $(@F)).d |
|
998 +CFLAGS += -O2 -Wall -Iinclude |
|
999 +CPPFLAGS += -D_XOPEN_SOURCE=500 -D__EXTENSIONS__ \ |
|
1000 + -DDEBUG_EXE -DRDS_VERSION=\"@VERSION@\" -MD -MP -MF $(@D)/.$(basename $(@F)).d |
|
1001 +LDFLAGS += -lsocket -lnsl -llgrp |
|
1002 |
|
1003 HEADERS = kernel-list.h pfhack.h include/rds.h |
|
1004 COMMON_SOURCES = pfhack.c |
|
1005 @@ -15,7 +19,7 @@ |
|
1006 CLEAN_OBJECTS = $(addsuffix .o,$(PROGRAMS)) $(subst .c,.o,$(COMMON_SOURCES)) |
|
1007 |
|
1008 # This is the default |
|
1009 -DYNAMIC_PF_RDS = true |
|
1010 +#DYNAMIC_PF_RDS = true |
|
1011 |
|
1012 ifneq ($(DYNAMIC_PF_RDS),) |
|
1013 CPPFLAGS += -DDYNAMIC_PF_RDS |
|
1014 @@ -29,14 +33,14 @@ |
|
1015 all-programs: $(PROGRAMS) |
|
1016 |
|
1017 install: $(PROGRAMS) |
|
1018 - install -d $(bindir) |
|
1019 - install -m 555 -s $(PROGRAMS) $(bindir) |
|
1020 - install -d $(mandir)/man1 |
|
1021 - install -d $(mandir)/man7 |
|
1022 - install -m 644 *.1 $(mandir)/man1 |
|
1023 - install -m 644 *.7 $(mandir)/man7 |
|
1024 - install -d $(incdir)/net |
|
1025 - install -m 444 include/rds.h $(incdir)/net |
|
1026 + $(INSTALL) -d $(bindir) |
|
1027 + $(INSTALL) -m 755 -s $(PROGRAMS) $(bindir) |
|
1028 + $(INSTALL) -d $(mandir)/man1 |
|
1029 + $(INSTALL) -d $(mandir)/man7 |
|
1030 + $(INSTALL) -m 644 *.1 $(mandir)/man1 |
|
1031 + $(INSTALL) -m 644 *.7 $(mandir)/man7 |
|
1032 + $(INSTALL) -d $(incdir)/net |
|
1033 + $(INSTALL) -m 444 include/rds.h $(incdir)/net |
|
1034 |
|
1035 clean: |
|
1036 rm -f $(PROGRAMS) $(CLEAN_OBJECTS) |
|
1037 @@ -47,7 +51,7 @@ |
|
1038 |
|
1039 |
|
1040 $(PROGRAMS) : % : %.o $(COMMON_OBJECTS) |
|
1041 - gcc $(CFLAGS) $(LDFLAGS) -o $@ $^ |
|
1042 + $(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) -o $@ $^ |
|
1043 |
|
1044 LOCAL_DFILES := $(wildcard .*.d) |
|
1045 ifneq ($(LOCAL_DFILES),) |
|
1046 @@ -72,8 +76,6 @@ |
|
1047 configure \ |
|
1048 README \ |
|
1049 rds-tools.txt \ |
|
1050 - stap/rds.stp \ |
|
1051 - stap/README \ |
|
1052 docs/rds-architecture.txt \ |
|
1053 examples/Makefile \ |
|
1054 examples/rds-sample.c \ |
|
1055 diff -r -u /tmp/rds-tools-2.0.4/examples/rds-sample.c rds-tools-2.0.7/examples/rds-sample.c |
|
1056 --- /tmp/rds-tools-2.0.4/examples/rds-sample.c Wed Aug 4 15:25:11 2010 |
|
1057 +++ rds-tools-2.0.7/examples/rds-sample.c Thu Feb 24 13:27:53 2011 |
|
1058 @@ -26,6 +26,7 @@ |
|
1059 #include <string.h> |
|
1060 #include <stdlib.h> |
|
1061 |
|
1062 +#if !(defined(__SVR4) && defined(__sun)) |
|
1063 /* FIXME - this is a hack to getaround RDS not exporting any header files. |
|
1064 * This is a local copy of the file found at net/rds/ |
|
1065 */ |
|
1066 @@ -33,6 +34,9 @@ |
|
1067 /* These are defined in rds.h....but that file is not happily included */ |
|
1068 #define SOL_RDS 272 |
|
1069 #define PF_RDS 28 |
|
1070 +#else |
|
1071 +#include <sys/rds.h> |
|
1072 +#endif |
|
1073 |
|
1074 |
|
1075 #define TESTPORT 4000 |
|
1076 @@ -107,12 +111,12 @@ |
|
1077 cmsg->cmsg_type = RDS_CMSG_RDMA_ARGS; |
|
1078 cmsg->cmsg_len = CMSG_LEN(sizeof(struct rds_rdma_args)); |
|
1079 |
|
1080 - iov.addr = (uint64_t) buf; |
|
1081 + iov.addr = (uint64_t)(uintptr_t)buf; |
|
1082 iov.bytes = sizeof(struct rdss_message); |
|
1083 |
|
1084 args->remote_vec.addr = 0; |
|
1085 args->remote_vec.bytes = sizeof(struct rdss_message); |
|
1086 - args->local_vec_addr = (uint64_t) &iov; |
|
1087 + args->local_vec_addr = (uint64_t)(uintptr_t)&iov; |
|
1088 args->nr_local = 1; |
|
1089 args->flags = remote_flags ? (RDS_RDMA_READWRITE | RDS_RDMA_FENCE) : 0; |
|
1090 args->flags |= RDS_RDMA_NOTIFY_ME; |
|
1091 @@ -244,9 +248,9 @@ |
|
1092 void *ctlbuf; |
|
1093 struct iovec *iov; |
|
1094 |
|
1095 - mr_args.vec.addr = (uint64_t) buf; |
|
1096 + mr_args.vec.addr = (uint64_t)(uintptr_t)buf; |
|
1097 mr_args.vec.bytes = sizeof(struct rdss_message); |
|
1098 - mr_args.cookie_addr = (uint64_t) cookie; |
|
1099 + mr_args.cookie_addr = (uint64_t)(uintptr_t)cookie; |
|
1100 mr_args.flags = RDS_RDMA_USE_ONCE; |
|
1101 |
|
1102 ctlbuf = calloc(1, CMSG_SPACE(sizeof(mr_args))); |
|
1103 diff -r -u /tmp/rds-tools-2.0.4/examples/Makefile rds-tools-2.0.7/examples/Makefile |
|
1104 --- /tmp/rds-tools-2.0.4/examples/Makefile Wed Aug 4 15:25:11 2010 |
|
1105 +++ rds-tools-2.0.7/examples/Makefile Thu Feb 24 13:27:52 2011 |
|
1106 @@ -1,6 +1,12 @@ |
|
1107 +CC=gcc |
|
1108 +LIBS = -lsocket -lnsl |
|
1109 +CPPFLAGS += -D_XOPEN_SOURCE=500 -D__EXTENSIONS__ |
|
1110 |
|
1111 all: rds-sample |
|
1112 |
|
1113 -rds-sample: rds-sample.o |
|
1114 +rds-sample: rds-sample.c |
|
1115 + $(CC) $(CPPFLAGS) $(CFLAGS) -o rds-sample rds-sample.c $(LIBS) |
|
1116 |
|
1117 -CFLAGS = -I ../include |
|
1118 +clean: |
|
1119 + rm -rf rds-sample |
|
1120 + |
|
1121 diff -r -u /tmp/rds-tools-2.0.4/configure rds-tools-2.0.7/configure |
|
1122 --- /tmp/rds-tools-2.0.4/configure Wed Aug 4 15:25:11 2010 |
|
1123 +++ rds-tools-2.0.7/configure Thu Feb 24 13:27:51 2011 |
|
1124 @@ -1215,7 +1215,7 @@ |
|
1125 |
|
1126 |
|
1127 |
|
1128 -VERSION=2.0.4 |
|
1129 +VERSION=2.0.7 |
|
1130 RELEASE=1 |
|
1131 |
|
1132 |
|
1133 diff -r -u /tmp/rds-tools-2.0.4/rds-stress.1 rds-tools-2.0.7/rds-stress.1 |
|
1134 --- /tmp/rds-tools-2.0.4/rds-stress.1 Wed Aug 4 15:25:11 2010 |
|
1135 +++ rds-tools-2.0.7/rds-stress.1 Thu Feb 24 13:27:52 2011 |
|
1136 @@ -1,99 +1,103 @@ |
|
1137 -.Dd May 15, 2007 |
|
1138 -.Dt RDS-STRESS 1 |
|
1139 -.Os |
|
1140 -.Sh NAME |
|
1141 -.Nm rds-stress |
|
1142 -.Nd send messages between processes over RDS sockets |
|
1143 -.Pp |
|
1144 -.Sh SYNOPSIS |
|
1145 -.Nm rds-stress |
|
1146 -.Bk -words |
|
1147 -.Op Fl p Ar port_number |
|
1148 -.Op Fl r Ar receive_address |
|
1149 -.Op Fl s Ar send_address |
|
1150 -.Op Fl a Ar ack_bytes |
|
1151 -.Op Fl q Ar request_bytes |
|
1152 -.Op Fl D Ar rdma_bytes |
|
1153 -.Op Fl d Ar queue_depth |
|
1154 -.Op Fl t Ar nr_tasks |
|
1155 -.Op Fl c |
|
1156 -.Op Fl R |
|
1157 -.Op Fl V |
|
1158 -.Op Fl v |
|
1159 +.TH RDS-STRESS 1 " May 15, 2007" |
|
1160 +.SH "NAME" |
|
1161 +rds-stress - send messages between processes over RDS sockets |
|
1162 +.PP |
|
1163 +.SH SYNOPSIS |
|
1164 +.HP |
|
1165 +.nf |
|
1166 +rds-stress [-p port_number] [-r receive_address] [-s send_address] |
|
1167 + [-a ack_bytes] [-q request_bytes] [-D rdma_bytes] |
|
1168 + [-d queue_depth] [-t nr_tasks] [-c] [-R] [-V] [-v] [-o] |
|
1169 + [-I iovecs] [-M nr] [-z] [-g lgrpid] |
|
1170 +.fi |
|
1171 |
|
1172 -.Sh DESCRIPTION |
|
1173 -.Nm rds-stress |
|
1174 + |
|
1175 +.SH DESCRIPTION |
|
1176 +.PP |
|
1177 +.Nm |
|
1178 +rds-stress |
|
1179 sends messages between groups tasks, usually running on seperate |
|
1180 machines. |
|
1181 -.Pp |
|
1182 +.PP |
|
1183 First a passive receiving instance is started. |
|
1184 -.Pp |
|
1185 -.Dl $ rds-stress |
|
1186 -.Pp |
|
1187 +.RS 12 |
|
1188 + |
|
1189 + $ rds-stress |
|
1190 +.RE |
|
1191 +.PP |
|
1192 Then an active sending instance is started, giving it |
|
1193 the address and port at which it will find a listening |
|
1194 passive receiver. In addition, it is given configuration options which |
|
1195 both instances will use. |
|
1196 -.Pp |
|
1197 -.Dl $ rds-stress -s recvhost -p 4000 -t 1 -d 1 |
|
1198 -.Pp |
|
1199 +.PP |
|
1200 +.RS 12 |
|
1201 + $ rds-stress -s recvhost -p 4000 -t 1 -d 1 |
|
1202 +.RE |
|
1203 +.PP |
|
1204 The active sender will parse the options, connect to the passive receiver, and |
|
1205 send the options over this connection. From this point on both instances |
|
1206 exhibit the exact same behaviour. |
|
1207 -.Pp |
|
1208 +.PP |
|
1209 They will create a number of child tasks as specified by the -t option. Once |
|
1210 the children are created the parent sleeps for a second at a time, printing a |
|
1211 summary of statistics at each interval. |
|
1212 -.Pp |
|
1213 +.PP |
|
1214 Each child will open an RDS socket, each binding to a port number in order |
|
1215 after the port number given on the command line. The first child would bind to |
|
1216 port 4001 in our example. Each child sets the send and receive buffers to |
|
1217 exactly fit the number of messages, requests and acks, that will be in flight |
|
1218 as determind by the command line arguments. |
|
1219 -.Pp |
|
1220 +.PP |
|
1221 The children then enter their loop. They will keep a number of sent messages |
|
1222 outstanding as specified by the -d option. When they reach this limit they |
|
1223 will wait to receive acks which will allow them to send again. As they receive |
|
1224 messages from their peers they immediately send acks. |
|
1225 -.Pp |
|
1226 +.PP |
|
1227 Every second, the parent process will display statistics of the ongoing |
|
1228 stress test. The output is described in section OUTPUT below. |
|
1229 -.Pp |
|
1230 +.PP |
|
1231 If the -T option is given, the test will terminate after the specified time, |
|
1232 and a summary is printed. |
|
1233 -.Pp |
|
1234 +.PP |
|
1235 Each child maintains outstanding messages to all other children of the other instance. |
|
1236 They do not send to their siblings. |
|
1237 -.Sh OPTIONS |
|
1238 +.SH OPTIONS |
|
1239 +.PP |
|
1240 The following options are available for use on the command line: |
|
1241 -.Bl -tag -width Ds |
|
1242 -.It Fl p Ar port_number |
|
1243 + |
|
1244 +.TP 7 |
|
1245 +\fB\-p port_number |
|
1246 Each parent binds a TCP socket to this port number and their respective |
|
1247 address. They will trade the negotiated options over this socket. Each |
|
1248 child will bind an RDS socket to the range of ports immediately following |
|
1249 this port number, for as many children as there are. |
|
1250 -.It Fl s Ar send_address |
|
1251 +.TP |
|
1252 +\fB\-s send_address |
|
1253 A connection attempt is made to this address. Once its complete and the |
|
1254 options are sent over it then children will be created and work will proceed. |
|
1255 -.It Fl r Ar receive_address |
|
1256 +.TP |
|
1257 +\fB\-r receive_address |
|
1258 This specifies the address that messages will be sent from. If -s is not |
|
1259 specified then rds-stress waits for a connection on this address before |
|
1260 proceeding. |
|
1261 -.Pp |
|
1262 + |
|
1263 If this option is not given, rds-stress will choose an appropriate address. |
|
1264 The passive process will accept connections on all local interfaces, and |
|
1265 obtain the address once the control connection is established. |
|
1266 The active process will choose a local address based on the interface through |
|
1267 which it connects to the destination address. |
|
1268 -.It Fl a Ar ack_bytes |
|
1269 +.TP |
|
1270 +\fB\-a ack_bytes |
|
1271 This specifies the size of the ack messages, in bytes. There is a minimum size |
|
1272 which depends on the format of the ack messages, which may change over time. |
|
1273 See section "Message Sizes" below. |
|
1274 -.It Fl q Ar request_bytes |
|
1275 +.TP |
|
1276 +\fB\-q request_bytes |
|
1277 This specifies the size of the request messages, in bytes. |
|
1278 It also has a minimum size which may change over time. |
|
1279 See section "Message Sizes" below. |
|
1280 -.It Fl D Ar rdma_bytes |
|
1281 +.TP |
|
1282 +\fB\-D rdma_bytes |
|
1283 RDSv3 is capable of transmitting part of a message via RDMA directly from |
|
1284 application buffer to application buffer. This option enables RDMA support |
|
1285 in rds-stress: request packets include parameters for an RDMA READ or WRITE |
|
1286 @@ -100,20 +104,25 @@ |
|
1287 operation, which the receiving process executes at the time the ACK packet |
|
1288 is sent. |
|
1289 See section "Message Sizes" below. |
|
1290 -.It Fl d Ar queue_depth |
|
1291 +.TP |
|
1292 +\fB\-d queue_depth |
|
1293 Each child will try to maintain this many sent messages outstanding to each |
|
1294 of its peers on the remote address. |
|
1295 -.It Fl t Ar nr_tasks |
|
1296 +.TP |
|
1297 +\fB\-t nr_tasks |
|
1298 Each parent will create this many children tasks. |
|
1299 -.It Fl T Ar seconds |
|
1300 +.TP |
|
1301 +\fB\-T seconds |
|
1302 Specify the duration of the test run. After the specified number of seconds, |
|
1303 all processes on both ends of the connection will terminate, and the |
|
1304 active instance will print a summary. By default, rds-stress will keep |
|
1305 on sending and receiving messages. |
|
1306 -.It Fl z |
|
1307 +.TP |
|
1308 +\fB\-z |
|
1309 This flag can be used in conjunction with -T. It suppresses the ongoing |
|
1310 display of statistics, and prints a summary only. |
|
1311 -.It Fl c |
|
1312 +.TP |
|
1313 +\fB\-c |
|
1314 This causes rds-stress to create child tasks which just consume CPU cycles. |
|
1315 One task is created for each CPU in the system. First each child observes the |
|
1316 maximum rate at which it can consume cycles. This means that this option |
|
1317 @@ -121,50 +130,78 @@ |
|
1318 use of the system by observing the lesser rate at which the children consume |
|
1319 cycles. This option is *not* shared between the active and passive instances. |
|
1320 It must be specified on each rds-stress command line. |
|
1321 -.It Fl R |
|
1322 +.TP |
|
1323 +\fB\-R |
|
1324 This tells the rds-stress parent process to run with SCHED_RR priority, |
|
1325 giving it precedence over the child processes. This is useful when running |
|
1326 with lots of tasks, where there is a risk of the child processes starving |
|
1327 the parent, and skewing the results. |
|
1328 -.It Fl v |
|
1329 +.TP |
|
1330 +\fB\-v |
|
1331 With this option enabled, packets are filled with a pattern that is |
|
1332 verified by the receiver. This check can help detect data corruption |
|
1333 occuring under high load. |
|
1334 -.El |
|
1335 -.Pp |
|
1336 +.TP |
|
1337 +\fB\-o |
|
1338 +Datagrams sent one way only (default is both) |
|
1339 +.TP |
|
1340 +\fB\-I iovecs |
|
1341 +RDMA: number of user buffers to target (default is 1, max is 512) |
|
1342 +.TP |
|
1343 +\fB\-M nr |
|
1344 +RDMA: mode (0=readwrite,1=readonly,2=writeonly) |
|
1345 +.TP |
|
1346 +\fB\-g lgrpid |
|
1347 +bind the process to the specified lgrp |
|
1348 +.PP |
|
1349 |
|
1350 -.Ss Message Sizes |
|
1351 +.SS Message Sizes |
|
1352 Options which set a message size (such as -a) specify a number of bytes |
|
1353 by default. By appending \fBK\fP, \fBM\fP, or \fBG\fP, you can specify the size |
|
1354 in kilobytes, megabytes or gigabytes, respectively. For instance, |
|
1355 the following will run rds-stress with a message and ACK size of 1024 |
|
1356 bytes, and an RDMA message size of 1048576 bytes: |
|
1357 -.Pp |
|
1358 -.Dl rds-stress ... -q 1K -a 1K -D 1M |
|
1359 -.Pp |
|
1360 -.Pp |
|
1361 -.Sh OUTPUT |
|
1362 +.PP |
|
1363 +.RS 12 |
|
1364 +rds-stress ... -q 1K -a 1K -D 1M |
|
1365 +.RE |
|
1366 +.PP |
|
1367 +.PP |
|
1368 +.SH OUTPUT |
|
1369 Each parent outputs columns of statistics at a regular interval: |
|
1370 -.Bl -tag -width Ds |
|
1371 -.It tsks |
|
1372 +.TP 8 |
|
1373 +tsks |
|
1374 The number of child tasks which are running. |
|
1375 -.It tx/s |
|
1376 +.TP |
|
1377 +tx/s |
|
1378 The number of sendmsg() calls that all children are executing, per second. |
|
1379 -.It tx+rx K/s |
|
1380 +.TP |
|
1381 +rx/s |
|
1382 +The number of recvmsg() calls that all children are executing, per second. |
|
1383 +.TP |
|
1384 +tx+rx K/s |
|
1385 The total number of bytes that are flowing through sendmsg() and recvmsg() for all children. |
|
1386 This includes both request and ack messages. |
|
1387 -.It rw+rr K/s |
|
1388 -The total number of bytes that are being transferred via RDMA READs and |
|
1389 +.TP |
|
1390 +mbi K/s |
|
1391 +The total number of bytes that are being received via RDMA READs and |
|
1392 WRITEs for all children. |
|
1393 -.It tx us/c |
|
1394 +.TP |
|
1395 +mbo K/s |
|
1396 +The total number of bytes that are being transmitted via RDMA READs and |
|
1397 +WRITEs for all children. |
|
1398 +.TP |
|
1399 +tx us/c |
|
1400 The average number of microseconds spent in sendmsg() calls. |
|
1401 -.It rtt us |
|
1402 +.TP |
|
1403 +rtt us |
|
1404 The average round trip time for a request and ack message pair. This measures |
|
1405 the total time between when a task sends a request and when it finally receives |
|
1406 the ack for that message. Because it includes the time it takes for the |
|
1407 receiver to wake up, receive the message, and send an ack, it can grow to be |
|
1408 quite large under load. |
|
1409 -.It cpu % |
|
1410 +.TP |
|
1411 +cpu % |
|
1412 This is the percentage of available CPU resources on this machine that are being |
|
1413 consumed since rds-stress started running. It will show -1.00 if -c is not |
|
1414 given. It is calculated based on the amount of CPU resources that CPU soaking |
|
1415 @@ -171,4 +208,3 @@ |
|
1416 tasks are able to consume. This lets it measure CPU use by the system, say in |
|
1417 interrupt handlers, that task-based CPU accounting does not include. |
|
1418 For this to work rds-stress must be started with -c on an idle system. |
|
1419 -.El |
|
1420 diff -r -u /tmp/rds-tools-2.0.4/include/rds.h rds-tools-2.0.7/include/rds.h |
|
1421 --- /tmp/rds-tools-2.0.4/include/rds.h Wed Aug 4 15:25:11 2010 |
|
1422 +++ rds-tools-2.0.7/include/rds.h Thu Feb 24 13:30:23 2011 |
|
1423 @@ -84,6 +84,8 @@ |
|
1424 #define RDS_CMSG_CONG_UPDATE 5 |
|
1425 #define RDS_CMSG_ATOMIC_FADD 6 |
|
1426 #define RDS_CMSG_ATOMIC_CSWP 7 |
|
1427 +#define RDS_CMSG_MASKED_ATOMIC_FADD 8 |
|
1428 +#define RDS_CMSG_MASKED_ATOMIC_CSWP 9 |
|
1429 |
|
1430 #define RDS_INFO_FIRST 10000 |
|
1431 #define RDS_INFO_COUNTERS 10000 |
|
1432 @@ -252,8 +254,25 @@ |
|
1433 rds_rdma_cookie_t cookie; |
|
1434 uint64_t local_addr; |
|
1435 uint64_t remote_addr; |
|
1436 - uint64_t swap_add; |
|
1437 - uint64_t compare; |
|
1438 + union { |
|
1439 + struct { |
|
1440 + uint64_t compare; |
|
1441 + uint64_t swap; |
|
1442 + } cswp; |
|
1443 + struct { |
|
1444 + uint64_t add; |
|
1445 + } fadd; |
|
1446 + struct { |
|
1447 + uint64_t compare; |
|
1448 + uint64_t swap; |
|
1449 + uint64_t compare_mask; |
|
1450 + uint64_t swap_mask; |
|
1451 + } m_cswp; |
|
1452 + struct { |
|
1453 + uint64_t add; |
|
1454 + uint64_t nocarry_mask; |
|
1455 + } m_fadd; |
|
1456 + }; |
|
1457 u_int64_t flags; |
|
1458 u_int64_t user_token; |
|
1459 }; |
|
1460 @@ -278,5 +297,6 @@ |
|
1461 #define RDS_RDMA_USE_ONCE 0x0008 /* free MR after use */ |
|
1462 #define RDS_RDMA_DONTWAIT 0x0010 /* Don't wait in SET_BARRIER */ |
|
1463 #define RDS_RDMA_NOTIFY_ME 0x0020 /* Notify when operation completes */ |
|
1464 +#define RDS_RDMA_SILENT 0x0040 /* Do not interrupt remote */ |
|
1465 |
|
1466 #endif /* IB_RDS_H */ |