changeset 1455 | 74681f26bd4e |
parent 715 | eed3ed08f692 |
child 1513 | 594764ea1267 |
1454:eb73f5852132 | 1455:74681f26bd4e |
---|---|
13 #include <syscall.h> |
13 #include <syscall.h> |
14 +#endif |
14 +#endif |
15 #include <sys/stat.h> |
15 #include <sys/stat.h> |
16 #include <sys/poll.h> |
16 #include <sys/poll.h> |
17 #include <ctype.h> |
17 #include <ctype.h> |
18 @@ -22,10 +28,16 @@ |
18 @@ -22,12 +28,51 @@ |
19 #include <fcntl.h> |
19 #include <fcntl.h> |
20 #include <sched.h> |
20 #include <sched.h> |
21 #include <getopt.h> |
21 #include <getopt.h> |
22 +#include <sys/ioctl.h> |
|
22 +#if !(defined(__SVR4) && defined(__sun)) |
23 +#if !(defined(__SVR4) && defined(__sun)) |
23 #include <byteswap.h> |
24 #include <byteswap.h> |
24 #include "rds.h" |
25 #include "rds.h" |
25 - |
26 - |
26 +#else |
27 +#else |
30 +#if defined(__SVR4) && defined(__sun) |
31 +#if defined(__SVR4) && defined(__sun) |
31 +#include <infiniband/ofa_solaris.h> |
32 +#include <infiniband/ofa_solaris.h> |
32 +#endif |
33 +#endif |
33 |
34 |
34 /* |
35 /* |
36 + * Define these here until these are defined in rds.h |
|
37 + * (rds_reset, rds_asend_args and rds_rdma_send_notify) |
|
38 + */ |
|
39 +#define SIOCRDSSETTOS 11000 |
|
40 +#define SIOCRDSGETTOS 11001 |
|
41 +#define RDS_SEND_NOTIFY_ME 0x0100 |
|
42 +#define RDS_CMSG_ASYNC_SEND 10 |
|
43 +#define RDS_CONN_RESET 8 |
|
44 + |
|
45 +struct rds_reset { |
|
46 + u_int8_t tos; |
|
47 +#if defined(__SVR4) && defined(__sun) |
|
48 + u_int32_t src; |
|
49 + u_int32_t dst; |
|
50 +#else |
|
51 + struct in_addr src; |
|
52 + struct in_addr dst; |
|
53 +#endif |
|
54 +}; |
|
55 + |
|
56 +struct rds_asend_args { |
|
57 + u_int64_t user_token; |
|
58 + u_int64_t flags; |
|
59 +}; |
|
60 + |
|
61 +struct rds_rdma_send_notify { |
|
62 + u_int64_t user_token; |
|
63 + int32_t status; |
|
64 +}; |
|
65 + |
|
66 + |
|
67 +/* |
|
35 * |
68 * |
36 @@ -102,6 +114,10 @@ |
69 * TODO |
70 * - checksum the data some day. |
|
71 @@ -74,11 +119,38 @@ |
|
72 uint32_t rdma_vector; |
|
73 uint32_t rdma_alignment; |
|
74 uint32_t connect_retries; |
|
75 + uint8_t reset; |
|
76 + uint8_t tos; |
|
77 + uint8_t async; |
|
78 } __attribute__((packed)); |
|
79 |
|
80 +#define MAX_BUCKETS 16 |
|
81 + |
|
82 static struct options opt; |
|
83 static int control_fd; |
|
84 +static uint64_t rtt_threshold; |
|
85 +static int show_histogram; |
|
86 |
|
87 +static int get_bucket(uint64_t rtt_time) |
|
88 +{ |
|
89 + int i; |
|
90 + uint64_t l_rtt_time = rtt_time; |
|
91 + |
|
92 + if (!l_rtt_time) |
|
93 + i = 0; |
|
94 + else |
|
95 + { |
|
96 + i = -1; |
|
97 + while (l_rtt_time) |
|
98 + { |
|
99 + i++; |
|
100 + l_rtt_time = (l_rtt_time >> 1); |
|
101 + } |
|
102 + } |
|
103 + |
|
104 + return i; |
|
105 +} |
|
106 + |
|
107 struct counter { |
|
108 uint64_t nr; |
|
109 uint64_t sum; |
|
110 @@ -102,6 +174,10 @@ |
|
37 |
111 |
38 #define NR_STATS S__LAST |
112 #define NR_STATS S__LAST |
39 |
113 |
40 +#if defined(__SVR4) && defined(__sun) |
114 +#if defined(__SVR4) && defined(__sun) |
41 +int sol_ioctl(int, int, struct rds_info_arg *, socklen_t *, int *); |
115 +int sol_ioctl(int, int, struct rds_info_arg *, socklen_t *, int *); |
42 +#endif |
116 +#endif |
43 + |
117 + |
44 /* |
118 /* |
45 * Parents share a mapped array of these with their children. Each child |
119 * Parents share a mapped array of these with their children. Each child |
46 * gets one. It's used to communicate between the child and the parent |
120 * gets one. It's used to communicate between the child and the parent |
47 @@ -110,6 +126,7 @@ |
121 @@ -110,9 +186,11 @@ |
48 struct child_control { |
122 struct child_control { |
49 pid_t pid; |
123 pid_t pid; |
50 int ready; |
124 int ready; |
51 + int stopping; |
125 + int stopping; |
52 struct timeval start; |
126 struct timeval start; |
53 struct counter cur[NR_STATS]; |
127 struct counter cur[NR_STATS]; |
54 struct counter last[NR_STATS]; |
128 struct counter last[NR_STATS]; |
55 @@ -254,7 +271,20 @@ |
129 + uint64_t latency_histogram[MAX_BUCKETS]; |
130 } __attribute__((aligned (256))); /* arbitrary */ |
|
131 |
|
132 struct soak_control { |
|
133 @@ -132,6 +210,7 @@ |
|
134 */ |
|
135 #define OP_REQ 1 |
|
136 #define OP_ACK 2 |
|
137 +#define OP_DUMP 3 |
|
138 |
|
139 #define RDMA_OP_READ 1 |
|
140 #define RDMA_OP_WRITE 2 |
|
141 @@ -143,12 +222,12 @@ |
|
142 */ |
|
143 struct header { |
|
144 uint32_t seq; |
|
145 + uint8_t op; |
|
146 uint32_t from_addr; |
|
147 uint32_t to_addr; |
|
148 uint16_t from_port; |
|
149 uint16_t to_port; |
|
150 uint16_t index; |
|
151 - uint8_t op; |
|
152 |
|
153 /* RDMA related. |
|
154 * rdma_op must be the first field, because we |
|
155 @@ -163,11 +242,18 @@ |
|
156 uint32_t rdma_vector; |
|
157 |
|
158 uint8_t data[0]; |
|
159 + uint8_t retry; |
|
160 + uint8_t rdma_remote_err; |
|
161 + uint8_t pending; |
|
162 } __attribute__((packed)); |
|
163 |
|
164 #define MIN_MSG_BYTES (sizeof(struct header)) |
|
165 #define BASIC_HEADER_SIZE (size_t)(&((struct header *) 0)->rdma_op) |
|
166 |
|
167 +#define print_outlier(...) do { \ |
|
168 + fprintf(stderr, __VA_ARGS__); \ |
|
169 +} while (0) |
|
170 + |
|
171 #define die(fmt...) do { \ |
|
172 fprintf(stderr, fmt); \ |
|
173 exit(1); \ |
|
174 @@ -254,7 +340,20 @@ |
|
56 |
175 |
57 die("invalid host name or dotted quad '%s'\n", ptr); |
176 die("invalid host name or dotted quad '%s'\n", ptr); |
58 } |
177 } |
59 +#if defined(__SVR4) && defined(__sun) |
178 +#if defined(__SVR4) && defined(__sun) |
60 +static lgrp_id_t lgrp_id = -1; |
179 +static lgrp_id_t lgrp_id = -1; |
71 +#endif |
190 +#endif |
72 + |
191 + |
73 static void usage(void) |
192 static void usage(void) |
74 { |
193 { |
75 fprintf(stderr, "rds-stress version %s\n", RDS_VERSION); |
194 fprintf(stderr, "rds-stress version %s\n", RDS_VERSION); |
76 @@ -281,6 +311,9 @@ |
195 @@ -273,6 +372,7 @@ |
196 " -d [depth, 1] request pipeline depth, nr outstanding\n" |
|
197 " -t [nr, 1] number of child tasks\n" |
|
198 " -T [seconds, 0] runtime of test, 0 means infinite\n" |
|
199 + " -Q [tos, 0] Type of Service\n" |
|
200 " -D [bytes] RDMA: size\n" |
|
201 " -I [iovecs, 1] RDMA: number of user buffers to target (max 512)\n" |
|
202 " -M [nr, 0] RDMA: mode (0=readwrite,1=readonly,2=writeonly)\n" |
|
203 @@ -281,6 +381,9 @@ |
|
77 " -c measure cpu use with per-cpu soak processes\n" |
204 " -c measure cpu use with per-cpu soak processes\n" |
78 " -V trace execution\n" |
205 " -V trace execution\n" |
79 " -z print a summary at end of test only\n" |
206 " -z print a summary at end of test only\n" |
80 +#if defined(__SVR4) && defined(__sun) |
207 +#if defined(__SVR4) && defined(__sun) |
81 + " -g [lgrpid] bind the process to the specified lgrp\n" |
208 + " -g [lgrpid] bind the process to the specified lgrp\n" |
82 +#endif |
209 +#endif |
83 "\n" |
210 "\n" |
84 "Example:\n" |
211 "Example:\n" |
85 " recv$ rds-stress\n" |
212 " recv$ rds-stress\n" |
86 @@ -310,7 +343,7 @@ |
213 @@ -310,7 +413,7 @@ |
87 static void check_parent(pid_t pid) |
214 static void check_parent(pid_t pid) |
88 { |
215 { |
89 if (pid != getppid()) |
216 if (pid != getppid()) |
90 - die("parent %u exited\n", pid); |
217 - die("parent %u exited\n", pid); |
91 + die("parent %u exited\n", (int)pid); |
218 + die("parent %u exited\n", (int)pid); |
92 } |
219 } |
93 |
220 |
94 /* |
221 /* |
95 @@ -334,6 +367,7 @@ |
222 @@ -334,6 +437,7 @@ |
96 msg_pattern[i] = k; |
223 msg_pattern[i] = k; |
97 } |
224 } |
98 |
225 |
99 +#if !(defined(__SVR4) && defined(__sun)) |
226 +#if !(defined(__SVR4) && defined(__sun)) |
100 #if __BYTE_ORDER == __LITTLE_ENDIAN |
227 #if __BYTE_ORDER == __LITTLE_ENDIAN |
101 #define htonll(x) bswap_64(x) |
228 #define htonll(x) bswap_64(x) |
102 #define ntohll(x) bswap_64(x) |
229 #define ntohll(x) bswap_64(x) |
103 @@ -341,6 +375,7 @@ |
230 @@ -341,6 +445,7 @@ |
104 #define htonll(x) (x) |
231 #define htonll(x) (x) |
105 #define ntohll(x) (x) |
232 #define ntohll(x) (x) |
106 #endif |
233 #endif |
107 +#endif /* Not sun */ |
234 +#endif /* Not sun */ |
108 |
235 |
109 static void encode_hdr(struct header *dst, const struct header *hdr) |
236 static void encode_hdr(struct header *dst, const struct header *hdr) |
110 { |
237 { |
111 @@ -584,7 +619,11 @@ |
238 @@ -361,6 +466,7 @@ |
239 dst->rdma_key = htonll(hdr->rdma_key); |
|
240 dst->rdma_size = htonl(hdr->rdma_size); |
|
241 dst->rdma_vector = htonl(hdr->rdma_vector); |
|
242 + dst->retry = hdr->retry; |
|
243 } |
|
244 |
|
245 static void decode_hdr(struct header *dst, const struct header *hdr) |
|
246 @@ -382,6 +488,7 @@ |
|
247 dst->rdma_key = ntohll(hdr->rdma_key); |
|
248 dst->rdma_size = ntohl(hdr->rdma_size); |
|
249 dst->rdma_vector = ntohl(hdr->rdma_vector); |
|
250 + dst->retry = hdr->retry; |
|
251 } |
|
252 |
|
253 static void fill_hdr(void *message, uint32_t bytes, struct header *hdr) |
|
254 @@ -412,11 +519,19 @@ |
|
255 * Compare incoming message header with expected header. All header fields |
|
256 * are in host byte order except for address and port fields. |
|
257 */ |
|
258 -static int check_hdr(void *message, uint32_t bytes, const struct header *hdr) |
|
259 +static int check_hdr(void *message, uint32_t bytes, struct header *hdr, struct options *opts) |
|
260 { |
|
261 struct header msghdr; |
|
262 + uint32_t inc_seq; |
|
263 + uint32_t my_seq; |
|
264 |
|
265 decode_hdr(&msghdr, message); |
|
266 + inc_seq = msghdr.seq; |
|
267 + my_seq = hdr->seq; |
|
268 + |
|
269 + if (msghdr.retry && (inc_seq < my_seq)) |
|
270 + return -1; |
|
271 + |
|
272 if (memcmp(&msghdr, hdr, BASIC_HEADER_SIZE)) { |
|
273 #define bleh(var, disp) \ |
|
274 disp(hdr->var), \ |
|
275 @@ -428,7 +543,7 @@ |
|
276 * with stdout() and we don't get things stomping on each |
|
277 * other |
|
278 */ |
|
279 - printf( "An incoming message had a header which\n" |
|
280 + printf( "An incoming message had a %s header which\n" |
|
281 "didn't contain the fields we expected:\n" |
|
282 " member expected eq got\n" |
|
283 " seq %15u %s %15u\n" |
|
284 @@ -438,6 +553,7 @@ |
|
285 " to_port %15u %s %15u\n" |
|
286 " index %15u %s %15u\n" |
|
287 " op %15u %s %15u\n", |
|
288 + (msghdr.retry) ? "RETRY" : "", |
|
289 bleh(seq, /**/), |
|
290 bleh(from_addr, inet_ntoa_32), |
|
291 bleh(from_port, ntohs), |
|
292 @@ -569,6 +685,9 @@ |
|
293 |
|
294 fcntl(fd, F_SETFL, O_NONBLOCK); |
|
295 |
|
296 + if (opts->tos && ioctl(fd, SIOCRDSSETTOS, &opts->tos)) |
|
297 + die_errno("ERROR: failed to set TOS\n"); |
|
298 + |
|
299 return fd; |
|
300 } |
|
301 |
|
302 @@ -584,7 +703,11 @@ |
|
112 if (opts->receive_addr == 0) |
303 if (opts->receive_addr == 0) |
113 return 1; |
304 return 1; |
114 |
305 |
115 +#if defined(__SVR4) && defined(__sun) |
306 +#if defined(__SVR4) && defined(__sun) |
116 + sin.sin_family = AF_INET_OFFLOAD; |
307 + sin.sin_family = AF_INET_OFFLOAD; |
118 sin.sin_family = AF_INET; |
309 sin.sin_family = AF_INET; |
119 +#endif |
310 +#endif |
120 sin.sin_port = htons(opts->starting_port); |
311 sin.sin_port = htons(opts->starting_port); |
121 sin.sin_addr.s_addr = htonl(opts->receive_addr); |
312 sin.sin_addr.s_addr = htonl(opts->receive_addr); |
122 |
313 |
123 @@ -677,7 +716,11 @@ |
314 @@ -639,7 +762,7 @@ |
315 mr_args.flags = RDS_FREE_MR_ARGS_INVALIDATE; |
|
316 #endif |
|
317 if (setsockopt(fd, sol, RDS_FREE_MR, &mr_args, sizeof(mr_args))) |
|
318 - die_errno("setsockopt(RDS_FREE_MR) failed"); |
|
319 + return; |
|
320 mrs_allocated--; |
|
321 } |
|
322 |
|
323 @@ -677,7 +800,11 @@ |
|
124 size = sizeof(struct rdma_key_o_meter) |
324 size = sizeof(struct rdma_key_o_meter) |
125 + 2 * nr_tasks * sizeof(*kt) |
325 + 2 * nr_tasks * sizeof(*kt) |
126 + 2 * RDMA_MAX_TRACKED_KEYS * sizeof(*ks); |
326 + 2 * RDMA_MAX_TRACKED_KEYS * sizeof(*ks); |
127 +#if defined(__SVR4) && defined(__sun) |
327 +#if defined(__SVR4) && defined(__sun) |
128 + base = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_SHARED, -1, 0); |
328 + base = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_SHARED, -1, 0); |
130 base = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_SHARED, 0, 0); |
330 base = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_SHARED, 0, 0); |
131 +#endif |
331 +#endif |
132 if (base == MAP_FAILED) |
332 if (base == MAP_FAILED) |
133 die_errno("alloc_rdma_buffers: mmap failed"); |
333 die_errno("alloc_rdma_buffers: mmap failed"); |
134 |
334 |
135 @@ -828,7 +871,7 @@ |
335 @@ -828,13 +955,20 @@ |
136 } |
336 } |
137 |
337 |
138 if (!failed) |
338 if (!failed) |
139 - trace("compare pass pattern %Lx addr %p\n", |
339 - trace("compare pass pattern %Lx addr %p\n", |
140 + trace("compare pass pattern 0x%Lx addr %p\n", |
340 + trace("compare pass pattern 0x%Lx addr %p\n", |
141 (unsigned long long) pattern, addr); |
341 (unsigned long long) pattern, addr); |
142 } |
342 } |
143 |
343 |
144 @@ -865,7 +908,11 @@ |
344 +struct retry_entry { |
345 + uint32_t retries; |
|
346 + uint32_t seq; |
|
347 + int status; |
|
348 +}; |
|
349 + |
|
350 struct task { |
|
351 unsigned int nr; |
|
352 unsigned int pending; |
|
353 + int trace; |
|
354 unsigned int unacked; |
|
355 struct sockaddr_in src_addr; /* same for all tasks */ |
|
356 struct sockaddr_in dst_addr; |
|
357 @@ -846,7 +980,14 @@ |
|
358 uint16_t recv_index; |
|
359 struct timeval * send_time; |
|
360 struct header * ack_header; |
|
361 + struct header * ack2_header; |
|
362 + struct header * req_header; |
|
363 + uint64_t * retry_token; |
|
364 + uint32_t retries; |
|
365 + uint32_t last_retry_seq; |
|
366 + uint32_t retry_index; |
|
367 |
|
368 + |
|
369 /* RDMA related stuff */ |
|
370 uint64_t ** local_buf; |
|
371 uint64_t ** rdma_buf; |
|
372 @@ -865,7 +1006,11 @@ |
|
145 /* We use mmap here rather than malloc, because it is always |
373 /* We use mmap here rather than malloc, because it is always |
146 * page aligned. */ |
374 * page aligned. */ |
147 len = 2 * opts->nr_tasks * opts->req_depth * (opts->rdma_vector * opts->rdma_size) + sys_page_size; |
375 len = 2 * opts->nr_tasks * opts->req_depth * (opts->rdma_vector * opts->rdma_size) + sys_page_size; |
148 +#if defined(__SVR4) && defined(__sun) |
376 +#if defined(__SVR4) && defined(__sun) |
149 + base = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); |
377 + base = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); |
151 base = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE, 0, 0); |
379 base = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE, 0, 0); |
152 +#endif |
380 +#endif |
153 if (base == MAP_FAILED) |
381 if (base == MAP_FAILED) |
154 die_errno("alloc_rdma_buffers: mmap failed"); |
382 die_errno("alloc_rdma_buffers: mmap failed"); |
155 memset(base, 0x2f, len); |
383 memset(base, 0x2f, len); |
156 @@ -915,17 +962,16 @@ |
384 @@ -915,17 +1060,16 @@ |
157 if (RDMA_OP_READ == hdr->rdma_op) { |
385 if (RDMA_OP_READ == hdr->rdma_op) { |
158 if (opt.verify) |
386 if (opt.verify) |
159 rds_fill_buffer(rdma_addr, rdma_size, hdr->rdma_pattern); |
387 rds_fill_buffer(rdma_addr, rdma_size, hdr->rdma_pattern); |
160 - trace("Requesting RDMA read for pattern %Lx " |
388 - trace("Requesting RDMA read for pattern %Lx " |
161 - "local addr to rdma read %p\n", |
389 - "local addr to rdma read %p\n", |
175 + trace("Requesting RDMA write for pattern 0x%Lx", |
403 + trace("Requesting RDMA write for pattern 0x%Lx", |
176 + hdr->rdma_pattern); |
404 + hdr->rdma_pattern); |
177 } |
405 } |
178 } |
406 } |
179 |
407 |
180 @@ -947,7 +993,7 @@ |
408 @@ -947,7 +1091,7 @@ |
181 die("Unexpected RDMA op %u in request\n", in_hdr->rdma_op); |
409 die("Unexpected RDMA op %u in request\n", in_hdr->rdma_op); |
182 |
410 |
183 |
411 |
184 - trace("RDS received request to issue rdma %s len %lu rva %Lx key %Lx pattern %Lx\n", |
412 - trace("RDS received request to issue rdma %s len %lu rva %Lx key %Lx pattern %Lx\n", |
185 + trace("RDS received request to issue rdma %s len %lu rva 0x%Lx key 0x%Lx pattern 0x%Lx\n", |
413 + trace("RDS received request to issue rdma %s len %lu rva 0x%Lx key 0x%Lx pattern 0x%Lx\n", |
186 in_hdr->rdma_op == RDMA_OP_WRITE? "write to" : "read from", |
414 in_hdr->rdma_op == RDMA_OP_WRITE? "write to" : "read from", |
187 rdma_size, |
415 rdma_size, |
188 (unsigned long long) in_hdr->rdma_addr, |
416 (unsigned long long) in_hdr->rdma_addr, |
189 @@ -1007,6 +1053,9 @@ |
417 @@ -966,21 +1110,33 @@ |
418 hdr->rdma_vector = in_hdr->rdma_vector; |
|
419 } |
|
420 |
|
421 -static inline unsigned int rdma_user_token(struct task *t, unsigned int qindex) |
|
422 +static inline uint64_t rdma_user_token(struct task *t, unsigned int qindex, unsigned int type, uint32_t seq) |
|
423 { |
|
424 - return t->nr * opt.req_depth + qindex; |
|
425 + uint64_t tmp = seq; |
|
426 + return (tmp << 32) | ((t->nr * opt.req_depth + qindex) << 2 | type); |
|
427 } |
|
428 |
|
429 -static void rdma_mark_completed(struct task *tasks, unsigned int token, int status) |
|
430 +static void rdma_mark_completed(struct task *tasks, uint64_t token, int status, struct options *opts) |
|
431 { |
|
432 struct task *t; |
|
433 unsigned int i; |
|
434 + struct header *hdr = NULL; |
|
435 + uint32_t seq = token >> 32; |
|
436 + unsigned int type = token & 0x03; |
|
437 + unsigned int index = (token & 0xFFFFFFFF) >> 2; |
|
438 |
|
439 - trace("RDS rdma completion for token %x\n", token); |
|
440 + trace("RDS rdma completion for token 0x%lx\n", token); |
|
441 |
|
442 - t = &tasks[token / opt.req_depth]; |
|
443 - i = token % opt.req_depth; |
|
444 + t = &tasks[index / opt.req_depth]; |
|
445 + i = index % opt.req_depth; |
|
446 |
|
447 + if (opts->async) { |
|
448 + if (type == OP_REQ) |
|
449 + hdr = &t->req_header[i]; |
|
450 + else |
|
451 + hdr = &t->ack2_header[i]; |
|
452 + } |
|
453 + |
|
454 if (status) { |
|
455 const char *errmsg; |
|
456 |
|
457 @@ -987,10 +1143,10 @@ |
|
458 switch (status) { |
|
459 case RDS_RDMA_REMOTE_ERROR: |
|
460 errmsg = "remote error"; break; |
|
461 - case RDS_RDMA_CANCELED: |
|
462 - errmsg = "operation was cancelled"; break; |
|
463 case RDS_RDMA_DROPPED: |
|
464 errmsg = "operation was dropped"; break; |
|
465 + case RDS_RDMA_CANCELED: |
|
466 + errmsg = "operation was cancelled"; break; |
|
467 case RDS_RDMA_OTHER_ERROR: |
|
468 errmsg = "other error"; break; |
|
469 default: |
|
470 @@ -997,10 +1153,38 @@ |
|
471 errmsg = "unknown error"; break; |
|
472 } |
|
473 |
|
474 - printf("%s:%u: RDMA op %u failed: %s\n", |
|
475 + trace("%s:%u: %s failed: %s\n", |
|
476 inet_ntoa(t->dst_addr.sin_addr), |
|
477 ntohs(t->dst_addr.sin_port), |
|
478 - i, errmsg); |
|
479 + type ? "SEND" : "RDMA", |
|
480 + errmsg); |
|
481 + |
|
482 + if (hdr && |
|
483 + (status == RDS_RDMA_DROPPED || |
|
484 + status == RDS_RDMA_REMOTE_ERROR)) { |
|
485 + |
|
486 + if (hdr->seq == seq) { |
|
487 + hdr->retry = 1; |
|
488 + if (hdr->seq > t->last_retry_seq) { |
|
489 + if (status == RDS_RDMA_REMOTE_ERROR) |
|
490 + hdr->rdma_remote_err = 1; |
|
491 + t->retry_token[t->retry_index] = token; |
|
492 + t->retry_index = (t->retry_index + 1) % |
|
493 + (2 * opts->req_depth); |
|
494 + t->retries += 1; |
|
495 + t->last_retry_seq = hdr->seq; |
|
496 + if (t->retries > 2 * opts->req_depth) |
|
497 + die("Exceeded MAX retry entries..\n"); |
|
498 + } |
|
499 + } else |
|
500 + die("SEQ Out-Of-Sync: %u/%u\n", hdr->seq, seq); |
|
501 + } else if (hdr) { |
|
502 + hdr->pending = 0; |
|
503 + hdr->retry = 0; |
|
504 + } |
|
505 + } else if (hdr) { |
|
506 + hdr->pending = 0; |
|
507 + hdr->retry = 0; |
|
508 } |
|
509 |
|
510 t->rdma_inflight[i] = 0; |
|
511 @@ -1007,6 +1191,9 @@ |
|
190 t->drain_rdmas = 0; |
512 t->drain_rdmas = 0; |
191 } |
513 } |
192 |
514 |
193 +#if defined(__SVR4) && defined(__sun) |
515 +#if defined(__SVR4) && defined(__sun) |
194 +#undef MSG_MAXIOVLEN |
516 +#undef MSG_MAXIOVLEN |
195 +#endif |
517 +#endif |
196 #define MSG_MAXIOVLEN 2 |
518 #define MSG_MAXIOVLEN 2 |
197 |
519 |
198 /* |
520 /* |
199 @@ -1560,7 +1609,12 @@ |
521 @@ -1018,11 +1205,14 @@ |
522 static char ctlbuf[1024]; |
|
523 struct cmsghdr *cmsg; |
|
524 |
|
525 - msg->msg_control = ctlbuf; |
|
526 - msg->msg_controllen = CMSG_SPACE(size); |
|
527 - |
|
528 - cmsg = CMSG_FIRSTHDR(msg); |
|
529 - cmsg->cmsg_level = sol; |
|
530 + if (!msg->msg_control) { |
|
531 + msg->msg_control = ctlbuf; |
|
532 + msg->msg_controllen = CMSG_SPACE(size); |
|
533 + cmsg = CMSG_FIRSTHDR(msg); |
|
534 + } else { |
|
535 + cmsg = (struct cmsghdr *)((char *)msg->msg_control + msg->msg_controllen); |
|
536 + msg->msg_controllen += CMSG_SPACE(size); |
|
537 + }cmsg->cmsg_level = sol; |
|
538 cmsg->cmsg_type = type; |
|
539 cmsg->cmsg_len = CMSG_LEN(size); |
|
540 memcpy(CMSG_DATA(cmsg), ptr, size); |
|
541 @@ -1034,7 +1224,7 @@ |
|
542 * the ACK packet. |
|
543 */ |
|
544 static void rdma_build_cmsg_xfer(struct msghdr *msg, const struct header *hdr, |
|
545 - unsigned int user_token, void *local_buf) |
|
546 + uint64_t user_token, void *local_buf) |
|
547 { |
|
548 |
|
549 #define RDS_MAX_IOV 512 /* FIX_ME - put this into rds.h or use socket max ?*/ |
|
550 @@ -1048,7 +1238,7 @@ |
|
551 rdma_size = hdr->rdma_size; |
|
552 rdma_vector = hdr->rdma_vector; |
|
553 |
|
554 - trace("RDS issuing rdma for token %x key %Lx len %u local_buf %p vector %u\n", |
|
555 + trace("RDS issuing rdma for token 0x%lx key 0x%llx len %d local_buf %p vector %d\n", |
|
556 user_token, |
|
557 (unsigned long long) hdr->rdma_key, |
|
558 rdma_size, local_buf, |
|
559 @@ -1102,6 +1292,15 @@ |
|
560 rdma_put_cmsg(msg, RDS_CMSG_RDMA_ARGS, &args, sizeof(args)); |
|
561 } |
|
562 |
|
563 +static void build_cmsg_async_send(struct msghdr *msg, uint64_t user_token) |
|
564 +{ |
|
565 + struct rds_asend_args args; |
|
566 + |
|
567 + args.flags |= RDS_SEND_NOTIFY_ME; |
|
568 + args.user_token = user_token; |
|
569 + rdma_put_cmsg(msg, RDS_CMSG_ASYNC_SEND, &args, sizeof(args)); |
|
570 +} |
|
571 + |
|
572 static void rdma_build_cmsg_dest(struct msghdr *msg, rds_rdma_cookie_t rdma_dest) |
|
573 { |
|
574 rdma_put_cmsg(msg, RDS_CMSG_RDMA_DEST, &rdma_dest, sizeof(rdma_dest)); |
|
575 @@ -1174,19 +1373,17 @@ |
|
576 hdr->index = qindex; |
|
577 } |
|
578 |
|
579 -static int send_packet(int fd, struct task *t, |
|
580 - struct header *hdr, unsigned int size) |
|
581 +static int send_msg(int fd, struct task *t, struct header *hdr, |
|
582 + unsigned int size, struct options *opts, |
|
583 + struct child_control *ctl) |
|
584 { |
|
585 - unsigned char buf[size], *rdma_flight_recorder = NULL; |
|
586 + unsigned char buf[size]; |
|
587 + uint8_t *rdma_flight_recorder = NULL; |
|
588 rds_rdma_cookie_t cookie = 0; |
|
589 struct msghdr msg; |
|
590 struct iovec iov; |
|
591 ssize_t ret; |
|
592 |
|
593 - /* Make sure we always have the current sequence number. |
|
594 - * When we send ACK packets, the seq that gets filled in is |
|
595 - * stale. */ |
|
596 - hdr->seq = t->send_seq; |
|
597 fill_hdr(buf, size, hdr); |
|
598 |
|
599 memset(&msg, 0, sizeof(msg)); |
|
600 @@ -1198,27 +1395,10 @@ |
|
601 iov.iov_base = buf; |
|
602 iov.iov_len = size; |
|
603 |
|
604 - /* If this is a REQ packet in which we pass the MR to the |
|
605 - * peer, extract the RDMA cookie and pass it on in the control |
|
606 - * message for now. */ |
|
607 - if (hdr->op == OP_REQ && hdr->rdma_op != 0) { |
|
608 - if (hdr->rdma_key != 0) { |
|
609 - /* We used GET_MR to obtain a key */ |
|
610 - rdma_build_cmsg_dest(&msg, hdr->rdma_key); |
|
611 - cookie = hdr->rdma_key; |
|
612 - hdr->rdma_key = 0; |
|
613 - } else { |
|
614 - /* Use the RDMA_MAP cmsg to have sendmsg do the |
|
615 - * mapping on the fly. */ |
|
616 - rdma_build_cmsg_map(&msg, hdr->rdma_addr, |
|
617 - hdr->rdma_size * hdr->rdma_vector, |
|
618 - &cookie); |
|
619 - } |
|
620 - } |
|
621 |
|
622 /* If this is an ACK packet with RDMA, build the cmsg |
|
623 - * header that goes with it. */ |
|
624 - if (hdr->op == OP_ACK && hdr->rdma_op != 0) { |
|
625 + * header that goes with it. */ |
|
626 + if (hdr->op == OP_ACK && hdr->rdma_op != 0 && !hdr->rdma_remote_err) { |
|
627 unsigned int qindex = hdr->index; |
|
628 |
|
629 if (t->rdma_inflight[qindex] != 0) { |
|
630 @@ -1230,16 +1410,35 @@ |
|
631 * |
|
632 * We return one of the more obscure error messages, |
|
633 * which we recognize and handle in the top loop. */ |
|
634 - trace("Drain RDMA 0x%x\n", rdma_user_token(t, qindex)); |
|
635 + trace("Drain RDMA 0x%lx\n", rdma_user_token(t, qindex, 0, hdr->seq)); |
|
636 errno = EBADSLT; |
|
637 return -1; |
|
638 } |
|
639 rdma_build_cmsg_xfer(&msg, hdr, |
|
640 - rdma_user_token(t, qindex), |
|
641 + rdma_user_token(t, qindex, 0, hdr->seq), |
|
642 t->local_buf[qindex]); |
|
643 rdma_flight_recorder = &t->rdma_inflight[qindex]; |
|
644 + } else if (opts->async) { |
|
645 + if (hdr->op == OP_REQ) |
|
646 + build_cmsg_async_send(&msg, |
|
647 + rdma_user_token(t, hdr->index, OP_REQ, hdr->seq)); |
|
648 + else |
|
649 + build_cmsg_async_send(&msg, |
|
650 + rdma_user_token(t, hdr->index, OP_ACK, hdr->seq)); |
|
651 } |
|
652 |
|
653 + if (hdr->op == OP_REQ && hdr->rdma_op != 0) { |
|
654 + if (hdr->rdma_key != 0) { |
|
655 + rdma_build_cmsg_dest(&msg, hdr->rdma_key); |
|
656 + cookie = hdr->rdma_key; |
|
657 + hdr->rdma_key = 0; |
|
658 + } else { |
|
659 + rdma_build_cmsg_map(&msg, hdr->rdma_addr, |
|
660 + hdr->rdma_size * hdr->rdma_vector, |
|
661 + &cookie); |
|
662 + } |
|
663 + } |
|
664 + |
|
665 ret = sendmsg(fd, &msg, 0); |
|
666 if (ret < 0) { |
|
667 if (errno != EAGAIN && errno != ENOBUFS) |
|
668 @@ -1256,10 +1455,41 @@ |
|
669 * lower 32bit of the cookie */ |
|
670 rdma_key_o_meter_add(cookie); |
|
671 } |
|
672 + |
|
673 + hdr->pending = 1; |
|
674 + |
|
675 + return ret; |
|
676 +} |
|
677 + |
|
678 +static int send_packet(int fd, struct task *t, |
|
679 + struct header *hdr, unsigned int size, |
|
680 + struct options *opts, struct child_control *ctl) |
|
681 +{ |
|
682 + ssize_t ret; |
|
683 + |
|
684 + /* Make sure we always have the current sequence number. |
|
685 + * When we send ACK packets, the seq that gets filled in is |
|
686 + * stale. */ |
|
687 + hdr->seq = t->send_seq; |
|
688 + |
|
689 + ret = send_msg(fd, t, hdr, size, opts, ctl); |
|
690 + if (ret < 0) return ret; |
|
691 + |
|
692 t->send_seq++; |
|
693 return ret; |
|
694 } |
|
695 |
|
696 +static int resend_packet(int fd, struct task *t, |
|
697 + struct header *hdr, unsigned int size, |
|
698 + struct options *opts, struct child_control *ctl) |
|
699 +{ |
|
700 + ssize_t ret; |
|
701 + |
|
702 + ret = send_msg(fd, t, hdr, size, opts, ctl); |
|
703 + |
|
704 + return ret; |
|
705 +} |
|
706 + |
|
707 static int send_one(int fd, struct task *t, |
|
708 struct options *opts, |
|
709 struct child_control *ctl) |
|
710 @@ -1266,12 +1496,16 @@ |
|
711 { |
|
712 struct timeval start; |
|
713 struct timeval stop; |
|
714 - struct header hdr; |
|
715 + struct header *hdr = &t->req_header[t->send_index]; |
|
716 int ret; |
|
717 |
|
718 - build_header(t, &hdr, OP_REQ, t->send_index); |
|
719 + if (opts->async && hdr->pending) { |
|
720 + return -1; |
|
721 + } |
|
722 + |
|
723 + build_header(t, hdr, OP_REQ, t->send_index); |
|
724 if (opts->rdma_size && t->send_seq > 10) |
|
725 - rdma_build_req(fd, &hdr, t, |
|
726 + rdma_build_req(fd, hdr, t, |
|
727 opts->rdma_size, |
|
728 opts->req_depth, |
|
729 opts->rw_mode, |
|
730 @@ -1279,7 +1513,7 @@ |
|
731 |
|
732 |
|
733 gettimeofday(&start, NULL); |
|
734 - ret = send_packet(fd, t, &hdr, opts->req_size); |
|
735 + ret = send_packet(fd, t, hdr, opts->req_size, opts, ctl); |
|
736 gettimeofday(&stop, NULL); |
|
737 |
|
738 if (ret < 0) |
|
739 @@ -1302,10 +1536,15 @@ |
|
740 struct child_control *ctl) |
|
741 { |
|
742 struct header *hdr = &t->ack_header[qindex]; |
|
743 + struct header *hdr2 = &t->ack2_header[qindex]; |
|
744 ssize_t ret; |
|
745 |
|
746 + if (opts->async && hdr2->pending) { |
|
747 + return -1; |
|
748 + } |
|
749 + |
|
750 /* send an ack in response to the req we just got */ |
|
751 - ret = send_packet(fd, t, hdr, opts->ack_size); |
|
752 + ret = send_packet(fd, t, hdr, opts->ack_size, opts, ctl); |
|
753 if (ret < 0) |
|
754 return ret; |
|
755 if (ret != opts->ack_size) |
|
756 @@ -1324,6 +1563,8 @@ |
|
757 break; |
|
758 } |
|
759 |
|
760 + memcpy(hdr2, hdr, sizeof(struct header)); |
|
761 + |
|
762 return ret; |
|
763 } |
|
764 |
|
765 @@ -1354,8 +1595,49 @@ |
|
766 struct child_control *ctl, |
|
767 int can_send, int do_work) |
|
768 { |
|
769 + struct header *hdr; |
|
770 + unsigned int index; |
|
771 + int req_size; |
|
772 + int num_retries = t->retries; |
|
773 + uint64_t token; |
|
774 + unsigned int type; |
|
775 + unsigned int index2; |
|
776 + unsigned int i; |
|
777 + |
|
778 + while (opts->async && num_retries > 0) { |
|
779 + index = (t->retry_index - num_retries + |
|
780 + (2 * opts->req_depth)) % (2 * opts->req_depth); |
|
781 + |
|
782 + token = t->retry_token[index]; |
|
783 + type = token & 0x03; |
|
784 + index2 = (token & 0xFFFFFFFF) >> 2; |
|
785 + i = index2 % opts->req_depth; |
|
786 + |
|
787 + if (type == OP_REQ) |
|
788 + hdr = &t->req_header[i]; |
|
789 + else |
|
790 + hdr = &t->ack2_header[i]; |
|
791 + |
|
792 + if (!hdr->retry) |
|
793 + goto next; |
|
794 + |
|
795 + if (hdr->op == OP_REQ) |
|
796 + req_size = opts->req_size; |
|
797 + else |
|
798 + req_size = opts->ack_size; |
|
799 + |
|
800 + if (resend_packet(fd, t, hdr, req_size, opts, ctl) < 0) { |
|
801 + return -1; |
|
802 + } |
|
803 + hdr->retry = 0; |
|
804 +next: |
|
805 + num_retries--; |
|
806 + } |
|
807 + t->last_retry_seq = t->retries = 0; |
|
808 + |
|
809 if (ack_anything(fd, t, opts, ctl, can_send) < 0) |
|
810 return -1; |
|
811 + |
|
812 while (do_work && t->pending < opts->req_depth) { |
|
813 if (!can_send) |
|
814 goto eagain; |
|
815 @@ -1375,7 +1657,8 @@ |
|
816 rds_rdma_cookie_t *cookie, |
|
817 struct sockaddr_in *sin, |
|
818 struct timeval *tstamp, |
|
819 - struct task *tasks) |
|
820 + struct task *tasks, |
|
821 + struct options *opts) |
|
822 { |
|
823 struct cmsghdr *cmsg; |
|
824 char cmsgbuf[256]; |
|
825 @@ -1400,13 +1683,13 @@ |
|
826 return ret; |
|
827 if (ret && ret < sizeof(struct header)) |
|
828 die("recvmsg() returned short data: %zd", ret); |
|
829 - if (msg.msg_namelen < sizeof(struct sockaddr_in)) |
|
830 + if (ret && msg.msg_namelen < sizeof(struct sockaddr_in)) |
|
831 die("socklen = %d < sizeof(sin) (%zu)\n", |
|
832 msg.msg_namelen, sizeof(struct sockaddr_in)); |
|
833 |
|
834 /* See if the message comes with a RDMA destination */ |
|
835 for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) { |
|
836 - struct rds_rdma_notify notify; |
|
837 + struct rds_rdma_send_notify notify; |
|
838 |
|
839 if (cmsg->cmsg_level != sol) |
|
840 continue; |
|
841 @@ -1436,7 +1719,7 @@ |
|
842 if (cmsg->cmsg_len < CMSG_LEN(sizeof(notify))) |
|
843 die("RDS_CMSG_RDMA_DEST data too small"); |
|
844 memcpy(¬ify, CMSG_DATA(cmsg), sizeof(notify)); |
|
845 - rdma_mark_completed(tasks, notify.user_token, notify.status); |
|
846 + rdma_mark_completed(tasks, notify.user_token, notify.status, opts); |
|
847 break; |
|
848 } |
|
849 } |
|
850 @@ -1445,7 +1728,8 @@ |
|
851 |
|
852 static int recv_one(int fd, struct task *tasks, |
|
853 struct options *opts, |
|
854 - struct child_control *ctl) |
|
855 + struct child_control *ctl, |
|
856 + struct child_control *all_ctl) |
|
857 { |
|
858 char buf[max(opts->req_size, opts->ack_size)]; |
|
859 rds_rdma_cookie_t rdma_dest = 0; |
|
860 @@ -1456,15 +1740,18 @@ |
|
861 uint16_t expect_index; |
|
862 int task_index; |
|
863 ssize_t ret; |
|
864 + int check_status; |
|
865 |
|
866 - ret = recv_message(fd, buf, sizeof(buf), &rdma_dest, &sin, &tstamp, tasks); |
|
867 + |
|
868 + ret = recv_message(fd, buf, sizeof(buf), &rdma_dest, &sin, &tstamp, tasks, opts); |
|
869 if (ret < 0) |
|
870 return ret; |
|
871 |
|
872 /* If we received only RDMA completions or cong updates, |
|
873 * ret will be 0 */ |
|
874 - if (ret == 0) |
|
875 + if (ret == 0) { |
|
876 return 0; |
|
877 + } |
|
878 |
|
879 /* check the incoming sequence number */ |
|
880 task_index = ntohs(sin.sin_port) - opts->starting_port - 1; |
|
881 @@ -1508,16 +1795,32 @@ |
|
882 hdr.to_port = t->src_addr.sin_port; |
|
883 hdr.index = expect_index; |
|
884 |
|
885 - if (check_hdr(buf, ret, &hdr)) |
|
886 - die("header from %s:%u to id %u bogus\n", |
|
887 - inet_ntoa(sin.sin_addr), htons(sin.sin_port), |
|
888 - ntohs(t->src_addr.sin_port)); |
|
889 + check_status = check_hdr(buf, ret, &hdr, opts); |
|
890 + if (check_status) { |
|
891 + if (check_status > 0) { |
|
892 + die("header from %s:%u to id %u bogus\n", |
|
893 + inet_ntoa(sin.sin_addr), htons(sin.sin_port), |
|
894 + ntohs(t->src_addr.sin_port)); |
|
895 + } else |
|
896 + return 0; |
|
897 + } |
|
898 |
|
899 if (hdr.op == OP_ACK) { |
|
900 - stat_inc(&ctl->cur[S_RTT_USECS], |
|
901 - usec_sub(&tstamp, &t->send_time[expect_index])); |
|
902 - t->pending -= 1; |
|
903 + uint64_t rtt_time = |
|
904 + usec_sub(&tstamp, &t->send_time[expect_index]); |
|
905 |
|
906 + stat_inc(&ctl->cur[S_RTT_USECS], rtt_time); |
|
907 + if (rtt_time > rtt_threshold) |
|
908 + print_outlier("Found RTT = 0x%lx\n", rtt_time); |
|
909 + |
|
910 + if (show_histogram) |
|
911 + { |
|
912 + ctl->latency_histogram[get_bucket(rtt_time)]++; |
|
913 + } |
|
914 + |
|
915 + if (t->pending > 0) |
|
916 + t->pending -= 1; |
|
917 + |
|
918 if (in_hdr.rdma_key) |
|
919 rdma_process_ack(fd, &in_hdr, ctl); |
|
920 } else { |
|
921 @@ -1549,6 +1852,7 @@ |
|
922 } |
|
923 |
|
924 static void run_child(pid_t parent_pid, struct child_control *ctl, |
|
925 + struct child_control *all_ctl, |
|
926 struct options *opts, uint16_t id, int active) |
|
927 { |
|
928 struct sockaddr_in sin; |
|
929 @@ -1559,8 +1863,15 @@ |
|
930 struct task tasks[opts->nr_tasks]; |
|
200 struct timeval start; |
931 struct timeval start; |
201 int do_work = opts->simplex ? active : 1; |
932 int do_work = opts->simplex ? active : 1; |
202 |
933 + int j; |
934 |
|
935 + |
|
203 +#if defined(__SVR4) && defined(__sun) |
936 +#if defined(__SVR4) && defined(__sun) |
204 + set_my_lgrp(); |
937 + set_my_lgrp(); |
205 + sin.sin_family = AF_INET_OFFLOAD; |
938 + sin.sin_family = AF_INET_OFFLOAD; |
206 +#else |
939 +#else |
207 sin.sin_family = AF_INET; |
940 sin.sin_family = AF_INET; |
208 +#endif |
941 +#endif |
209 sin.sin_port = htons(opts->starting_port + 1 + id); |
942 sin.sin_port = htons(opts->starting_port + 1 + id); |
210 sin.sin_addr.s_addr = htonl(opts->receive_addr); |
943 sin.sin_addr.s_addr = htonl(opts->receive_addr); |
211 |
944 |
212 @@ -1572,7 +1626,11 @@ |
945 @@ -1572,7 +1883,11 @@ |
213 for (i = 0; i < opts->nr_tasks; i++) { |
946 for (i = 0; i < opts->nr_tasks; i++) { |
214 tasks[i].nr = i; |
947 tasks[i].nr = i; |
215 tasks[i].src_addr = sin; |
948 tasks[i].src_addr = sin; |
216 +#if defined(__SVR4) && defined(__sun) |
949 +#if defined(__SVR4) && defined(__sun) |
217 + tasks[i].dst_addr.sin_family = AF_INET_OFFLOAD; |
950 + tasks[i].dst_addr.sin_family = AF_INET_OFFLOAD; |
219 tasks[i].dst_addr.sin_family = AF_INET; |
952 tasks[i].dst_addr.sin_family = AF_INET; |
220 +#endif |
953 +#endif |
221 tasks[i].dst_addr.sin_addr.s_addr = htonl(opts->send_addr); |
954 tasks[i].dst_addr.sin_addr.s_addr = htonl(opts->send_addr); |
222 tasks[i].dst_addr.sin_port = htons(opts->starting_port + 1 + i); |
955 tasks[i].dst_addr.sin_port = htons(opts->starting_port + 1 + i); |
223 tasks[i].send_time = alloca(opts->req_depth * sizeof(struct timeval)); |
956 tasks[i].send_time = alloca(opts->req_depth * sizeof(struct timeval)); |
224 @@ -1625,6 +1683,10 @@ |
957 @@ -1581,6 +1896,15 @@ |
958 tasks[i].rdma_buf = alloca(opts->req_depth * sizeof(uint64_t *)); |
|
959 tasks[i].local_buf = alloca(opts->req_depth * sizeof(uint64_t *)); |
|
960 tasks[i].ack_header = alloca(opts->req_depth * sizeof(struct header)); |
|
961 + tasks[i].ack2_header = alloca(opts->req_depth * sizeof(struct header)); |
|
962 + for (j=0;j<opts->req_depth;j++) |
|
963 + tasks[i].ack2_header[j].pending = 0; |
|
964 + |
|
965 + tasks[i].req_header = alloca(opts->req_depth * sizeof(struct header)); |
|
966 + for (j=0;j<opts->req_depth;j++) |
|
967 + tasks[i].req_header[j].pending = 0; |
|
968 + |
|
969 + tasks[i].retry_token = alloca(2 * opts->req_depth * sizeof(uint64_t)); |
|
970 tasks[i].rdma_next_op = (i & 1)? RDMA_OP_READ : RDMA_OP_WRITE; |
|
971 } |
|
972 |
|
973 @@ -1611,7 +1935,7 @@ |
|
974 |
|
975 check_parent(parent_pid); |
|
976 |
|
977 - ret = poll(&pfd, 1, -1); |
|
978 + ret = poll(&pfd, 1, 1000); |
|
979 if (ret < 0) { |
|
980 if (errno == EINTR) |
|
981 continue; |
|
982 @@ -1621,10 +1945,14 @@ |
|
983 pfd.events = POLLIN; |
|
984 |
|
985 if (pfd.revents & POLLIN) { |
|
986 - while (recv_one(fd, tasks, opts, ctl) >= 0) |
|
987 + while (recv_one(fd, tasks, opts, ctl, all_ctl) >= 0) |
|
225 ; |
988 ; |
226 } |
989 } |
227 |
990 |
228 + /* stop sending if in shutdown phase */ |
991 + /* stop sending if in shutdown phase */ |
229 + if (ctl->stopping) |
992 + if (ctl->stopping) |
230 + continue; |
993 + continue; |
231 + |
994 + |
232 /* keep the pipeline full */ |
995 /* keep the pipeline full */ |
233 can_send = !!(pfd.revents & POLLOUT); |
996 can_send = !!(pfd.revents & POLLOUT); |
234 for (i = 0, t = tasks; i < opts->nr_tasks; i++, t++) { |
997 for (i = 0, t = tasks; i < opts->nr_tasks; i++, t++) { |
235 @@ -1665,8 +1727,12 @@ |
998 @@ -1633,6 +1961,7 @@ |
999 if (t->drain_rdmas) |
|
1000 continue; |
|
1001 if (send_anything(fd, t, opts, ctl, can_send, do_work) < 0) { |
|
1002 + |
|
1003 pfd.events |= POLLOUT; |
|
1004 |
|
1005 /* If the send queue is full, we will see EAGAIN. |
|
1006 @@ -1665,8 +1994,12 @@ |
|
236 uint32_t i; |
1007 uint32_t i; |
237 |
1008 |
238 len = opts->nr_tasks * sizeof(*ctl); |
1009 len = opts->nr_tasks * sizeof(*ctl); |
239 +#if defined(__SVR4) && defined(__sun) |
1010 +#if defined(__SVR4) && defined(__sun) |
240 + ctl = (struct child_control *)mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_SHARED, -1, 0); |
1011 + ctl = (struct child_control *)mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_SHARED, -1, 0); |
243 0, 0); |
1014 0, 0); |
244 +#endif |
1015 +#endif |
245 if (ctl == MAP_FAILED) |
1016 if (ctl == MAP_FAILED) |
246 die("mmap of %u child control structs failed", opts->nr_tasks); |
1017 die("mmap of %u child control structs failed", opts->nr_tasks); |
247 |
1018 |
248 @@ -1699,7 +1765,7 @@ |
1019 @@ -1688,7 +2021,7 @@ |
1020 control_fd = -1; |
|
1021 } |
|
1022 rdma_key_o_meter_set_self(i); |
|
1023 - run_child(parent, ctl + i, opts, i, active); |
|
1024 + run_child(parent, ctl + i, ctl, opts, i, active); |
|
1025 exit(0); |
|
1026 } |
|
1027 ctl[i].pid = pid; |
|
1028 @@ -1699,7 +2032,7 @@ |
|
249 continue; |
1029 continue; |
250 pid = waitpid(-1, NULL, WNOHANG); |
1030 pid = waitpid(-1, NULL, WNOHANG); |
251 if (pid) |
1031 if (pid) |
252 - die("child %u (pid %u) exited\n", i, pid); |
1032 - die("child %u (pid %u) exited\n", i, pid); |
253 + die("child %u (pid %u) exited\n", i, (int)pid); |
1033 + die("child %u (pid %u) exited\n", i, (int)pid); |
254 sleep(1); |
1034 sleep(1); |
255 i--; /* try this child again */ |
1035 i--; /* try this child again */ |
256 } |
1036 } |
257 @@ -1823,6 +1889,7 @@ |
1037 @@ -1823,6 +2156,7 @@ |
258 |
1038 |
259 if (disable) |
1039 if (disable) |
260 return; |
1040 return; |
261 +#if !(defined(__SVR4) && defined(__sun)) |
1041 +#if !(defined(__SVR4) && defined(__sun)) |
262 if ((fp = fopen("/proc/stat", "r")) == NULL) { |
1042 if ((fp = fopen("/proc/stat", "r")) == NULL) { |
263 fprintf(stderr, "Cannot open /proc/stat (%s) - " |
1043 fprintf(stderr, "Cannot open /proc/stat (%s) - " |
264 "not printing cpu stats\n", |
1044 "not printing cpu stats\n", |
265 @@ -1856,10 +1923,37 @@ |
1045 @@ -1856,10 +2190,37 @@ |
266 } |
1046 } |
267 } |
1047 } |
268 fclose(fp); |
1048 fclose(fp); |
269 +#else |
1049 +#else |
270 +#define NSEC_TO_TICK(v) (v * sysconf(_SC_CLK_TCK)/1000000000) |
1050 +#define NSEC_TO_TICK(v) (v * sysconf(_SC_CLK_TCK)/1000000000) |
298 + ",intr:count"); |
1078 + ",intr:count"); |
299 +#endif |
1079 +#endif |
300 } else { |
1080 } else { |
301 struct sys_stats sys; |
1081 struct sys_stats sys; |
302 unsigned long sum = 0; |
1082 unsigned long sum = 0; |
303 @@ -1884,6 +1978,7 @@ |
1083 @@ -1884,6 +2245,7 @@ |
304 * 5 irq |
1084 * 5 irq |
305 * 6 softirq |
1085 * 6 softirq |
306 */ |
1086 */ |
307 +#if !(defined(__SVR4) && defined(__sun)) |
1087 +#if !(defined(__SVR4) && defined(__sun)) |
308 printf(",%f,%f,%f,%f,%Lu", |
1088 printf(",%f,%f,%f,%f,%Lu", |
309 (sys.times[0] + sys.times[1]) * scale, |
1089 (sys.times[0] + sys.times[1]) * scale, |
310 sys.times[2] * scale, |
1090 sys.times[2] * scale, |
311 @@ -1890,6 +1985,14 @@ |
1091 @@ -1890,6 +2252,14 @@ |
312 (sys.times[3] + sys.times[4]) * scale, |
1092 (sys.times[3] + sys.times[4]) * scale, |
313 (sys.times[5] + sys.times[6]) * scale, |
1093 (sys.times[5] + sys.times[6]) * scale, |
314 sys.intr); |
1094 sys.intr); |
315 +#else |
1095 +#else |
316 + /* Solaris kstat doesn't provide irq/softirq info. */ |
1096 + /* Solaris kstat doesn't provide irq/softirq info. */ |
321 + sys.intr); |
1101 + sys.intr); |
322 +#endif |
1102 +#endif |
323 } |
1103 } |
324 prev = current; |
1104 prev = current; |
325 } |
1105 } |
326 @@ -1903,6 +2006,10 @@ |
1106 @@ -1903,6 +2273,10 @@ |
327 static socklen_t buflen = 0; |
1107 static socklen_t buflen = 0; |
328 static int sock_fd = -1; |
1108 static int sock_fd = -1; |
329 int i, count, item_size; |
1109 int i, count, item_size; |
330 +#if defined(__SVR4) && defined(__sun) |
1110 +#if defined(__SVR4) && defined(__sun) |
331 + socklen_t len; |
1111 + socklen_t len; |
332 + struct rds_info_arg arg; |
1112 + struct rds_info_arg arg; |
333 +#endif |
1113 +#endif |
334 |
1114 |
335 if (sock_fd < 0) { |
1115 if (sock_fd < 0) { |
336 sock_fd = socket(pf, SOCK_SEQPACKET, 0); |
1116 sock_fd = socket(pf, SOCK_SEQPACKET, 0); |
337 @@ -1912,6 +2019,7 @@ |
1117 @@ -1912,6 +2286,7 @@ |
338 |
1118 |
339 /* We should only loop once on the first call; after that the |
1119 /* We should only loop once on the first call; after that the |
340 * buffer requirements for RDS counters should not change. */ |
1120 * buffer requirements for RDS counters should not change. */ |
341 +#if !(defined(__SVR4) && defined(__sun)) |
1121 +#if !(defined(__SVR4) && defined(__sun)) |
342 while ((item_size = getsockopt(sock_fd, sol, RDS_INFO_COUNTERS, curr, &buflen)) < 0) { |
1122 while ((item_size = getsockopt(sock_fd, sol, RDS_INFO_COUNTERS, curr, &buflen)) < 0) { |
343 if (errno != ENOSPC) |
1123 if (errno != ENOSPC) |
344 die_errno("getsockopt(RDS_INFO_COUNTERS) failed"); |
1124 die_errno("getsockopt(RDS_INFO_COUNTERS) failed"); |
345 @@ -1919,7 +2027,29 @@ |
1125 @@ -1919,7 +2294,29 @@ |
346 if (!curr) |
1126 if (!curr) |
347 die_errno("Cannot allocate buffer for stats counters"); |
1127 die_errno("Cannot allocate buffer for stats counters"); |
348 } |
1128 } |
349 +#else |
1129 +#else |
350 + int retcode; |
1130 + int retcode; |
370 +#endif |
1150 +#endif |
371 + |
1151 + |
372 if (item_size > sizeof(*ctr)) |
1152 if (item_size > sizeof(*ctr)) |
373 die("Bad counter item size in RDS_INFO_COUNTERS (got %d, max %zd)\n", |
1153 die("Bad counter item size in RDS_INFO_COUNTERS (got %d, max %zd)\n", |
374 item_size, sizeof(*ctr)); |
1154 item_size, sizeof(*ctr)); |
375 @@ -1932,8 +2062,11 @@ |
1155 @@ -1932,8 +2329,11 @@ |
376 } |
1156 } |
377 |
1157 |
378 for (i = 0; i < count; ++i) |
1158 for (i = 0; i < count; ++i) |
379 +#if !(defined(__SVR4) && defined(__sun)) |
1159 +#if !(defined(__SVR4) && defined(__sun)) |
380 memcpy(ctr + i, curr + i * item_size, item_size); |
1160 memcpy(ctr + i, curr + i * item_size, item_size); |
383 + memcpy(ctr + i, ((void *)(uintptr_t)arg.datap) + i * item_size, item_size); |
1163 + memcpy(ctr + i, ((void *)(uintptr_t)arg.datap) + i * item_size, item_size); |
384 +#endif |
1164 +#endif |
385 gettimeofday(&now, NULL); |
1165 gettimeofday(&now, NULL); |
386 |
1166 |
387 if (initialize) { |
1167 if (initialize) { |
388 @@ -1957,6 +2090,10 @@ |
1168 @@ -1957,6 +2357,10 @@ |
389 memcpy(prev, ctr, count * sizeof(*ctr)); |
1169 memcpy(prev, ctr, count * sizeof(*ctr)); |
390 last_ts = now; |
1170 last_ts = now; |
391 |
1171 |
392 +#if defined(__SVR4) && defined(__sun) |
1172 +#if defined(__SVR4) && defined(__sun) |
393 + free((void *)(uintptr_t)arg.datap); |
1173 + free((void *)(uintptr_t)arg.datap); |
394 +#endif |
1174 +#endif |
395 + |
1175 + |
396 get_stats(initialize); |
1176 get_stats(initialize); |
397 } |
1177 } |
398 |
1178 |
399 @@ -1967,7 +2104,7 @@ |
1179 @@ -1967,7 +2371,7 @@ |
400 |
1180 |
401 pid = waitpid(-1, &status, wflags); |
1181 pid = waitpid(-1, &status, wflags); |
402 if (pid < 0) |
1182 if (pid < 0) |
403 - die("waitpid returned %u", pid); |
1183 - die("waitpid returned %u", pid); |
404 + die("waitpid returned %u", (int)pid); |
1184 + die("waitpid returned %u", (int)pid); |
405 if (pid == 0) |
1185 if (pid == 0) |
406 return 0; |
1186 return 0; |
407 |
1187 |
408 @@ -1975,15 +2112,15 @@ |
1188 @@ -1975,15 +2379,15 @@ |
409 if (WEXITSTATUS(status) == 0) |
1189 if (WEXITSTATUS(status) == 0) |
410 return 1; |
1190 return 1; |
411 die("child pid %u exited with status %d\n", |
1191 die("child pid %u exited with status %d\n", |
412 - pid, WEXITSTATUS(status)); |
1192 - pid, WEXITSTATUS(status)); |
413 + (int)pid, WEXITSTATUS(status)); |
1193 + (int)pid, WEXITSTATUS(status)); |
422 - die("child pid %u wait status %d\n", pid, status); |
1202 - die("child pid %u wait status %d\n", pid, status); |
423 + die("child pid %u wait status %d\n", (int)pid, status); |
1203 + die("child pid %u wait status %d\n", (int)pid, status); |
424 } |
1204 } |
425 |
1205 |
426 static void release_children_and_wait(struct options *opts, |
1206 static void release_children_and_wait(struct options *opts, |
427 @@ -2139,7 +2276,12 @@ |
1207 @@ -1995,9 +2399,13 @@ |
1208 struct counter summary[NR_STATS]; |
|
1209 struct timeval start, end, now, first_ts, last_ts; |
|
1210 double cpu_total = 0; |
|
1211 - uint16_t i, cpu_samples = 0; |
|
1212 + uint16_t i, j, cpu_samples = 0; |
|
1213 uint16_t nr_running; |
|
1214 + uint64_t latency_histogram[MAX_BUCKETS]; |
|
1215 |
|
1216 + if (show_histogram) |
|
1217 + memset(latency_histogram, 0, sizeof(latency_histogram)); |
|
1218 + |
|
1219 gettimeofday(&start, NULL); |
|
1220 start.tv_sec += 2; |
|
1221 for (i = 0; i < opts->nr_tasks; i++) |
|
1222 @@ -2139,7 +2547,12 @@ |
|
428 control_fd = -1; |
1223 control_fd = -1; |
429 |
1224 |
430 if (nr_running) { |
1225 if (nr_running) { |
431 + /* let everything gracefully stop before we kill the chillins */ |
1226 + /* let everything gracefully stop before we kill the chillins */ |
432 for (i = 0; i < opts->nr_tasks; i++) |
1227 for (i = 0; i < opts->nr_tasks; i++) |
435 + |
1230 + |
436 + for (i = 0; i < opts->nr_tasks; i++) |
1231 + for (i = 0; i < opts->nr_tasks; i++) |
437 kill(ctl[i].pid, SIGTERM); |
1232 kill(ctl[i].pid, SIGTERM); |
438 stop_soakers(soak_arr); |
1233 stop_soakers(soak_arr); |
439 } |
1234 } |
440 @@ -2517,7 +2659,11 @@ |
1235 @@ -2167,6 +2580,19 @@ |
1236 avg(&summary[S_SENDMSG_USECS]), |
|
1237 avg(&summary[S_RTT_USECS]), |
|
1238 soak_arr? scale * cpu_total : -1.0); |
|
1239 + |
|
1240 + if (show_histogram) |
|
1241 + { |
|
1242 + for (i = 0; i < opts->nr_tasks; i++) |
|
1243 + for (j=0;j < MAX_BUCKETS; j++) |
|
1244 + latency_histogram[j] += ctl[i].latency_histogram[j]; |
|
1245 + |
|
1246 + printf("\nRTT histogram\n"); |
|
1247 + printf("RTT (us) \t\t Count\n"); |
|
1248 + for (i=0;i < MAX_BUCKETS; i++) |
|
1249 + printf("[%6u - %6u] \t\t %8u\n", 1 << i, 1 << (i+1), |
|
1250 + (unsigned int)latency_histogram[i]); |
|
1251 + } |
|
1252 } |
|
1253 } |
|
1254 |
|
1255 @@ -2262,6 +2688,9 @@ |
|
1256 dst->simplex = src->simplex; /* byte sized */ |
|
1257 dst->rw_mode = src->rw_mode; /* byte sized */ |
|
1258 dst->rdma_vector = htonl(src->rdma_vector); |
|
1259 + dst->tos = src->tos; |
|
1260 + dst->reset = src->reset; |
|
1261 + dst->async = src->async; |
|
1262 } |
|
1263 |
|
1264 static void decode_options(struct options *dst, const struct options *src) |
|
1265 @@ -2295,6 +2724,9 @@ |
|
1266 dst->simplex = src->simplex; /* byte sized */ |
|
1267 dst->rw_mode = src->rw_mode; /* byte sized */ |
|
1268 dst->rdma_vector = ntohl(src->rdma_vector); |
|
1269 + dst->tos = src->tos; |
|
1270 + dst->reset = src->reset; |
|
1271 + dst->async = src->async; |
|
1272 } |
|
1273 |
|
1274 static void verify_option_encdec(const struct options *opts) |
|
1275 @@ -2316,6 +2748,30 @@ |
|
1276 die("encode/decode check of options struct failed"); |
|
1277 } |
|
1278 |
|
1279 +static void reset_conn(struct options *opts) |
|
1280 +{ |
|
1281 + struct rds_reset val; |
|
1282 + int fd; |
|
1283 + struct sockaddr_in sin; |
|
1284 + |
|
1285 + sin.sin_family = AF_INET; |
|
1286 + sin.sin_port = htons(opts->starting_port); |
|
1287 + sin.sin_addr.s_addr = htonl(opts->receive_addr); |
|
1288 + |
|
1289 + fd = bound_socket(pf, SOCK_SEQPACKET, 0, &sin); |
|
1290 + |
|
1291 + val.tos = opts->tos; |
|
1292 +#if defined(__SVR4) && defined(__sun) |
|
1293 + val.src = htonl(opts->receive_addr); |
|
1294 + val.dst = htonl(opts->send_addr); |
|
1295 +#else |
|
1296 + val.src.s_addr = htonl(opts->receive_addr); |
|
1297 + val.dst.s_addr = htonl(opts->send_addr); |
|
1298 +#endif |
|
1299 + if (setsockopt(fd, sol, RDS_CONN_RESET, &val, sizeof(val))) |
|
1300 + die_errno("setsockopt RDS_CONN_RESET failed"); |
|
1301 +} |
|
1302 + |
|
1303 static int active_parent(struct options *opts, struct soak_control *soak_arr) |
|
1304 { |
|
1305 struct options enc_options; |
|
1306 @@ -2324,6 +2780,11 @@ |
|
1307 int fd; |
|
1308 uint8_t ok; |
|
1309 |
|
1310 + if (opts->reset) { |
|
1311 + reset_conn(opts); |
|
1312 + return 0; |
|
1313 + } |
|
1314 + |
|
1315 if (opts->show_params) { |
|
1316 unsigned int k; |
|
1317 |
|
1318 @@ -2517,7 +2978,11 @@ |
|
441 /* an extra terminating entry which will be all 0s */ |
1319 /* an extra terminating entry which will be all 0s */ |
442 len = (nr_soak + 1) * sizeof(struct soak_control); |
1320 len = (nr_soak + 1) * sizeof(struct soak_control); |
443 soak_arr = mmap(NULL, len, PROT_READ|PROT_WRITE, |
1321 soak_arr = mmap(NULL, len, PROT_READ|PROT_WRITE, |
444 +#if defined(__SVR4) && defined(__sun) |
1322 +#if defined(__SVR4) && defined(__sun) |
445 + MAP_ANONYMOUS|MAP_SHARED, -1, 0); |
1323 + MAP_ANONYMOUS|MAP_SHARED, -1, 0); |
447 MAP_ANONYMOUS|MAP_SHARED, 0, 0); |
1325 MAP_ANONYMOUS|MAP_SHARED, 0, 0); |
448 +#endif |
1326 +#endif |
449 if (soak_arr == MAP_FAILED) |
1327 if (soak_arr == MAP_FAILED) |
450 die("mmap of %ld soak control structs failed", nr_soak); |
1328 die("mmap of %ld soak control structs failed", nr_soak); |
451 |
1329 |
452 @@ -2589,6 +2735,7 @@ |
1330 @@ -2572,6 +3037,10 @@ |
1331 OPT_CONNECT_RETRIES, |
|
1332 OPT_USE_CONG_MONITOR, |
|
1333 OPT_PERFDATA, |
|
1334 + OPT_SHOW_OUTLIERS, |
|
1335 + OPT_SHOW_HISTOGRAM, |
|
1336 + OPT_RESET, |
|
1337 + OPT_ASYNC, |
|
1338 }; |
|
1339 |
|
1340 static struct option long_options[] = { |
|
1341 @@ -2584,11 +3053,13 @@ |
|
1342 { "send-addr", required_argument, NULL, 's' }, |
|
1343 { "port", required_argument, NULL, 'p' }, |
|
1344 { "time", required_argument, NULL, 'T' }, |
|
1345 +{ "tos", required_argument, NULL, 'Q' }, |
|
1346 { "report-cpu", no_argument, NULL, 'c' }, |
|
1347 { "report-summary", no_argument, NULL, 'z' }, |
|
453 { "rtprio", no_argument, NULL, 'R' }, |
1348 { "rtprio", no_argument, NULL, 'R' }, |
454 { "verify", no_argument, NULL, 'v' }, |
1349 { "verify", no_argument, NULL, 'v' }, |
455 { "trace", no_argument, NULL, 'V' }, |
1350 { "trace", no_argument, NULL, 'V' }, |
456 +{ "lgrpid", required_argument, NULL, 'g' }, |
1351 +{ "lgrpid", required_argument, NULL, 'g' }, |
457 |
1352 |
458 { "rdma-use-once", required_argument, NULL, OPT_RDMA_USE_ONCE }, |
1353 { "rdma-use-once", required_argument, NULL, OPT_RDMA_USE_ONCE }, |
459 { "rdma-use-get-mr", required_argument, NULL, OPT_RDMA_USE_GET_MR }, |
1354 { "rdma-use-get-mr", required_argument, NULL, OPT_RDMA_USE_GET_MR }, |
460 @@ -2652,7 +2799,7 @@ |
1355 @@ -2601,6 +3072,10 @@ |
1356 { "show-perfdata", no_argument, NULL, OPT_PERFDATA }, |
|
1357 { "connect-retries", required_argument, NULL, OPT_CONNECT_RETRIES }, |
|
1358 { "use-cong-monitor", required_argument, NULL, OPT_USE_CONG_MONITOR }, |
|
1359 +{ "show-outliers", required_argument, NULL, OPT_SHOW_OUTLIERS }, |
|
1360 +{ "show-histogram", no_argument, NULL, OPT_SHOW_HISTOGRAM }, |
|
1361 +{ "reset", no_argument, NULL, OPT_RESET }, |
|
1362 +{ "async", no_argument, NULL, OPT_ASYNC }, |
|
1363 |
|
1364 { NULL } |
|
1365 }; |
|
1366 @@ -2640,6 +3115,8 @@ |
|
1367 opts.use_cong_monitor = 1; |
|
1368 opts.rdma_use_fence = 1; |
|
1369 opts.rdma_cache_mrs = 0; |
|
1370 + opts.rdma_use_once = 0; |
|
1371 + opts.rdma_use_get_mr = 0; |
|
1372 opts.rdma_alignment = 0; |
|
1373 opts.rdma_key_o_meter = 0; |
|
1374 opts.show_params = 0; |
|
1375 @@ -2648,11 +3125,16 @@ |
|
1376 opts.simplex = 0; |
|
1377 opts.rw_mode = 0; |
|
1378 opts.rdma_vector = 1; |
|
1379 + rtt_threshold = ~0U; |
|
1380 + show_histogram = 0; |
|
1381 + opts.tos = 0; |
|
1382 + opts.reset = 0; |
|
1383 + opts.async = 0; |
|
1384 |
|
461 while(1) { |
1385 while(1) { |
462 int c, index; |
1386 int c, index; |
463 |
1387 |
464 - c = getopt_long(argc, argv, "+a:cD:d:hI:M:op:q:Rr:s:t:T:vVz", |
1388 - c = getopt_long(argc, argv, "+a:cD:d:hI:M:op:q:Rr:s:t:T:vVz", |
465 + c = getopt_long(argc, argv, "+a:cD:d:hI:M:op:q:Rr:s:t:T:vVg:z", |
1389 + c = getopt_long(argc, argv, "+a:cD:d:hI:M:op:q:Rr:s:t:T:Q:vVg:z", |
466 long_options, &index); |
1390 long_options, &index); |
467 if (c == -1) |
1391 if (c == -1) |
468 break; |
1392 break; |
469 @@ -2711,6 +2858,10 @@ |
1393 @@ -2702,6 +3184,9 @@ |
1394 case 'T': |
|
1395 opts.run_time = parse_ull(optarg, (uint32_t)~0); |
|
1396 break; |
|
1397 + case 'Q': |
|
1398 + opts.tos = parse_ull(optarg, (uint8_t)~0); |
|
1399 + break; |
|
1400 case 'z': |
|
1401 opts.summary_only = 1; |
|
1402 break; |
|
1403 @@ -2711,9 +3196,25 @@ |
|
470 case 'V': |
1404 case 'V': |
471 opts.tracing = 1; |
1405 opts.tracing = 1; |
472 break; |
1406 break; |
473 + case 'g': |
1407 + case 'g': |
474 + lgrp_id = (lgrp_id_t)parse_ull(optarg, |
1408 + lgrp_id = (lgrp_id_t)parse_ull(optarg, |
475 + (uint32_t)~0); |
1409 + (uint32_t)~0); |
476 + break; |
1410 + break; |
1411 + case OPT_SHOW_OUTLIERS: |
|
1412 + rtt_threshold = parse_ull(optarg, ~0U); |
|
1413 + break; |
|
1414 + case OPT_SHOW_HISTOGRAM: |
|
1415 + show_histogram = 1; |
|
1416 + break; |
|
477 case OPT_USE_CONG_MONITOR: |
1417 case OPT_USE_CONG_MONITOR: |
478 opts.use_cong_monitor = parse_ull(optarg, 1); |
1418 opts.use_cong_monitor = parse_ull(optarg, 1); |
479 break; |
1419 break; |
480 @@ -2786,6 +2937,7 @@ |
1420 + case OPT_RESET: |
1421 + opts.reset = 1; |
|
1422 + break; |
|
1423 + case OPT_ASYNC: |
|
1424 + opts.async = 1; |
|
1425 + break; |
|
1426 case OPT_RDMA_USE_ONCE: |
|
1427 opts.rdma_use_once = parse_ull(optarg, 1); |
|
1428 break; |
|
1429 @@ -2786,6 +3287,7 @@ |
|
481 if (opts.rdma_size && 0) |
1430 if (opts.rdma_size && 0) |
482 opts.rdma_size = (opts.rdma_size + 4095) & ~4095; |
1431 opts.rdma_size = (opts.rdma_size + 4095) & ~4095; |
483 |
1432 |
484 + set_my_lgrp(); |
1433 + set_my_lgrp(); |
485 opt = opts; |
1434 opt = opts; |
1288 |
2237 |
1289 |
2238 |
1290 diff -r -u /tmp/rds-tools-2.0.4/rds-stress.1 rds-tools-2.0.7/rds-stress.1 |
2239 diff -r -u /tmp/rds-tools-2.0.4/rds-stress.1 rds-tools-2.0.7/rds-stress.1 |
1291 --- /tmp/rds-tools-2.0.4/rds-stress.1 Wed Aug 4 15:25:11 2010 |
2240 --- /tmp/rds-tools-2.0.4/rds-stress.1 Wed Aug 4 15:25:11 2010 |
1292 +++ rds-tools-2.0.7/rds-stress.1 Thu Feb 24 13:27:52 2011 |
2241 +++ rds-tools-2.0.7/rds-stress.1 Thu Feb 24 13:27:52 2011 |
1293 @@ -1,99 +1,102 @@ |
2242 @@ -1,99 +1,106 @@ |
1294 -.Dd May 15, 2007 |
2243 -.Dd May 15, 2007 |
1295 -.Dt RDS-STRESS 1 |
2244 -.Dt RDS-STRESS 1 |
1296 -.Os |
2245 -.Os |
1297 -.Sh NAME |
2246 -.Sh NAME |
1298 -.Nm rds-stress |
2247 -.Nm rds-stress |
1319 +.PP |
2268 +.PP |
1320 +.SH SYNOPSIS |
2269 +.SH SYNOPSIS |
1321 +.HP |
2270 +.HP |
1322 +.nf |
2271 +.nf |
1323 +rds-stress [-p port_number] -r [receive_address] [-s send_address] |
2272 +rds-stress [-p port_number] -r [receive_address] [-s send_address] |
1324 + [-a ack_bytes] [-q request_bytes] [-D rdma_bytes] |
2273 + [-Q tos] [-a ack_bytes] [-q request_bytes] [-D rdma_bytes] |
1325 + [-d queue_depth] [-t nr_tasks] [-c] [-R] [-V] [-v] |
2274 + [-d queue_depth] [-t nr_tasks] [-c] [-R] [-V] [-v] |
1326 +.fi |
2275 +.fi |
1327 |
2276 |
1328 -.Sh DESCRIPTION |
2277 -.Sh DESCRIPTION |
1329 -.Nm rds-stress |
2278 -.Nm rds-stress |
1421 obtain the address once the control connection is established. |
2370 obtain the address once the control connection is established. |
1422 The active process will choose a local address based on the interface through |
2371 The active process will choose a local address based on the interface through |
1423 which it connects to the destination address. |
2372 which it connects to the destination address. |
1424 -.It Fl a Ar ack_bytes |
2373 -.It Fl a Ar ack_bytes |
1425 +.TP |
2374 +.TP |
2375 +\fB\-Q tos |
|
2376 +Uses the RDS connection between IP addresses with the specified tos value. By |
|
2377 +default, the base (tos = 0) RDS connection is used. |
|
2378 +.TP |
|
1426 +\fB\-a ack_bytes |
2379 +\fB\-a ack_bytes |
1427 This specifies the size of the ack messages, in bytes. There is a minimum size |
2380 This specifies the size of the ack messages, in bytes. There is a minimum size |
1428 which depends on the format of the ack messages, which may change over time. |
2381 which depends on the format of the ack messages, which may change over time. |
1429 See section "Message Sizes" below. |
2382 See section "Message Sizes" below. |
1430 -.It Fl q Ar request_bytes |
2383 -.It Fl q Ar request_bytes |
1437 +.TP |
2390 +.TP |
1438 +\fB\-D rdma_bytes |
2391 +\fB\-D rdma_bytes |
1439 RDSv3 is capable of transmitting part of a message via RDMA directly from |
2392 RDSv3 is capable of transmitting part of a message via RDMA directly from |
1440 application buffer to application buffer. This option enables RDMA support |
2393 application buffer to application buffer. This option enables RDMA support |
1441 in rds-stress: request packets include parameters for an RDMA READ or WRITE |
2394 in rds-stress: request packets include parameters for an RDMA READ or WRITE |
1442 @@ -100,20 +103,25 @@ |
2395 @@ -100,20 +107,25 @@ |
1443 operation, which the receiving process executes at the time the ACK packet |
2396 operation, which the receiving process executes at the time the ACK packet |
1444 is sent. |
2397 is sent. |
1445 See section "Message Sizes" below. |
2398 See section "Message Sizes" below. |
1446 -.It Fl d Ar queue_depth |
2399 -.It Fl d Ar queue_depth |
1447 +.TP |
2400 +.TP |
1468 +.TP |
2421 +.TP |
1469 +\fB\-c |
2422 +\fB\-c |
1470 This causes rds-stress to create child tasks which just consume CPU cycles. |
2423 This causes rds-stress to create child tasks which just consume CPU cycles. |
1471 One task is created for each CPU in the system. First each child observes the |
2424 One task is created for each CPU in the system. First each child observes the |
1472 maximum rate at which it can consume cycles. This means that this option |
2425 maximum rate at which it can consume cycles. This means that this option |
1473 @@ -121,50 +129,67 @@ |
2426 @@ -121,50 +133,67 @@ |
1474 use of the system by observing the lesser rate at which the children consume |
2427 use of the system by observing the lesser rate at which the children consume |
1475 cycles. This option is *not* shared between the active and passive instances. |
2428 cycles. This option is *not* shared between the active and passive instances. |
1476 It must be specified on each rds-stress command line. |
2429 It must be specified on each rds-stress command line. |
1477 -.It Fl R |
2430 -.It Fl R |
1478 +.TP |
2431 +.TP |
1535 +mbi K/s |
2488 +mbi K/s |
1536 +The total number of bytes that are being received via RDMA READs and |
2489 +The total number of bytes that are being received via RDMA READs and |
1537 WRITEs for all children. |
2490 WRITEs for all children. |
1538 -.It tx us/c |
2491 -.It tx us/c |
1539 +.TP |
2492 +.TP |
1540 +mbi K/s |
2493 +mbo K/s |
1541 +The total number of bytes that are being transmitted via RDMA READs and |
2494 +The total number of bytes that are being transmitted via RDMA READs and |
1542 +WRITEs for all children. |
2495 +WRITEs for all children. |
1543 +.TP |
2496 +.TP |
1544 +tx us/c |
2497 +tx us/c |
1545 The average number of microseconds spent in sendmsg() calls. |
2498 The average number of microseconds spent in sendmsg() calls. |
1555 +.TP |
2508 +.TP |
1556 +cpu % |
2509 +cpu % |
1557 This is the percentage of available CPU resources on this machine that are being |
2510 This is the percentage of available CPU resources on this machine that are being |
1558 consumed since rds-stress started running. It will show -1.00 if -c is not |
2511 consumed since rds-stress started running. It will show -1.00 if -c is not |
1559 given. It is calculated based on the amount of CPU resources that CPU soaking |
2512 given. It is calculated based on the amount of CPU resources that CPU soaking |
1560 @@ -171,4 +196,3 @@ |
2513 @@ -171,4 +200,3 @@ |
1561 tasks are able to consume. This lets it measure CPU use by the system, say in |
2514 tasks are able to consume. This lets it measure CPU use by the system, say in |
1562 interrupt handlers, that task-based CPU accounting does not include. |
2515 interrupt handlers, that task-based CPU accounting does not include. |
1563 For this to work rds-stress must be started with -c on an idle system. |
2516 For this to work rds-stress must be started with -c on an idle system. |
1564 -.El |
2517 -.El |
1565 diff -r -u /tmp/rds-tools-2.0.4/include/rds.h rds-tools-2.0.7/include/rds.h |
2518 diff -r -u /tmp/rds-tools-2.0.4/include/rds.h rds-tools-2.0.7/include/rds.h |