branch | s11-update |
changeset 3195 | cf6a5a756b74 |
parent 715 | eed3ed08f692 |
3193:e45380d8d511 | 3195:cf6a5a756b74 |
---|---|
13 #include <syscall.h> |
13 #include <syscall.h> |
14 +#endif |
14 +#endif |
15 #include <sys/stat.h> |
15 #include <sys/stat.h> |
16 #include <sys/poll.h> |
16 #include <sys/poll.h> |
17 #include <ctype.h> |
17 #include <ctype.h> |
18 @@ -22,10 +28,16 @@ |
18 @@ -22,12 +28,27 @@ |
19 #include <fcntl.h> |
19 #include <fcntl.h> |
20 #include <sched.h> |
20 #include <sched.h> |
21 #include <getopt.h> |
21 #include <getopt.h> |
22 +#include <sys/ioctl.h> |
|
22 +#if !(defined(__SVR4) && defined(__sun)) |
23 +#if !(defined(__SVR4) && defined(__sun)) |
23 #include <byteswap.h> |
24 #include <byteswap.h> |
24 #include "rds.h" |
25 #include "rds.h" |
25 - |
26 - |
26 +#else |
27 +#else |
29 #include "pfhack.h" |
30 #include "pfhack.h" |
30 +#if defined(__SVR4) && defined(__sun) |
31 +#if defined(__SVR4) && defined(__sun) |
31 +#include <infiniband/ofa_solaris.h> |
32 +#include <infiniband/ofa_solaris.h> |
32 +#endif |
33 +#endif |
33 |
34 |
35 +#if defined(__SVR4) && defined(__sun) |
|
34 /* |
36 /* |
37 + * This definition is forward looking and is not yet present |
|
38 + * in Solaris rds.h file |
|
39 + */ |
|
40 +#define RDS_CMSG_RDMA_SEND_STATUS RDS_CMSG_RDMA_STATUS |
|
41 +#endif |
|
42 + |
|
43 +/* |
|
35 * |
44 * |
36 @@ -102,6 +114,10 @@ |
45 * TODO |
46 * - checksum the data some day. |
|
47 @@ -45,8 +66,9 @@ |
|
48 M_RDMA_READ_ONLY, |
|
49 M_RDMA_WRITE_ONLY |
|
50 }; |
|
51 +#define VERSION_MAX_LEN 16 |
|
52 |
|
53 -struct options { |
|
54 +struct options_2_0_6 { |
|
55 uint32_t req_depth; |
|
56 uint32_t req_size; |
|
57 uint32_t ack_size; |
|
58 @@ -76,9 +98,68 @@ |
|
59 uint32_t connect_retries; |
|
60 } __attribute__((packed)); |
|
61 |
|
62 +struct options { |
|
63 + char version[VERSION_MAX_LEN]; |
|
64 + uint32_t req_depth; |
|
65 + uint32_t req_size; |
|
66 + uint32_t ack_size; |
|
67 + uint32_t rdma_size; |
|
68 + uint32_t send_addr; |
|
69 + uint32_t receive_addr; |
|
70 + uint16_t starting_port; |
|
71 + uint16_t nr_tasks; |
|
72 + uint32_t run_time; |
|
73 + uint8_t summary_only; |
|
74 + uint8_t rtprio; |
|
75 + uint8_t tracing; |
|
76 + uint8_t verify; |
|
77 + uint8_t show_params; |
|
78 + uint8_t show_perfdata; |
|
79 + uint8_t use_cong_monitor; |
|
80 + uint8_t rdma_use_once; |
|
81 + uint8_t rdma_use_get_mr; |
|
82 + uint8_t rdma_use_fence; |
|
83 + uint8_t rdma_cache_mrs; |
|
84 + uint8_t rdma_key_o_meter; |
|
85 + uint8_t suppress_warnings; |
|
86 + uint8_t simplex; |
|
87 + uint8_t rw_mode; |
|
88 + uint32_t rdma_vector; |
|
89 + uint32_t rdma_alignment; |
|
90 + uint32_t connect_retries; |
|
91 + uint8_t tos; |
|
92 + uint8_t async; |
|
93 +} __attribute__((packed)); |
|
94 + |
|
95 +#define MAX_BUCKETS 16 |
|
96 + |
|
97 static struct options opt; |
|
98 static int control_fd; |
|
99 +static uint64_t rtt_threshold; |
|
100 +static int show_histogram; |
|
101 +static int reset_connection; |
|
102 +static char peer_version[VERSION_MAX_LEN]; |
|
103 |
|
104 +static int get_bucket(uint64_t rtt_time) |
|
105 +{ |
|
106 + int i; |
|
107 + uint64_t l_rtt_time = rtt_time; |
|
108 + |
|
109 + if (!l_rtt_time) |
|
110 + i = 0; |
|
111 + else |
|
112 + { |
|
113 + i = -1; |
|
114 + while (l_rtt_time) |
|
115 + { |
|
116 + i++; |
|
117 + l_rtt_time = (l_rtt_time >> 1); |
|
118 + } |
|
119 + } |
|
120 + |
|
121 + return i; |
|
122 +} |
|
123 + |
|
124 struct counter { |
|
125 uint64_t nr; |
|
126 uint64_t sum; |
|
127 @@ -102,6 +183,10 @@ |
|
37 |
128 |
38 #define NR_STATS S__LAST |
129 #define NR_STATS S__LAST |
39 |
130 |
40 +#if defined(__SVR4) && defined(__sun) |
131 +#if defined(__SVR4) && defined(__sun) |
41 +int sol_ioctl(int, int, struct rds_info_arg *, socklen_t *, int *); |
132 +int sol_ioctl(int, int, struct rds_info_arg *, socklen_t *, int *); |
42 +#endif |
133 +#endif |
43 + |
134 + |
44 /* |
135 /* |
45 * Parents share a mapped array of these with their children. Each child |
136 * Parents share a mapped array of these with their children. Each child |
46 * gets one. It's used to communicate between the child and the parent |
137 * gets one. It's used to communicate between the child and the parent |
47 @@ -110,6 +126,7 @@ |
138 @@ -110,9 +195,11 @@ |
48 struct child_control { |
139 struct child_control { |
49 pid_t pid; |
140 pid_t pid; |
50 int ready; |
141 int ready; |
51 + int stopping; |
142 + int stopping; |
52 struct timeval start; |
143 struct timeval start; |
53 struct counter cur[NR_STATS]; |
144 struct counter cur[NR_STATS]; |
54 struct counter last[NR_STATS]; |
145 struct counter last[NR_STATS]; |
55 @@ -254,7 +271,20 @@ |
146 + uint64_t latency_histogram[MAX_BUCKETS]; |
147 } __attribute__((aligned (256))); /* arbitrary */ |
|
148 |
|
149 struct soak_control { |
|
150 @@ -132,6 +219,7 @@ |
|
151 */ |
|
152 #define OP_REQ 1 |
|
153 #define OP_ACK 2 |
|
154 +#define OP_DUMP 3 |
|
155 |
|
156 #define RDMA_OP_READ 1 |
|
157 #define RDMA_OP_WRITE 2 |
|
158 @@ -148,7 +236,7 @@ |
|
159 uint16_t from_port; |
|
160 uint16_t to_port; |
|
161 uint16_t index; |
|
162 - uint8_t op; |
|
163 + uint8_t op; |
|
164 |
|
165 /* RDMA related. |
|
166 * rdma_op must be the first field, because we |
|
167 @@ -162,12 +250,21 @@ |
|
168 uint32_t rdma_size; |
|
169 uint32_t rdma_vector; |
|
170 |
|
171 - uint8_t data[0]; |
|
172 + /* Async send related. */ |
|
173 + uint8_t retry; |
|
174 + uint8_t rdma_remote_err; |
|
175 + uint8_t pending; |
|
176 + |
|
177 + uint8_t data[0]; |
|
178 } __attribute__((packed)); |
|
179 |
|
180 #define MIN_MSG_BYTES (sizeof(struct header)) |
|
181 #define BASIC_HEADER_SIZE (size_t)(&((struct header *) 0)->rdma_op) |
|
182 |
|
183 +#define print_outlier(...) do { \ |
|
184 + fprintf(stderr, __VA_ARGS__); \ |
|
185 +} while (0) |
|
186 + |
|
187 #define die(fmt...) do { \ |
|
188 fprintf(stderr, fmt); \ |
|
189 exit(1); \ |
|
190 @@ -254,7 +351,20 @@ |
|
56 |
191 |
57 die("invalid host name or dotted quad '%s'\n", ptr); |
192 die("invalid host name or dotted quad '%s'\n", ptr); |
58 } |
193 } |
59 +#if defined(__SVR4) && defined(__sun) |
194 +#if defined(__SVR4) && defined(__sun) |
60 +static lgrp_id_t lgrp_id = -1; |
195 +static lgrp_id_t lgrp_id = -1; |
71 +#endif |
206 +#endif |
72 + |
207 + |
73 static void usage(void) |
208 static void usage(void) |
74 { |
209 { |
75 fprintf(stderr, "rds-stress version %s\n", RDS_VERSION); |
210 fprintf(stderr, "rds-stress version %s\n", RDS_VERSION); |
76 @@ -281,6 +311,9 @@ |
211 @@ -273,6 +383,7 @@ |
212 " -d [depth, 1] request pipeline depth, nr outstanding\n" |
|
213 " -t [nr, 1] number of child tasks\n" |
|
214 " -T [seconds, 0] runtime of test, 0 means infinite\n" |
|
215 + " -Q [tos, 0] Type of Service\n" |
|
216 " -D [bytes] RDMA: size\n" |
|
217 " -I [iovecs, 1] RDMA: number of user buffers to target (max 512)\n" |
|
218 " -M [nr, 0] RDMA: mode (0=readwrite,1=readonly,2=writeonly)\n" |
|
219 @@ -281,6 +392,9 @@ |
|
77 " -c measure cpu use with per-cpu soak processes\n" |
220 " -c measure cpu use with per-cpu soak processes\n" |
78 " -V trace execution\n" |
221 " -V trace execution\n" |
79 " -z print a summary at end of test only\n" |
222 " -z print a summary at end of test only\n" |
80 +#if defined(__SVR4) && defined(__sun) |
223 +#if defined(__SVR4) && defined(__sun) |
81 + " -g [lgrpid] bind the process to the specified lgrp\n" |
224 + " -g [lgrpid] bind the process to the specified lgrp\n" |
82 +#endif |
225 +#endif |
83 "\n" |
226 "\n" |
84 "Example:\n" |
227 "Example:\n" |
85 " recv$ rds-stress\n" |
228 " recv$ rds-stress\n" |
86 @@ -310,7 +343,7 @@ |
229 @@ -310,7 +424,7 @@ |
87 static void check_parent(pid_t pid) |
230 static void check_parent(pid_t pid) |
88 { |
231 { |
89 if (pid != getppid()) |
232 if (pid != getppid()) |
90 - die("parent %u exited\n", pid); |
233 - die("parent %u exited\n", pid); |
91 + die("parent %u exited\n", (int)pid); |
234 + die("parent %u exited\n", (int)pid); |
92 } |
235 } |
93 |
236 |
94 /* |
237 /* |
95 @@ -334,6 +367,7 @@ |
238 @@ -334,6 +448,7 @@ |
96 msg_pattern[i] = k; |
239 msg_pattern[i] = k; |
97 } |
240 } |
98 |
241 |
99 +#if !(defined(__SVR4) && defined(__sun)) |
242 +#if !(defined(__SVR4) && defined(__sun)) |
100 #if __BYTE_ORDER == __LITTLE_ENDIAN |
243 #if __BYTE_ORDER == __LITTLE_ENDIAN |
101 #define htonll(x) bswap_64(x) |
244 #define htonll(x) bswap_64(x) |
102 #define ntohll(x) bswap_64(x) |
245 #define ntohll(x) bswap_64(x) |
103 @@ -341,6 +375,7 @@ |
246 @@ -341,6 +456,7 @@ |
104 #define htonll(x) (x) |
247 #define htonll(x) (x) |
105 #define ntohll(x) (x) |
248 #define ntohll(x) (x) |
106 #endif |
249 #endif |
107 +#endif /* Not sun */ |
250 +#endif /* Not sun */ |
108 |
251 |
109 static void encode_hdr(struct header *dst, const struct header *hdr) |
252 static void encode_hdr(struct header *dst, const struct header *hdr) |
110 { |
253 { |
111 @@ -584,7 +619,11 @@ |
254 @@ -361,6 +477,7 @@ |
255 dst->rdma_key = htonll(hdr->rdma_key); |
|
256 dst->rdma_size = htonl(hdr->rdma_size); |
|
257 dst->rdma_vector = htonl(hdr->rdma_vector); |
|
258 + dst->retry = hdr->retry; |
|
259 } |
|
260 |
|
261 static void decode_hdr(struct header *dst, const struct header *hdr) |
|
262 @@ -382,6 +499,7 @@ |
|
263 dst->rdma_key = ntohll(hdr->rdma_key); |
|
264 dst->rdma_size = ntohl(hdr->rdma_size); |
|
265 dst->rdma_vector = ntohl(hdr->rdma_vector); |
|
266 + dst->retry = hdr->retry; |
|
267 } |
|
268 |
|
269 static void fill_hdr(void *message, uint32_t bytes, struct header *hdr) |
|
270 @@ -412,11 +530,19 @@ |
|
271 * Compare incoming message header with expected header. All header fields |
|
272 * are in host byte order except for address and port fields. |
|
273 */ |
|
274 -static int check_hdr(void *message, uint32_t bytes, const struct header *hdr) |
|
275 +static int check_hdr(void *message, uint32_t bytes, struct header *hdr, struct options *opts) |
|
276 { |
|
277 struct header msghdr; |
|
278 + uint32_t inc_seq; |
|
279 + uint32_t my_seq; |
|
280 |
|
281 decode_hdr(&msghdr, message); |
|
282 + inc_seq = msghdr.seq; |
|
283 + my_seq = hdr->seq; |
|
284 + |
|
285 + if (msghdr.retry && (inc_seq < my_seq)) |
|
286 + return -1; |
|
287 + |
|
288 if (memcmp(&msghdr, hdr, BASIC_HEADER_SIZE)) { |
|
289 #define bleh(var, disp) \ |
|
290 disp(hdr->var), \ |
|
291 @@ -428,7 +554,7 @@ |
|
292 * with stdout() and we don't get things stomping on each |
|
293 * other |
|
294 */ |
|
295 - printf( "An incoming message had a header which\n" |
|
296 + printf( "An incoming message had a %s header which\n" |
|
297 "didn't contain the fields we expected:\n" |
|
298 " member expected eq got\n" |
|
299 " seq %15u %s %15u\n" |
|
300 @@ -438,6 +564,7 @@ |
|
301 " to_port %15u %s %15u\n" |
|
302 " index %15u %s %15u\n" |
|
303 " op %15u %s %15u\n", |
|
304 + (msghdr.retry) ? "RETRY" : "", |
|
305 bleh(seq, /**/), |
|
306 bleh(from_addr, inet_ntoa_32), |
|
307 bleh(from_port, ntohs), |
|
308 @@ -569,6 +696,9 @@ |
|
309 |
|
310 fcntl(fd, F_SETFL, O_NONBLOCK); |
|
311 |
|
312 + if (opts->tos && ioctl(fd, SIOCRDSSETTOS, &opts->tos)) |
|
313 + die_errno("ERROR: failed to set TOS\n"); |
|
314 + |
|
315 return fd; |
|
316 } |
|
317 |
|
318 @@ -584,7 +714,11 @@ |
|
112 if (opts->receive_addr == 0) |
319 if (opts->receive_addr == 0) |
113 return 1; |
320 return 1; |
114 |
321 |
115 +#if defined(__SVR4) && defined(__sun) |
322 +#if defined(__SVR4) && defined(__sun) |
116 + sin.sin_family = AF_INET_OFFLOAD; |
323 + sin.sin_family = AF_INET_OFFLOAD; |
118 sin.sin_family = AF_INET; |
325 sin.sin_family = AF_INET; |
119 +#endif |
326 +#endif |
120 sin.sin_port = htons(opts->starting_port); |
327 sin.sin_port = htons(opts->starting_port); |
121 sin.sin_addr.s_addr = htonl(opts->receive_addr); |
328 sin.sin_addr.s_addr = htonl(opts->receive_addr); |
122 |
329 |
123 @@ -677,7 +716,11 @@ |
330 @@ -639,7 +773,7 @@ |
331 mr_args.flags = RDS_FREE_MR_ARGS_INVALIDATE; |
|
332 #endif |
|
333 if (setsockopt(fd, sol, RDS_FREE_MR, &mr_args, sizeof(mr_args))) |
|
334 - die_errno("setsockopt(RDS_FREE_MR) failed"); |
|
335 + return; |
|
336 mrs_allocated--; |
|
337 } |
|
338 |
|
339 @@ -677,7 +811,11 @@ |
|
124 size = sizeof(struct rdma_key_o_meter) |
340 size = sizeof(struct rdma_key_o_meter) |
125 + 2 * nr_tasks * sizeof(*kt) |
341 + 2 * nr_tasks * sizeof(*kt) |
126 + 2 * RDMA_MAX_TRACKED_KEYS * sizeof(*ks); |
342 + 2 * RDMA_MAX_TRACKED_KEYS * sizeof(*ks); |
127 +#if defined(__SVR4) && defined(__sun) |
343 +#if defined(__SVR4) && defined(__sun) |
128 + base = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_SHARED, -1, 0); |
344 + base = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_SHARED, -1, 0); |
130 base = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_SHARED, 0, 0); |
346 base = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_SHARED, 0, 0); |
131 +#endif |
347 +#endif |
132 if (base == MAP_FAILED) |
348 if (base == MAP_FAILED) |
133 die_errno("alloc_rdma_buffers: mmap failed"); |
349 die_errno("alloc_rdma_buffers: mmap failed"); |
134 |
350 |
135 @@ -828,7 +871,7 @@ |
351 @@ -828,13 +966,20 @@ |
136 } |
352 } |
137 |
353 |
138 if (!failed) |
354 if (!failed) |
139 - trace("compare pass pattern %Lx addr %p\n", |
355 - trace("compare pass pattern %Lx addr %p\n", |
140 + trace("compare pass pattern 0x%Lx addr %p\n", |
356 + trace("compare pass pattern 0x%Lx addr %p\n", |
141 (unsigned long long) pattern, addr); |
357 (unsigned long long) pattern, addr); |
142 } |
358 } |
143 |
359 |
144 @@ -865,7 +908,11 @@ |
360 +struct retry_entry { |
361 + uint32_t retries; |
|
362 + uint32_t seq; |
|
363 + int status; |
|
364 +}; |
|
365 + |
|
366 struct task { |
|
367 unsigned int nr; |
|
368 unsigned int pending; |
|
369 + int trace; |
|
370 unsigned int unacked; |
|
371 struct sockaddr_in src_addr; /* same for all tasks */ |
|
372 struct sockaddr_in dst_addr; |
|
373 @@ -846,7 +991,14 @@ |
|
374 uint16_t recv_index; |
|
375 struct timeval * send_time; |
|
376 struct header * ack_header; |
|
377 + struct header * ack2_header; |
|
378 + struct header * req_header; |
|
379 + uint64_t * retry_token; |
|
380 + uint32_t retries; |
|
381 + uint32_t last_retry_seq; |
|
382 + uint32_t retry_index; |
|
383 |
|
384 + |
|
385 /* RDMA related stuff */ |
|
386 uint64_t ** local_buf; |
|
387 uint64_t ** rdma_buf; |
|
388 @@ -865,7 +1017,11 @@ |
|
145 /* We use mmap here rather than malloc, because it is always |
389 /* We use mmap here rather than malloc, because it is always |
146 * page aligned. */ |
390 * page aligned. */ |
147 len = 2 * opts->nr_tasks * opts->req_depth * (opts->rdma_vector * opts->rdma_size) + sys_page_size; |
391 len = 2 * opts->nr_tasks * opts->req_depth * (opts->rdma_vector * opts->rdma_size) + sys_page_size; |
148 +#if defined(__SVR4) && defined(__sun) |
392 +#if defined(__SVR4) && defined(__sun) |
149 + base = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); |
393 + base = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); |
151 base = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE, 0, 0); |
395 base = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE, 0, 0); |
152 +#endif |
396 +#endif |
153 if (base == MAP_FAILED) |
397 if (base == MAP_FAILED) |
154 die_errno("alloc_rdma_buffers: mmap failed"); |
398 die_errno("alloc_rdma_buffers: mmap failed"); |
155 memset(base, 0x2f, len); |
399 memset(base, 0x2f, len); |
156 @@ -915,17 +962,16 @@ |
400 @@ -915,17 +1071,16 @@ |
157 if (RDMA_OP_READ == hdr->rdma_op) { |
401 if (RDMA_OP_READ == hdr->rdma_op) { |
158 if (opt.verify) |
402 if (opt.verify) |
159 rds_fill_buffer(rdma_addr, rdma_size, hdr->rdma_pattern); |
403 rds_fill_buffer(rdma_addr, rdma_size, hdr->rdma_pattern); |
160 - trace("Requesting RDMA read for pattern %Lx " |
404 - trace("Requesting RDMA read for pattern %Lx " |
161 - "local addr to rdma read %p\n", |
405 - "local addr to rdma read %p\n", |
175 + trace("Requesting RDMA write for pattern 0x%Lx", |
419 + trace("Requesting RDMA write for pattern 0x%Lx", |
176 + hdr->rdma_pattern); |
420 + hdr->rdma_pattern); |
177 } |
421 } |
178 } |
422 } |
179 |
423 |
180 @@ -947,7 +993,7 @@ |
424 @@ -947,7 +1102,7 @@ |
181 die("Unexpected RDMA op %u in request\n", in_hdr->rdma_op); |
425 die("Unexpected RDMA op %u in request\n", in_hdr->rdma_op); |
182 |
426 |
183 |
427 |
184 - trace("RDS received request to issue rdma %s len %lu rva %Lx key %Lx pattern %Lx\n", |
428 - trace("RDS received request to issue rdma %s len %lu rva %Lx key %Lx pattern %Lx\n", |
185 + trace("RDS received request to issue rdma %s len %lu rva 0x%Lx key 0x%Lx pattern 0x%Lx\n", |
429 + trace("RDS received request to issue rdma %s len %lu rva 0x%Lx key 0x%Lx pattern 0x%Lx\n", |
186 in_hdr->rdma_op == RDMA_OP_WRITE? "write to" : "read from", |
430 in_hdr->rdma_op == RDMA_OP_WRITE? "write to" : "read from", |
187 rdma_size, |
431 rdma_size, |
188 (unsigned long long) in_hdr->rdma_addr, |
432 (unsigned long long) in_hdr->rdma_addr, |
189 @@ -1007,6 +1053,9 @@ |
433 @@ -966,21 +1121,33 @@ |
434 hdr->rdma_vector = in_hdr->rdma_vector; |
|
435 } |
|
436 |
|
437 -static inline unsigned int rdma_user_token(struct task *t, unsigned int qindex) |
|
438 +static inline uint64_t rdma_user_token(struct task *t, unsigned int qindex, unsigned int type, uint32_t seq) |
|
439 { |
|
440 - return t->nr * opt.req_depth + qindex; |
|
441 + uint64_t tmp = seq; |
|
442 + return (tmp << 32) | ((t->nr * opt.req_depth + qindex) << 2 | type); |
|
443 } |
|
444 |
|
445 -static void rdma_mark_completed(struct task *tasks, unsigned int token, int status) |
|
446 +static void rdma_mark_completed(struct task *tasks, uint64_t token, int status, struct options *opts) |
|
447 { |
|
448 struct task *t; |
|
449 unsigned int i; |
|
450 + struct header *hdr = NULL; |
|
451 + uint32_t seq = token >> 32; |
|
452 + unsigned int type = token & 0x03; |
|
453 + unsigned int index = (token & 0xFFFFFFFF) >> 2; |
|
454 |
|
455 - trace("RDS rdma completion for token %x\n", token); |
|
456 + trace("RDS rdma completion for token 0x%lx\n", token); |
|
457 |
|
458 - t = &tasks[token / opt.req_depth]; |
|
459 - i = token % opt.req_depth; |
|
460 + t = &tasks[index / opt.req_depth]; |
|
461 + i = index % opt.req_depth; |
|
462 |
|
463 + if (opts->async) { |
|
464 + if (type == OP_REQ) |
|
465 + hdr = &t->req_header[i]; |
|
466 + else |
|
467 + hdr = &t->ack2_header[i]; |
|
468 + } |
|
469 + |
|
470 if (status) { |
|
471 const char *errmsg; |
|
472 |
|
473 @@ -987,20 +1154,50 @@ |
|
474 switch (status) { |
|
475 case RDS_RDMA_REMOTE_ERROR: |
|
476 errmsg = "remote error"; break; |
|
477 - case RDS_RDMA_CANCELED: |
|
478 - errmsg = "operation was cancelled"; break; |
|
479 - case RDS_RDMA_DROPPED: |
|
480 + case RDS_RDMA_SEND_DROPPED: |
|
481 errmsg = "operation was dropped"; break; |
|
482 - case RDS_RDMA_OTHER_ERROR: |
|
483 + case RDS_RDMA_SEND_CANCELED: |
|
484 + errmsg = "operation was cancelled"; break; |
|
485 + case RDS_RDMA_SEND_OTHER_ERROR: |
|
486 errmsg = "other error"; break; |
|
487 default: |
|
488 errmsg = "unknown error"; break; |
|
489 } |
|
490 |
|
491 - printf("%s:%u: RDMA op %u failed: %s\n", |
|
492 + trace("%s:%u: %s failed: %s\n", |
|
493 inet_ntoa(t->dst_addr.sin_addr), |
|
494 ntohs(t->dst_addr.sin_port), |
|
495 - i, errmsg); |
|
496 + type ? "SEND" : "RDMA", |
|
497 + errmsg); |
|
498 + |
|
499 + if (hdr && |
|
500 + (status == RDS_RDMA_SEND_DROPPED || |
|
501 + status == RDS_RDMA_REMOTE_ERROR)) { |
|
502 + |
|
503 + if (hdr->seq == seq) { |
|
504 + hdr->retry = 1; |
|
505 + if (hdr->seq > t->last_retry_seq) { |
|
506 + if (status == RDS_RDMA_REMOTE_ERROR) |
|
507 + hdr->rdma_remote_err = 1; |
|
508 + t->retry_token[t->retry_index] = token; |
|
509 + t->retry_index = (t->retry_index + 1) % |
|
510 + (2 * opts->req_depth); |
|
511 + t->retries += 1; |
|
512 + t->last_retry_seq = hdr->seq; |
|
513 + if (t->retries > 2 * opts->req_depth) |
|
514 + die("Exceeded MAX retry entries..\n"); |
|
515 + } |
|
516 + } else |
|
517 + die("SEQ Out-Of-Sync: %u/%u\n", hdr->seq, seq); |
|
518 + } else if (hdr) { |
|
519 + hdr->pending = 0; |
|
520 + hdr->retry = 0; |
|
521 + hdr->rdma_remote_err = 0; |
|
522 + } |
|
523 + } else if (hdr) { |
|
524 + hdr->pending = 0; |
|
525 + hdr->retry = 0; |
|
526 + hdr->rdma_remote_err = 0; |
|
527 } |
|
528 |
|
529 t->rdma_inflight[i] = 0; |
|
530 @@ -1007,6 +1204,9 @@ |
|
190 t->drain_rdmas = 0; |
531 t->drain_rdmas = 0; |
191 } |
532 } |
192 |
533 |
193 +#if defined(__SVR4) && defined(__sun) |
534 +#if defined(__SVR4) && defined(__sun) |
194 +#undef MSG_MAXIOVLEN |
535 +#undef MSG_MAXIOVLEN |
195 +#endif |
536 +#endif |
196 #define MSG_MAXIOVLEN 2 |
537 #define MSG_MAXIOVLEN 2 |
197 |
538 |
198 /* |
539 /* |
199 @@ -1560,7 +1609,12 @@ |
540 @@ -1018,11 +1218,14 @@ |
541 static char ctlbuf[1024]; |
|
542 struct cmsghdr *cmsg; |
|
543 |
|
544 - msg->msg_control = ctlbuf; |
|
545 - msg->msg_controllen = CMSG_SPACE(size); |
|
546 - |
|
547 - cmsg = CMSG_FIRSTHDR(msg); |
|
548 - cmsg->cmsg_level = sol; |
|
549 + if (!msg->msg_control) { |
|
550 + msg->msg_control = ctlbuf; |
|
551 + msg->msg_controllen = CMSG_SPACE(size); |
|
552 + cmsg = CMSG_FIRSTHDR(msg); |
|
553 + } else { |
|
554 + cmsg = (struct cmsghdr *)((char *)msg->msg_control + msg->msg_controllen); |
|
555 + msg->msg_controllen += CMSG_SPACE(size); |
|
556 + }cmsg->cmsg_level = sol; |
|
557 cmsg->cmsg_type = type; |
|
558 cmsg->cmsg_len = CMSG_LEN(size); |
|
559 memcpy(CMSG_DATA(cmsg), ptr, size); |
|
560 @@ -1034,7 +1237,7 @@ |
|
561 * the ACK packet. |
|
562 */ |
|
563 static void rdma_build_cmsg_xfer(struct msghdr *msg, const struct header *hdr, |
|
564 - unsigned int user_token, void *local_buf) |
|
565 + uint64_t user_token, void *local_buf) |
|
566 { |
|
567 |
|
568 #define RDS_MAX_IOV 512 /* FIX_ME - put this into rds.h or use socket max ?*/ |
|
569 @@ -1048,7 +1251,7 @@ |
|
570 rdma_size = hdr->rdma_size; |
|
571 rdma_vector = hdr->rdma_vector; |
|
572 |
|
573 - trace("RDS issuing rdma for token %x key %Lx len %u local_buf %p vector %u\n", |
|
574 + trace("RDS issuing rdma for token 0x%lx key 0x%llx len %d local_buf %p vector %d\n", |
|
575 user_token, |
|
576 (unsigned long long) hdr->rdma_key, |
|
577 rdma_size, local_buf, |
|
578 @@ -1102,6 +1305,15 @@ |
|
579 rdma_put_cmsg(msg, RDS_CMSG_RDMA_ARGS, &args, sizeof(args)); |
|
580 } |
|
581 |
|
582 +static void build_cmsg_async_send(struct msghdr *msg, uint64_t user_token) |
|
583 +{ |
|
584 + struct rds_asend_args args; |
|
585 + |
|
586 + args.flags |= RDS_SEND_NOTIFY_ME; |
|
587 + args.user_token = user_token; |
|
588 + rdma_put_cmsg(msg, RDS_CMSG_ASYNC_SEND, &args, sizeof(args)); |
|
589 +} |
|
590 + |
|
591 static void rdma_build_cmsg_dest(struct msghdr *msg, rds_rdma_cookie_t rdma_dest) |
|
592 { |
|
593 rdma_put_cmsg(msg, RDS_CMSG_RDMA_DEST, &rdma_dest, sizeof(rdma_dest)); |
|
594 @@ -1174,19 +1386,17 @@ |
|
595 hdr->index = qindex; |
|
596 } |
|
597 |
|
598 -static int send_packet(int fd, struct task *t, |
|
599 - struct header *hdr, unsigned int size) |
|
600 +static int send_msg(int fd, struct task *t, struct header *hdr, |
|
601 + unsigned int size, struct options *opts, |
|
602 + struct child_control *ctl) |
|
603 { |
|
604 - unsigned char buf[size], *rdma_flight_recorder = NULL; |
|
605 + unsigned char buf[size]; |
|
606 + uint8_t *rdma_flight_recorder = NULL; |
|
607 rds_rdma_cookie_t cookie = 0; |
|
608 struct msghdr msg; |
|
609 struct iovec iov; |
|
610 ssize_t ret; |
|
611 |
|
612 - /* Make sure we always have the current sequence number. |
|
613 - * When we send ACK packets, the seq that gets filled in is |
|
614 - * stale. */ |
|
615 - hdr->seq = t->send_seq; |
|
616 fill_hdr(buf, size, hdr); |
|
617 |
|
618 memset(&msg, 0, sizeof(msg)); |
|
619 @@ -1198,27 +1408,10 @@ |
|
620 iov.iov_base = buf; |
|
621 iov.iov_len = size; |
|
622 |
|
623 - /* If this is a REQ packet in which we pass the MR to the |
|
624 - * peer, extract the RDMA cookie and pass it on in the control |
|
625 - * message for now. */ |
|
626 - if (hdr->op == OP_REQ && hdr->rdma_op != 0) { |
|
627 - if (hdr->rdma_key != 0) { |
|
628 - /* We used GET_MR to obtain a key */ |
|
629 - rdma_build_cmsg_dest(&msg, hdr->rdma_key); |
|
630 - cookie = hdr->rdma_key; |
|
631 - hdr->rdma_key = 0; |
|
632 - } else { |
|
633 - /* Use the RDMA_MAP cmsg to have sendmsg do the |
|
634 - * mapping on the fly. */ |
|
635 - rdma_build_cmsg_map(&msg, hdr->rdma_addr, |
|
636 - hdr->rdma_size * hdr->rdma_vector, |
|
637 - &cookie); |
|
638 - } |
|
639 - } |
|
640 |
|
641 /* If this is an ACK packet with RDMA, build the cmsg |
|
642 - * header that goes with it. */ |
|
643 - if (hdr->op == OP_ACK && hdr->rdma_op != 0) { |
|
644 + * header that goes with it. */ |
|
645 + if (hdr->op == OP_ACK && hdr->rdma_op != 0 && !hdr->rdma_remote_err) { |
|
646 unsigned int qindex = hdr->index; |
|
647 |
|
648 if (t->rdma_inflight[qindex] != 0) { |
|
649 @@ -1230,16 +1423,35 @@ |
|
650 * |
|
651 * We return one of the more obscure error messages, |
|
652 * which we recognize and handle in the top loop. */ |
|
653 - trace("Drain RDMA 0x%x\n", rdma_user_token(t, qindex)); |
|
654 + trace("Drain RDMA 0x%lx\n", rdma_user_token(t, qindex, 0, hdr->seq)); |
|
655 errno = EBADSLT; |
|
656 return -1; |
|
657 } |
|
658 rdma_build_cmsg_xfer(&msg, hdr, |
|
659 - rdma_user_token(t, qindex), |
|
660 + rdma_user_token(t, qindex, 0, hdr->seq), |
|
661 t->local_buf[qindex]); |
|
662 rdma_flight_recorder = &t->rdma_inflight[qindex]; |
|
663 + } else if (opts->async) { |
|
664 + if (hdr->op == OP_REQ) |
|
665 + build_cmsg_async_send(&msg, |
|
666 + rdma_user_token(t, hdr->index, OP_REQ, hdr->seq)); |
|
667 + else |
|
668 + build_cmsg_async_send(&msg, |
|
669 + rdma_user_token(t, hdr->index, OP_ACK, hdr->seq)); |
|
670 } |
|
671 |
|
672 + if (hdr->op == OP_REQ && hdr->rdma_op != 0) { |
|
673 + if (hdr->rdma_key != 0) { |
|
674 + rdma_build_cmsg_dest(&msg, hdr->rdma_key); |
|
675 + cookie = hdr->rdma_key; |
|
676 + hdr->rdma_key = 0; |
|
677 + } else { |
|
678 + rdma_build_cmsg_map(&msg, hdr->rdma_addr, |
|
679 + hdr->rdma_size * hdr->rdma_vector, |
|
680 + &cookie); |
|
681 + } |
|
682 + } |
|
683 + |
|
684 ret = sendmsg(fd, &msg, 0); |
|
685 if (ret < 0) { |
|
686 if (errno != EAGAIN && errno != ENOBUFS) |
|
687 @@ -1256,10 +1468,41 @@ |
|
688 * lower 32bit of the cookie */ |
|
689 rdma_key_o_meter_add(cookie); |
|
690 } |
|
691 + |
|
692 + hdr->pending = 1; |
|
693 + |
|
694 + return ret; |
|
695 +} |
|
696 + |
|
697 +static int send_packet(int fd, struct task *t, |
|
698 + struct header *hdr, unsigned int size, |
|
699 + struct options *opts, struct child_control *ctl) |
|
700 +{ |
|
701 + ssize_t ret; |
|
702 + |
|
703 + /* Make sure we always have the current sequence number. |
|
704 + * When we send ACK packets, the seq that gets filled in is |
|
705 + * stale. */ |
|
706 + hdr->seq = t->send_seq; |
|
707 + |
|
708 + ret = send_msg(fd, t, hdr, size, opts, ctl); |
|
709 + if (ret < 0) return ret; |
|
710 + |
|
711 t->send_seq++; |
|
712 return ret; |
|
713 } |
|
714 |
|
715 +static int resend_packet(int fd, struct task *t, |
|
716 + struct header *hdr, unsigned int size, |
|
717 + struct options *opts, struct child_control *ctl) |
|
718 +{ |
|
719 + ssize_t ret; |
|
720 + |
|
721 + ret = send_msg(fd, t, hdr, size, opts, ctl); |
|
722 + |
|
723 + return ret; |
|
724 +} |
|
725 + |
|
726 static int send_one(int fd, struct task *t, |
|
727 struct options *opts, |
|
728 struct child_control *ctl) |
|
729 @@ -1266,12 +1509,16 @@ |
|
730 { |
|
731 struct timeval start; |
|
732 struct timeval stop; |
|
733 - struct header hdr; |
|
734 + struct header *hdr = &t->req_header[t->send_index]; |
|
735 int ret; |
|
736 |
|
737 - build_header(t, &hdr, OP_REQ, t->send_index); |
|
738 + if (opts->async && hdr->pending) { |
|
739 + return -1; |
|
740 + } |
|
741 + |
|
742 + build_header(t, hdr, OP_REQ, t->send_index); |
|
743 if (opts->rdma_size && t->send_seq > 10) |
|
744 - rdma_build_req(fd, &hdr, t, |
|
745 + rdma_build_req(fd, hdr, t, |
|
746 opts->rdma_size, |
|
747 opts->req_depth, |
|
748 opts->rw_mode, |
|
749 @@ -1279,7 +1526,7 @@ |
|
750 |
|
751 |
|
752 gettimeofday(&start, NULL); |
|
753 - ret = send_packet(fd, t, &hdr, opts->req_size); |
|
754 + ret = send_packet(fd, t, hdr, opts->req_size, opts, ctl); |
|
755 gettimeofday(&stop, NULL); |
|
756 |
|
757 if (ret < 0) |
|
758 @@ -1302,10 +1549,15 @@ |
|
759 struct child_control *ctl) |
|
760 { |
|
761 struct header *hdr = &t->ack_header[qindex]; |
|
762 + struct header *hdr2 = &t->ack2_header[qindex]; |
|
763 ssize_t ret; |
|
764 |
|
765 + if (opts->async && hdr2->pending) { |
|
766 + return -1; |
|
767 + } |
|
768 + |
|
769 /* send an ack in response to the req we just got */ |
|
770 - ret = send_packet(fd, t, hdr, opts->ack_size); |
|
771 + ret = send_packet(fd, t, hdr, opts->ack_size, opts, ctl); |
|
772 if (ret < 0) |
|
773 return ret; |
|
774 if (ret != opts->ack_size) |
|
775 @@ -1324,6 +1576,8 @@ |
|
776 break; |
|
777 } |
|
778 |
|
779 + memcpy(hdr2, hdr, sizeof(struct header)); |
|
780 + |
|
781 return ret; |
|
782 } |
|
783 |
|
784 @@ -1354,8 +1608,49 @@ |
|
785 struct child_control *ctl, |
|
786 int can_send, int do_work) |
|
787 { |
|
788 + struct header *hdr; |
|
789 + unsigned int index; |
|
790 + int req_size; |
|
791 + int num_retries = t->retries; |
|
792 + uint64_t token; |
|
793 + unsigned int type; |
|
794 + unsigned int index2; |
|
795 + unsigned int i; |
|
796 + |
|
797 + while (opts->async && num_retries > 0) { |
|
798 + index = (t->retry_index - num_retries + |
|
799 + (2 * opts->req_depth)) % (2 * opts->req_depth); |
|
800 + |
|
801 + token = t->retry_token[index]; |
|
802 + type = token & 0x03; |
|
803 + index2 = (token & 0xFFFFFFFF) >> 2; |
|
804 + i = index2 % opts->req_depth; |
|
805 + |
|
806 + if (type == OP_REQ) |
|
807 + hdr = &t->req_header[i]; |
|
808 + else |
|
809 + hdr = &t->ack2_header[i]; |
|
810 + |
|
811 + if (!hdr->retry) |
|
812 + goto next; |
|
813 + |
|
814 + if (hdr->op == OP_REQ) |
|
815 + req_size = opts->req_size; |
|
816 + else |
|
817 + req_size = opts->ack_size; |
|
818 + |
|
819 + if (resend_packet(fd, t, hdr, req_size, opts, ctl) < 0) { |
|
820 + return -1; |
|
821 + } |
|
822 + hdr->retry = 0; |
|
823 +next: |
|
824 + num_retries--; |
|
825 + } |
|
826 + t->last_retry_seq = t->retries = 0; |
|
827 + |
|
828 if (ack_anything(fd, t, opts, ctl, can_send) < 0) |
|
829 return -1; |
|
830 + |
|
831 while (do_work && t->pending < opts->req_depth) { |
|
832 if (!can_send) |
|
833 goto eagain; |
|
834 @@ -1375,7 +1670,8 @@ |
|
835 rds_rdma_cookie_t *cookie, |
|
836 struct sockaddr_in *sin, |
|
837 struct timeval *tstamp, |
|
838 - struct task *tasks) |
|
839 + struct task *tasks, |
|
840 + struct options *opts) |
|
841 { |
|
842 struct cmsghdr *cmsg; |
|
843 char cmsgbuf[256]; |
|
844 @@ -1398,15 +1694,16 @@ |
|
845 |
|
846 if (ret < 0) |
|
847 return ret; |
|
848 - if (ret && ret < sizeof(struct header)) |
|
849 + if (ret && !strcmp(RDS_VERSION, peer_version) && |
|
850 + ret < sizeof(struct header)) |
|
851 die("recvmsg() returned short data: %zd", ret); |
|
852 - if (msg.msg_namelen < sizeof(struct sockaddr_in)) |
|
853 + if (ret && msg.msg_namelen < sizeof(struct sockaddr_in)) |
|
854 die("socklen = %d < sizeof(sin) (%zu)\n", |
|
855 msg.msg_namelen, sizeof(struct sockaddr_in)); |
|
856 |
|
857 /* See if the message comes with a RDMA destination */ |
|
858 for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) { |
|
859 - struct rds_rdma_notify notify; |
|
860 + struct rds_rdma_send_notify notify; |
|
861 |
|
862 if (cmsg->cmsg_level != sol) |
|
863 continue; |
|
864 @@ -1432,11 +1729,11 @@ |
|
865 memcpy(cookie, CMSG_DATA(cmsg), sizeof(*cookie)); |
|
866 break; |
|
867 |
|
868 - case RDS_CMSG_RDMA_STATUS: |
|
869 + case RDS_CMSG_RDMA_SEND_STATUS: |
|
870 if (cmsg->cmsg_len < CMSG_LEN(sizeof(notify))) |
|
871 die("RDS_CMSG_RDMA_DEST data too small"); |
|
872 memcpy(¬ify, CMSG_DATA(cmsg), sizeof(notify)); |
|
873 - rdma_mark_completed(tasks, notify.user_token, notify.status); |
|
874 + rdma_mark_completed(tasks, notify.user_token, notify.status, opts); |
|
875 break; |
|
876 } |
|
877 } |
|
878 @@ -1445,7 +1742,8 @@ |
|
879 |
|
880 static int recv_one(int fd, struct task *tasks, |
|
881 struct options *opts, |
|
882 - struct child_control *ctl) |
|
883 + struct child_control *ctl, |
|
884 + struct child_control *all_ctl) |
|
885 { |
|
886 char buf[max(opts->req_size, opts->ack_size)]; |
|
887 rds_rdma_cookie_t rdma_dest = 0; |
|
888 @@ -1456,15 +1754,18 @@ |
|
889 uint16_t expect_index; |
|
890 int task_index; |
|
891 ssize_t ret; |
|
892 + int check_status; |
|
893 |
|
894 - ret = recv_message(fd, buf, sizeof(buf), &rdma_dest, &sin, &tstamp, tasks); |
|
895 + |
|
896 + ret = recv_message(fd, buf, sizeof(buf), &rdma_dest, &sin, &tstamp, tasks, opts); |
|
897 if (ret < 0) |
|
898 return ret; |
|
899 |
|
900 /* If we received only RDMA completions or cong updates, |
|
901 * ret will be 0 */ |
|
902 - if (ret == 0) |
|
903 + if (ret == 0) { |
|
904 return 0; |
|
905 + } |
|
906 |
|
907 /* check the incoming sequence number */ |
|
908 task_index = ntohs(sin.sin_port) - opts->starting_port - 1; |
|
909 @@ -1508,16 +1809,32 @@ |
|
910 hdr.to_port = t->src_addr.sin_port; |
|
911 hdr.index = expect_index; |
|
912 |
|
913 - if (check_hdr(buf, ret, &hdr)) |
|
914 - die("header from %s:%u to id %u bogus\n", |
|
915 - inet_ntoa(sin.sin_addr), htons(sin.sin_port), |
|
916 - ntohs(t->src_addr.sin_port)); |
|
917 + check_status = check_hdr(buf, ret, &hdr, opts); |
|
918 + if (check_status) { |
|
919 + if (check_status > 0) { |
|
920 + die("header from %s:%u to id %u bogus\n", |
|
921 + inet_ntoa(sin.sin_addr), htons(sin.sin_port), |
|
922 + ntohs(t->src_addr.sin_port)); |
|
923 + } else |
|
924 + return 0; |
|
925 + } |
|
926 |
|
927 if (hdr.op == OP_ACK) { |
|
928 - stat_inc(&ctl->cur[S_RTT_USECS], |
|
929 - usec_sub(&tstamp, &t->send_time[expect_index])); |
|
930 - t->pending -= 1; |
|
931 + uint64_t rtt_time = |
|
932 + usec_sub(&tstamp, &t->send_time[expect_index]); |
|
933 |
|
934 + stat_inc(&ctl->cur[S_RTT_USECS], rtt_time); |
|
935 + if (rtt_time > rtt_threshold) |
|
936 + print_outlier("Found RTT = 0x%lx\n", rtt_time); |
|
937 + |
|
938 + if (show_histogram) |
|
939 + { |
|
940 + ctl->latency_histogram[get_bucket(rtt_time)]++; |
|
941 + } |
|
942 + |
|
943 + if (t->pending > 0) |
|
944 + t->pending -= 1; |
|
945 + |
|
946 if (in_hdr.rdma_key) |
|
947 rdma_process_ack(fd, &in_hdr, ctl); |
|
948 } else { |
|
949 @@ -1549,6 +1866,7 @@ |
|
950 } |
|
951 |
|
952 static void run_child(pid_t parent_pid, struct child_control *ctl, |
|
953 + struct child_control *all_ctl, |
|
954 struct options *opts, uint16_t id, int active) |
|
955 { |
|
956 struct sockaddr_in sin; |
|
957 @@ -1559,8 +1877,15 @@ |
|
958 struct task tasks[opts->nr_tasks]; |
|
200 struct timeval start; |
959 struct timeval start; |
201 int do_work = opts->simplex ? active : 1; |
960 int do_work = opts->simplex ? active : 1; |
202 |
961 + int j; |
962 |
|
963 + |
|
203 +#if defined(__SVR4) && defined(__sun) |
964 +#if defined(__SVR4) && defined(__sun) |
204 + set_my_lgrp(); |
965 + set_my_lgrp(); |
205 + sin.sin_family = AF_INET_OFFLOAD; |
966 + sin.sin_family = AF_INET_OFFLOAD; |
206 +#else |
967 +#else |
207 sin.sin_family = AF_INET; |
968 sin.sin_family = AF_INET; |
208 +#endif |
969 +#endif |
209 sin.sin_port = htons(opts->starting_port + 1 + id); |
970 sin.sin_port = htons(opts->starting_port + 1 + id); |
210 sin.sin_addr.s_addr = htonl(opts->receive_addr); |
971 sin.sin_addr.s_addr = htonl(opts->receive_addr); |
211 |
972 |
212 @@ -1572,7 +1626,11 @@ |
973 @@ -1572,7 +1897,11 @@ |
213 for (i = 0; i < opts->nr_tasks; i++) { |
974 for (i = 0; i < opts->nr_tasks; i++) { |
214 tasks[i].nr = i; |
975 tasks[i].nr = i; |
215 tasks[i].src_addr = sin; |
976 tasks[i].src_addr = sin; |
216 +#if defined(__SVR4) && defined(__sun) |
977 +#if defined(__SVR4) && defined(__sun) |
217 + tasks[i].dst_addr.sin_family = AF_INET_OFFLOAD; |
978 + tasks[i].dst_addr.sin_family = AF_INET_OFFLOAD; |
219 tasks[i].dst_addr.sin_family = AF_INET; |
980 tasks[i].dst_addr.sin_family = AF_INET; |
220 +#endif |
981 +#endif |
221 tasks[i].dst_addr.sin_addr.s_addr = htonl(opts->send_addr); |
982 tasks[i].dst_addr.sin_addr.s_addr = htonl(opts->send_addr); |
222 tasks[i].dst_addr.sin_port = htons(opts->starting_port + 1 + i); |
983 tasks[i].dst_addr.sin_port = htons(opts->starting_port + 1 + i); |
223 tasks[i].send_time = alloca(opts->req_depth * sizeof(struct timeval)); |
984 tasks[i].send_time = alloca(opts->req_depth * sizeof(struct timeval)); |
224 @@ -1625,6 +1683,10 @@ |
985 @@ -1581,6 +1910,15 @@ |
986 tasks[i].rdma_buf = alloca(opts->req_depth * sizeof(uint64_t *)); |
|
987 tasks[i].local_buf = alloca(opts->req_depth * sizeof(uint64_t *)); |
|
988 tasks[i].ack_header = alloca(opts->req_depth * sizeof(struct header)); |
|
989 + tasks[i].ack2_header = alloca(opts->req_depth * sizeof(struct header)); |
|
990 + for (j=0;j<opts->req_depth;j++) |
|
991 + tasks[i].ack2_header[j].pending = 0; |
|
992 + |
|
993 + tasks[i].req_header = alloca(opts->req_depth * sizeof(struct header)); |
|
994 + for (j=0;j<opts->req_depth;j++) |
|
995 + tasks[i].req_header[j].pending = 0; |
|
996 + |
|
997 + tasks[i].retry_token = alloca(2 * opts->req_depth * sizeof(uint64_t)); |
|
998 tasks[i].rdma_next_op = (i & 1)? RDMA_OP_READ : RDMA_OP_WRITE; |
|
999 } |
|
1000 |
|
1001 @@ -1611,7 +1949,7 @@ |
|
1002 |
|
1003 check_parent(parent_pid); |
|
1004 |
|
1005 - ret = poll(&pfd, 1, -1); |
|
1006 + ret = poll(&pfd, 1, 1000); |
|
1007 if (ret < 0) { |
|
1008 if (errno == EINTR) |
|
1009 continue; |
|
1010 @@ -1621,10 +1959,14 @@ |
|
1011 pfd.events = POLLIN; |
|
1012 |
|
1013 if (pfd.revents & POLLIN) { |
|
1014 - while (recv_one(fd, tasks, opts, ctl) >= 0) |
|
1015 + while (recv_one(fd, tasks, opts, ctl, all_ctl) >= 0) |
|
225 ; |
1016 ; |
226 } |
1017 } |
227 |
1018 |
228 + /* stop sending if in shutdown phase */ |
1019 + /* stop sending if in shutdown phase */ |
229 + if (ctl->stopping) |
1020 + if (ctl->stopping) |
230 + continue; |
1021 + continue; |
231 + |
1022 + |
232 /* keep the pipeline full */ |
1023 /* keep the pipeline full */ |
233 can_send = !!(pfd.revents & POLLOUT); |
1024 can_send = !!(pfd.revents & POLLOUT); |
234 for (i = 0, t = tasks; i < opts->nr_tasks; i++, t++) { |
1025 for (i = 0, t = tasks; i < opts->nr_tasks; i++, t++) { |
235 @@ -1665,8 +1727,12 @@ |
1026 @@ -1633,6 +1975,7 @@ |
1027 if (t->drain_rdmas) |
|
1028 continue; |
|
1029 if (send_anything(fd, t, opts, ctl, can_send, do_work) < 0) { |
|
1030 + |
|
1031 pfd.events |= POLLOUT; |
|
1032 |
|
1033 /* If the send queue is full, we will see EAGAIN. |
|
1034 @@ -1665,8 +2008,12 @@ |
|
236 uint32_t i; |
1035 uint32_t i; |
237 |
1036 |
238 len = opts->nr_tasks * sizeof(*ctl); |
1037 len = opts->nr_tasks * sizeof(*ctl); |
239 +#if defined(__SVR4) && defined(__sun) |
1038 +#if defined(__SVR4) && defined(__sun) |
240 + ctl = (struct child_control *)mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_SHARED, -1, 0); |
1039 + ctl = (struct child_control *)mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_SHARED, -1, 0); |
243 0, 0); |
1042 0, 0); |
244 +#endif |
1043 +#endif |
245 if (ctl == MAP_FAILED) |
1044 if (ctl == MAP_FAILED) |
246 die("mmap of %u child control structs failed", opts->nr_tasks); |
1045 die("mmap of %u child control structs failed", opts->nr_tasks); |
247 |
1046 |
248 @@ -1699,7 +1765,7 @@ |
1047 @@ -1688,7 +2035,7 @@ |
1048 control_fd = -1; |
|
1049 } |
|
1050 rdma_key_o_meter_set_self(i); |
|
1051 - run_child(parent, ctl + i, opts, i, active); |
|
1052 + run_child(parent, ctl + i, ctl, opts, i, active); |
|
1053 exit(0); |
|
1054 } |
|
1055 ctl[i].pid = pid; |
|
1056 @@ -1699,7 +2046,7 @@ |
|
249 continue; |
1057 continue; |
250 pid = waitpid(-1, NULL, WNOHANG); |
1058 pid = waitpid(-1, NULL, WNOHANG); |
251 if (pid) |
1059 if (pid) |
252 - die("child %u (pid %u) exited\n", i, pid); |
1060 - die("child %u (pid %u) exited\n", i, pid); |
253 + die("child %u (pid %u) exited\n", i, (int)pid); |
1061 + die("child %u (pid %u) exited\n", i, (int)pid); |
254 sleep(1); |
1062 sleep(1); |
255 i--; /* try this child again */ |
1063 i--; /* try this child again */ |
256 } |
1064 } |
257 @@ -1823,6 +1889,7 @@ |
1065 @@ -1823,6 +2170,7 @@ |
258 |
1066 |
259 if (disable) |
1067 if (disable) |
260 return; |
1068 return; |
261 +#if !(defined(__SVR4) && defined(__sun)) |
1069 +#if !(defined(__SVR4) && defined(__sun)) |
262 if ((fp = fopen("/proc/stat", "r")) == NULL) { |
1070 if ((fp = fopen("/proc/stat", "r")) == NULL) { |
263 fprintf(stderr, "Cannot open /proc/stat (%s) - " |
1071 fprintf(stderr, "Cannot open /proc/stat (%s) - " |
264 "not printing cpu stats\n", |
1072 "not printing cpu stats\n", |
265 @@ -1856,10 +1923,37 @@ |
1073 @@ -1856,10 +2204,37 @@ |
266 } |
1074 } |
267 } |
1075 } |
268 fclose(fp); |
1076 fclose(fp); |
269 +#else |
1077 +#else |
270 +#define NSEC_TO_TICK(v) (v * sysconf(_SC_CLK_TCK)/1000000000) |
1078 +#define NSEC_TO_TICK(v) (v * sysconf(_SC_CLK_TCK)/1000000000) |
298 + ",intr:count"); |
1106 + ",intr:count"); |
299 +#endif |
1107 +#endif |
300 } else { |
1108 } else { |
301 struct sys_stats sys; |
1109 struct sys_stats sys; |
302 unsigned long sum = 0; |
1110 unsigned long sum = 0; |
303 @@ -1884,6 +1978,7 @@ |
1111 @@ -1884,6 +2259,7 @@ |
304 * 5 irq |
1112 * 5 irq |
305 * 6 softirq |
1113 * 6 softirq |
306 */ |
1114 */ |
307 +#if !(defined(__SVR4) && defined(__sun)) |
1115 +#if !(defined(__SVR4) && defined(__sun)) |
308 printf(",%f,%f,%f,%f,%Lu", |
1116 printf(",%f,%f,%f,%f,%Lu", |
309 (sys.times[0] + sys.times[1]) * scale, |
1117 (sys.times[0] + sys.times[1]) * scale, |
310 sys.times[2] * scale, |
1118 sys.times[2] * scale, |
311 @@ -1890,6 +1985,14 @@ |
1119 @@ -1890,6 +2266,14 @@ |
312 (sys.times[3] + sys.times[4]) * scale, |
1120 (sys.times[3] + sys.times[4]) * scale, |
313 (sys.times[5] + sys.times[6]) * scale, |
1121 (sys.times[5] + sys.times[6]) * scale, |
314 sys.intr); |
1122 sys.intr); |
315 +#else |
1123 +#else |
316 + /* Solaris kstat doesn't provide irq/softirq info. */ |
1124 + /* Solaris kstat doesn't provide irq/softirq info. */ |
321 + sys.intr); |
1129 + sys.intr); |
322 +#endif |
1130 +#endif |
323 } |
1131 } |
324 prev = current; |
1132 prev = current; |
325 } |
1133 } |
326 @@ -1903,6 +2006,10 @@ |
1134 @@ -1903,6 +2287,10 @@ |
327 static socklen_t buflen = 0; |
1135 static socklen_t buflen = 0; |
328 static int sock_fd = -1; |
1136 static int sock_fd = -1; |
329 int i, count, item_size; |
1137 int i, count, item_size; |
330 +#if defined(__SVR4) && defined(__sun) |
1138 +#if defined(__SVR4) && defined(__sun) |
331 + socklen_t len; |
1139 + socklen_t len; |
332 + struct rds_info_arg arg; |
1140 + struct rds_info_arg arg; |
333 +#endif |
1141 +#endif |
334 |
1142 |
335 if (sock_fd < 0) { |
1143 if (sock_fd < 0) { |
336 sock_fd = socket(pf, SOCK_SEQPACKET, 0); |
1144 sock_fd = socket(pf, SOCK_SEQPACKET, 0); |
337 @@ -1912,6 +2019,7 @@ |
1145 @@ -1912,6 +2300,7 @@ |
338 |
1146 |
339 /* We should only loop once on the first call; after that the |
1147 /* We should only loop once on the first call; after that the |
340 * buffer requirements for RDS counters should not change. */ |
1148 * buffer requirements for RDS counters should not change. */ |
341 +#if !(defined(__SVR4) && defined(__sun)) |
1149 +#if !(defined(__SVR4) && defined(__sun)) |
342 while ((item_size = getsockopt(sock_fd, sol, RDS_INFO_COUNTERS, curr, &buflen)) < 0) { |
1150 while ((item_size = getsockopt(sock_fd, sol, RDS_INFO_COUNTERS, curr, &buflen)) < 0) { |
343 if (errno != ENOSPC) |
1151 if (errno != ENOSPC) |
344 die_errno("getsockopt(RDS_INFO_COUNTERS) failed"); |
1152 die_errno("getsockopt(RDS_INFO_COUNTERS) failed"); |
345 @@ -1919,7 +2027,29 @@ |
1153 @@ -1919,7 +2308,29 @@ |
346 if (!curr) |
1154 if (!curr) |
347 die_errno("Cannot allocate buffer for stats counters"); |
1155 die_errno("Cannot allocate buffer for stats counters"); |
348 } |
1156 } |
349 +#else |
1157 +#else |
350 + int retcode; |
1158 + int retcode; |
370 +#endif |
1178 +#endif |
371 + |
1179 + |
372 if (item_size > sizeof(*ctr)) |
1180 if (item_size > sizeof(*ctr)) |
373 die("Bad counter item size in RDS_INFO_COUNTERS (got %d, max %zd)\n", |
1181 die("Bad counter item size in RDS_INFO_COUNTERS (got %d, max %zd)\n", |
374 item_size, sizeof(*ctr)); |
1182 item_size, sizeof(*ctr)); |
375 @@ -1932,8 +2062,11 @@ |
1183 @@ -1932,8 +2343,11 @@ |
376 } |
1184 } |
377 |
1185 |
378 for (i = 0; i < count; ++i) |
1186 for (i = 0; i < count; ++i) |
379 +#if !(defined(__SVR4) && defined(__sun)) |
1187 +#if !(defined(__SVR4) && defined(__sun)) |
380 memcpy(ctr + i, curr + i * item_size, item_size); |
1188 memcpy(ctr + i, curr + i * item_size, item_size); |
383 + memcpy(ctr + i, ((void *)(uintptr_t)arg.datap) + i * item_size, item_size); |
1191 + memcpy(ctr + i, ((void *)(uintptr_t)arg.datap) + i * item_size, item_size); |
384 +#endif |
1192 +#endif |
385 gettimeofday(&now, NULL); |
1193 gettimeofday(&now, NULL); |
386 |
1194 |
387 if (initialize) { |
1195 if (initialize) { |
388 @@ -1957,6 +2090,10 @@ |
1196 @@ -1957,6 +2371,10 @@ |
389 memcpy(prev, ctr, count * sizeof(*ctr)); |
1197 memcpy(prev, ctr, count * sizeof(*ctr)); |
390 last_ts = now; |
1198 last_ts = now; |
391 |
1199 |
392 +#if defined(__SVR4) && defined(__sun) |
1200 +#if defined(__SVR4) && defined(__sun) |
393 + free((void *)(uintptr_t)arg.datap); |
1201 + free((void *)(uintptr_t)arg.datap); |
394 +#endif |
1202 +#endif |
395 + |
1203 + |
396 get_stats(initialize); |
1204 get_stats(initialize); |
397 } |
1205 } |
398 |
1206 |
399 @@ -1967,7 +2104,7 @@ |
1207 @@ -1967,7 +2385,7 @@ |
400 |
1208 |
401 pid = waitpid(-1, &status, wflags); |
1209 pid = waitpid(-1, &status, wflags); |
402 if (pid < 0) |
1210 if (pid < 0) |
403 - die("waitpid returned %u", pid); |
1211 - die("waitpid returned %u", pid); |
404 + die("waitpid returned %u", (int)pid); |
1212 + die("waitpid returned %u", (int)pid); |
405 if (pid == 0) |
1213 if (pid == 0) |
406 return 0; |
1214 return 0; |
407 |
1215 |
408 @@ -1975,15 +2112,15 @@ |
1216 @@ -1975,15 +2393,15 @@ |
409 if (WEXITSTATUS(status) == 0) |
1217 if (WEXITSTATUS(status) == 0) |
410 return 1; |
1218 return 1; |
411 die("child pid %u exited with status %d\n", |
1219 die("child pid %u exited with status %d\n", |
412 - pid, WEXITSTATUS(status)); |
1220 - pid, WEXITSTATUS(status)); |
413 + (int)pid, WEXITSTATUS(status)); |
1221 + (int)pid, WEXITSTATUS(status)); |
422 - die("child pid %u wait status %d\n", pid, status); |
1230 - die("child pid %u wait status %d\n", pid, status); |
423 + die("child pid %u wait status %d\n", (int)pid, status); |
1231 + die("child pid %u wait status %d\n", (int)pid, status); |
424 } |
1232 } |
425 |
1233 |
426 static void release_children_and_wait(struct options *opts, |
1234 static void release_children_and_wait(struct options *opts, |
427 @@ -2139,7 +2276,12 @@ |
1235 @@ -1995,9 +2413,13 @@ |
1236 struct counter summary[NR_STATS]; |
|
1237 struct timeval start, end, now, first_ts, last_ts; |
|
1238 double cpu_total = 0; |
|
1239 - uint16_t i, cpu_samples = 0; |
|
1240 + uint16_t i, j, cpu_samples = 0; |
|
1241 uint16_t nr_running; |
|
1242 + uint64_t latency_histogram[MAX_BUCKETS]; |
|
1243 |
|
1244 + if (show_histogram) |
|
1245 + memset(latency_histogram, 0, sizeof(latency_histogram)); |
|
1246 + |
|
1247 gettimeofday(&start, NULL); |
|
1248 start.tv_sec += 2; |
|
1249 for (i = 0; i < opts->nr_tasks; i++) |
|
1250 @@ -2139,7 +2561,12 @@ |
|
428 control_fd = -1; |
1251 control_fd = -1; |
429 |
1252 |
430 if (nr_running) { |
1253 if (nr_running) { |
431 + /* let everything gracefully stop before we kill the chillins */ |
1254 + /* let everything gracefully stop before we kill the chillins */ |
432 for (i = 0; i < opts->nr_tasks; i++) |
1255 for (i = 0; i < opts->nr_tasks; i++) |
435 + |
1258 + |
436 + for (i = 0; i < opts->nr_tasks; i++) |
1259 + for (i = 0; i < opts->nr_tasks; i++) |
437 kill(ctl[i].pid, SIGTERM); |
1260 kill(ctl[i].pid, SIGTERM); |
438 stop_soakers(soak_arr); |
1261 stop_soakers(soak_arr); |
439 } |
1262 } |
440 @@ -2517,7 +2659,11 @@ |
1263 @@ -2167,6 +2594,19 @@ |
1264 avg(&summary[S_SENDMSG_USECS]), |
|
1265 avg(&summary[S_RTT_USECS]), |
|
1266 soak_arr? scale * cpu_total : -1.0); |
|
1267 + |
|
1268 + if (show_histogram) |
|
1269 + { |
|
1270 + for (i = 0; i < opts->nr_tasks; i++) |
|
1271 + for (j=0;j < MAX_BUCKETS; j++) |
|
1272 + latency_histogram[j] += ctl[i].latency_histogram[j]; |
|
1273 + |
|
1274 + printf("\nRTT histogram\n"); |
|
1275 + printf("RTT (us) \t\t Count\n"); |
|
1276 + for (i=0;i < MAX_BUCKETS; i++) |
|
1277 + printf("[%6u - %6u] \t\t %8u\n", 1 << i, 1 << (i+1), |
|
1278 + (unsigned int)latency_histogram[i]); |
|
1279 + } |
|
1280 } |
|
1281 } |
|
1282 |
|
1283 @@ -2220,6 +2660,21 @@ |
|
1284 { |
|
1285 ssize_t ret; |
|
1286 |
|
1287 + if (size == sizeof(struct options)) { |
|
1288 + memset(ptr, 0, size); |
|
1289 + ret = read(fd, peer_version, VERSION_MAX_LEN); |
|
1290 + if (ret != VERSION_MAX_LEN) |
|
1291 + die_errno("Failed to read version"); |
|
1292 + |
|
1293 + if (strcmp(peer_version, RDS_VERSION)) { |
|
1294 + ptr += ret; |
|
1295 + memcpy(ptr, peer_version, VERSION_MAX_LEN); |
|
1296 + size = sizeof(struct options_2_0_6) - ret; |
|
1297 + } else |
|
1298 + size -= ret; |
|
1299 + ptr += ret; |
|
1300 + } |
|
1301 + |
|
1302 while (size) { |
|
1303 ret = read(fd, ptr, size); |
|
1304 if (ret < 0) |
|
1305 @@ -2233,6 +2688,7 @@ |
|
1306 |
|
1307 static void encode_options(struct options *dst, const struct options *src) |
|
1308 { |
|
1309 + memcpy(dst->version, src->version, VERSION_MAX_LEN); |
|
1310 dst->req_depth = htonl(src->req_depth); |
|
1311 dst->req_size = htonl(src->req_size); |
|
1312 dst->ack_size = htonl(src->ack_size); |
|
1313 @@ -2262,10 +2718,13 @@ |
|
1314 dst->simplex = src->simplex; /* byte sized */ |
|
1315 dst->rw_mode = src->rw_mode; /* byte sized */ |
|
1316 dst->rdma_vector = htonl(src->rdma_vector); |
|
1317 + dst->tos = src->tos; |
|
1318 + dst->async = src->async; |
|
1319 } |
|
1320 |
|
1321 static void decode_options(struct options *dst, const struct options *src) |
|
1322 { |
|
1323 + memcpy(dst->version, src->version, VERSION_MAX_LEN); |
|
1324 dst->req_depth = ntohl(src->req_depth); |
|
1325 dst->req_size = ntohl(src->req_size); |
|
1326 dst->ack_size = ntohl(src->ack_size); |
|
1327 @@ -2295,6 +2754,8 @@ |
|
1328 dst->simplex = src->simplex; /* byte sized */ |
|
1329 dst->rw_mode = src->rw_mode; /* byte sized */ |
|
1330 dst->rdma_vector = ntohl(src->rdma_vector); |
|
1331 + dst->tos = src->tos; |
|
1332 + dst->async = src->async; |
|
1333 } |
|
1334 |
|
1335 static void verify_option_encdec(const struct options *opts) |
|
1336 @@ -2316,6 +2777,25 @@ |
|
1337 die("encode/decode check of options struct failed"); |
|
1338 } |
|
1339 |
|
1340 +static void reset_conn(struct options *opts) |
|
1341 +{ |
|
1342 + struct rds_reset val; |
|
1343 + int fd; |
|
1344 + struct sockaddr_in sin; |
|
1345 + |
|
1346 + sin.sin_family = AF_INET; |
|
1347 + sin.sin_port = htons(opts->starting_port); |
|
1348 + sin.sin_addr.s_addr = htonl(opts->receive_addr); |
|
1349 + |
|
1350 + fd = bound_socket(pf, SOCK_SEQPACKET, 0, &sin); |
|
1351 + |
|
1352 + val.tos = opts->tos; |
|
1353 + val.src.s_addr = htonl(opts->receive_addr); |
|
1354 + val.dst.s_addr = htonl(opts->send_addr); |
|
1355 + if (setsockopt(fd, sol, RDS_CONN_RESET, &val, sizeof(val))) |
|
1356 + die_errno("setsockopt RDS_CONN_RESET failed"); |
|
1357 +} |
|
1358 + |
|
1359 static int active_parent(struct options *opts, struct soak_control *soak_arr) |
|
1360 { |
|
1361 struct options enc_options; |
|
1362 @@ -2324,6 +2804,11 @@ |
|
1363 int fd; |
|
1364 uint8_t ok; |
|
1365 |
|
1366 + if (reset_connection) { |
|
1367 + reset_conn(opts); |
|
1368 + return 0; |
|
1369 + } |
|
1370 + |
|
1371 if (opts->show_params) { |
|
1372 unsigned int k; |
|
1373 |
|
1374 @@ -2387,7 +2872,11 @@ |
|
1375 * We just tell the peer what options to use. |
|
1376 */ |
|
1377 encode_options(&enc_options, opts); |
|
1378 - peer_send(fd, &enc_options, sizeof(struct options)); |
|
1379 + if (opts->tos || opts->async) |
|
1380 + peer_send(fd, &enc_options, sizeof(struct options)); |
|
1381 + else |
|
1382 + peer_send(fd, &enc_options.req_depth, |
|
1383 + sizeof(struct options_2_0_6)); |
|
1384 |
|
1385 printf("negotiated options, tasks will start in 2 seconds\n"); |
|
1386 ctl = start_children(opts, 1); |
|
1387 @@ -2517,7 +3006,11 @@ |
|
441 /* an extra terminating entry which will be all 0s */ |
1388 /* an extra terminating entry which will be all 0s */ |
442 len = (nr_soak + 1) * sizeof(struct soak_control); |
1389 len = (nr_soak + 1) * sizeof(struct soak_control); |
443 soak_arr = mmap(NULL, len, PROT_READ|PROT_WRITE, |
1390 soak_arr = mmap(NULL, len, PROT_READ|PROT_WRITE, |
444 +#if defined(__SVR4) && defined(__sun) |
1391 +#if defined(__SVR4) && defined(__sun) |
445 + MAP_ANONYMOUS|MAP_SHARED, -1, 0); |
1392 + MAP_ANONYMOUS|MAP_SHARED, -1, 0); |
447 MAP_ANONYMOUS|MAP_SHARED, 0, 0); |
1394 MAP_ANONYMOUS|MAP_SHARED, 0, 0); |
448 +#endif |
1395 +#endif |
449 if (soak_arr == MAP_FAILED) |
1396 if (soak_arr == MAP_FAILED) |
450 die("mmap of %ld soak control structs failed", nr_soak); |
1397 die("mmap of %ld soak control structs failed", nr_soak); |
451 |
1398 |
452 @@ -2589,6 +2735,7 @@ |
1399 @@ -2572,6 +3065,10 @@ |
1400 OPT_CONNECT_RETRIES, |
|
1401 OPT_USE_CONG_MONITOR, |
|
1402 OPT_PERFDATA, |
|
1403 + OPT_SHOW_OUTLIERS, |
|
1404 + OPT_SHOW_HISTOGRAM, |
|
1405 + OPT_RESET, |
|
1406 + OPT_ASYNC, |
|
1407 }; |
|
1408 |
|
1409 static struct option long_options[] = { |
|
1410 @@ -2584,11 +3081,13 @@ |
|
1411 { "send-addr", required_argument, NULL, 's' }, |
|
1412 { "port", required_argument, NULL, 'p' }, |
|
1413 { "time", required_argument, NULL, 'T' }, |
|
1414 +{ "tos", required_argument, NULL, 'Q' }, |
|
1415 { "report-cpu", no_argument, NULL, 'c' }, |
|
1416 { "report-summary", no_argument, NULL, 'z' }, |
|
453 { "rtprio", no_argument, NULL, 'R' }, |
1417 { "rtprio", no_argument, NULL, 'R' }, |
454 { "verify", no_argument, NULL, 'v' }, |
1418 { "verify", no_argument, NULL, 'v' }, |
455 { "trace", no_argument, NULL, 'V' }, |
1419 { "trace", no_argument, NULL, 'V' }, |
456 +{ "lgrpid", required_argument, NULL, 'g' }, |
1420 +{ "lgrpid", required_argument, NULL, 'g' }, |
457 |
1421 |
458 { "rdma-use-once", required_argument, NULL, OPT_RDMA_USE_ONCE }, |
1422 { "rdma-use-once", required_argument, NULL, OPT_RDMA_USE_ONCE }, |
459 { "rdma-use-get-mr", required_argument, NULL, OPT_RDMA_USE_GET_MR }, |
1423 { "rdma-use-get-mr", required_argument, NULL, OPT_RDMA_USE_GET_MR }, |
460 @@ -2652,7 +2799,7 @@ |
1424 @@ -2601,6 +3100,10 @@ |
1425 { "show-perfdata", no_argument, NULL, OPT_PERFDATA }, |
|
1426 { "connect-retries", required_argument, NULL, OPT_CONNECT_RETRIES }, |
|
1427 { "use-cong-monitor", required_argument, NULL, OPT_USE_CONG_MONITOR }, |
|
1428 +{ "show-outliers", required_argument, NULL, OPT_SHOW_OUTLIERS }, |
|
1429 +{ "show-histogram", no_argument, NULL, OPT_SHOW_HISTOGRAM }, |
|
1430 +{ "reset", no_argument, NULL, OPT_RESET }, |
|
1431 +{ "async", no_argument, NULL, OPT_ASYNC }, |
|
1432 |
|
1433 { NULL } |
|
1434 }; |
|
1435 @@ -2640,6 +3143,8 @@ |
|
1436 opts.use_cong_monitor = 1; |
|
1437 opts.rdma_use_fence = 1; |
|
1438 opts.rdma_cache_mrs = 0; |
|
1439 + opts.rdma_use_once = 0; |
|
1440 + opts.rdma_use_get_mr = 0; |
|
1441 opts.rdma_alignment = 0; |
|
1442 opts.rdma_key_o_meter = 0; |
|
1443 opts.show_params = 0; |
|
1444 @@ -2648,11 +3153,17 @@ |
|
1445 opts.simplex = 0; |
|
1446 opts.rw_mode = 0; |
|
1447 opts.rdma_vector = 1; |
|
1448 + rtt_threshold = ~0U; |
|
1449 + show_histogram = 0; |
|
1450 + opts.tos = 0; |
|
1451 + reset_connection = 0; |
|
1452 + opts.async = 0; |
|
1453 + strcpy(opts.version, RDS_VERSION); |
|
1454 |
|
461 while(1) { |
1455 while(1) { |
462 int c, index; |
1456 int c, index; |
463 |
1457 |
464 - c = getopt_long(argc, argv, "+a:cD:d:hI:M:op:q:Rr:s:t:T:vVz", |
1458 - c = getopt_long(argc, argv, "+a:cD:d:hI:M:op:q:Rr:s:t:T:vVz", |
465 + c = getopt_long(argc, argv, "+a:cD:d:hI:M:op:q:Rr:s:t:T:vVg:z", |
1459 + c = getopt_long(argc, argv, "+a:cD:d:hI:M:op:q:Rr:s:t:T:Q:vVg:z", |
466 long_options, &index); |
1460 long_options, &index); |
467 if (c == -1) |
1461 if (c == -1) |
468 break; |
1462 break; |
469 @@ -2711,6 +2858,10 @@ |
1463 @@ -2702,6 +3213,9 @@ |
1464 case 'T': |
|
1465 opts.run_time = parse_ull(optarg, (uint32_t)~0); |
|
1466 break; |
|
1467 + case 'Q': |
|
1468 + opts.tos = parse_ull(optarg, (uint8_t)~0); |
|
1469 + break; |
|
1470 case 'z': |
|
1471 opts.summary_only = 1; |
|
1472 break; |
|
1473 @@ -2711,9 +3225,25 @@ |
|
470 case 'V': |
1474 case 'V': |
471 opts.tracing = 1; |
1475 opts.tracing = 1; |
472 break; |
1476 break; |
473 + case 'g': |
1477 + case 'g': |
474 + lgrp_id = (lgrp_id_t)parse_ull(optarg, |
1478 + lgrp_id = (lgrp_id_t)parse_ull(optarg, |
475 + (uint32_t)~0); |
1479 + (uint32_t)~0); |
476 + break; |
1480 + break; |
1481 + case OPT_SHOW_OUTLIERS: |
|
1482 + rtt_threshold = parse_ull(optarg, ~0U); |
|
1483 + break; |
|
1484 + case OPT_SHOW_HISTOGRAM: |
|
1485 + show_histogram = 1; |
|
1486 + break; |
|
477 case OPT_USE_CONG_MONITOR: |
1487 case OPT_USE_CONG_MONITOR: |
478 opts.use_cong_monitor = parse_ull(optarg, 1); |
1488 opts.use_cong_monitor = parse_ull(optarg, 1); |
479 break; |
1489 break; |
480 @@ -2786,6 +2937,7 @@ |
1490 + case OPT_RESET: |
1491 + reset_connection = 1; |
|
1492 + break; |
|
1493 + case OPT_ASYNC: |
|
1494 + opts.async = 1; |
|
1495 + break; |
|
1496 case OPT_RDMA_USE_ONCE: |
|
1497 opts.rdma_use_once = parse_ull(optarg, 1); |
|
1498 break; |
|
1499 @@ -2786,6 +3316,7 @@ |
|
481 if (opts.rdma_size && 0) |
1500 if (opts.rdma_size && 0) |
482 opts.rdma_size = (opts.rdma_size + 4095) & ~4095; |
1501 opts.rdma_size = (opts.rdma_size + 4095) & ~4095; |
483 |
1502 |
484 + set_my_lgrp(); |
1503 + set_my_lgrp(); |
485 opt = opts; |
1504 opt = opts; |
542 +#endif |
1561 +#endif |
543 + |
1562 + |
544 /* Like inet_ntoa, but can be re-entered several times without clobbering |
1563 /* Like inet_ntoa, but can be re-entered several times without clobbering |
545 * the previously returned string. */ |
1564 * the previously returned string. */ |
546 static const char *paddr(int af, const void *addrp) |
1565 static const char *paddr(int af, const void *addrp) |
547 @@ -234,8 +250,10 @@ |
1566 @@ -134,18 +150,20 @@ |
1567 { |
|
1568 struct rds_info_connection conn; |
|
1569 |
|
1570 - printf("\nRDS Connections:\n%15s %15s %16s %16s %3s\n", |
|
1571 - "LocalAddr", "RemoteAddr", "NextTX", "NextRX", "Flg"); |
|
1572 + printf("\nRDS Connections:\n%15s %15s %4s %16s %16s %4s\n", |
|
1573 + "LocalAddr", "RemoteAddr", "Tos", "NextTX", "NextRX", "Flgs"); |
|
1574 |
|
1575 for_each(conn, data, each, len) { |
|
1576 - printf("%15s %15s %16"PRIu64" %16"PRIu64" %c%c%c\n", |
|
1577 + printf("%15s %15s %4u %16"PRIu64" %16"PRIu64" %c%c%c%c\n", |
|
1578 ipv4addr(conn.laddr), |
|
1579 ipv4addr(conn.faddr), |
|
1580 + conn.tos, |
|
1581 conn.next_tx_seq, |
|
1582 conn.next_rx_seq, |
|
1583 rds_conn_flag(conn, SENDING, 's'), |
|
1584 rds_conn_flag(conn, CONNECTING, 'c'), |
|
1585 - rds_conn_flag(conn, CONNECTED, 'C')); |
|
1586 + rds_conn_flag(conn, CONNECTED, 'C'), |
|
1587 + rds_conn_flag(conn, ERROR, 'E')); |
|
1588 } |
|
1589 } |
|
1590 |
|
1591 @@ -153,16 +171,17 @@ |
|
1592 { |
|
1593 struct rds_info_message msg; |
|
1594 |
|
1595 - printf("\n%s Message Queue:\n%15s %5s %15s %5s %16s %10s\n", |
|
1596 + printf("\n%s Message Queue:\n%15s %5s %15s %5s %4s %16s %10s\n", |
|
1597 (char *)extra, |
|
1598 - "LocalAddr", "LPort", "RemoteAddr", "RPort", "Seq", "Bytes"); |
|
1599 + "LocalAddr", "LPort", "RemoteAddr", "RPort", "Tos","Seq", "Bytes"); |
|
1600 |
|
1601 for_each(msg, data, each, len) { |
|
1602 - printf("%15s %5u %15s %5u %16"PRIu64" %10u\n", |
|
1603 + printf("%15s %5u %15s %5u %4u %16"PRIu64" %10u\n", |
|
1604 ipv4addr(msg.laddr), |
|
1605 ntohs(msg.lport), |
|
1606 ipv4addr(msg.faddr), |
|
1607 ntohs(msg.fport), |
|
1608 + msg.tos, |
|
1609 msg.seq, msg.len); |
|
1610 } |
|
1611 } |
|
1612 @@ -191,13 +210,14 @@ |
|
1613 { |
|
1614 struct rds_info_rdma_connection ic; |
|
1615 |
|
1616 - printf("\nRDS IB Connections:\n%15s %15s %32s %32s\n", |
|
1617 - "LocalAddr", "RemoteAddr", "LocalDev", "RemoteDev"); |
|
1618 + printf("\nRDS IB Connections:\n%15s %15s %4s %3s %32s %32s\n", |
|
1619 + "LocalAddr", "RemoteAddr", "Tos", "SL", "LocalDev", "RemoteDev"); |
|
1620 |
|
1621 for_each(ic, data, each, len) { |
|
1622 - printf("%15s %15s %32s %32s", |
|
1623 + printf("%15s %15s %4u %3u %32s %32s", |
|
1624 ipv4addr(ic.src_addr), |
|
1625 ipv4addr(ic.dst_addr), |
|
1626 + ic.tos,ic.sl, |
|
1627 ipv6addr(ic.src_gid), |
|
1628 ipv6addr(ic.dst_gid)); |
|
1629 |
|
1630 @@ -234,8 +254,10 @@ |
|
548 print_msgs, "Send", 0 }, |
1631 print_msgs, "Send", 0 }, |
549 ['t'] = { RDS_INFO_RETRANS_MESSAGES, "retransmit queue messages", |
1632 ['t'] = { RDS_INFO_RETRANS_MESSAGES, "retransmit queue messages", |
550 print_msgs, "Retransmit", 0 }, |
1633 print_msgs, "Retransmit", 0 }, |
551 +#if !(defined(__SVR4) && defined(__sun)) |
1634 +#if !(defined(__SVR4) && defined(__sun)) |
552 ['T'] = { RDS_INFO_TCP_SOCKETS, "TCP transport sockets", |
1635 ['T'] = { RDS_INFO_TCP_SOCKETS, "TCP transport sockets", |
553 print_tcp_socks, NULL, 0 }, |
1636 print_tcp_socks, NULL, 0 }, |
554 +#endif |
1637 +#endif |
555 ['I'] = { RDS_INFO_IB_CONNECTIONS, "IB transport connections", |
1638 ['I'] = { RDS_INFO_IB_CONNECTIONS, "IB transport connections", |
556 print_ib_conns, NULL, 0 }, |
1639 print_ib_conns, NULL, 0 }, |
557 }; |
1640 }; |
558 @@ -266,6 +284,9 @@ |
1641 @@ -266,6 +288,9 @@ |
559 char optstring[258] = "v+"; |
1642 char optstring[258] = "v+"; |
560 int given_options = 0; |
1643 int given_options = 0; |
561 socklen_t len = 0; |
1644 socklen_t len = 0; |
562 +#if defined(__SVR4) && defined(__sun) |
1645 +#if defined(__SVR4) && defined(__sun) |
563 + struct rds_info_arg arg; |
1646 + struct rds_info_arg arg; |
564 +#endif |
1647 +#endif |
565 void *data = NULL; |
1648 void *data = NULL; |
566 int fd; |
1649 int fd; |
567 int each; |
1650 int each; |
568 @@ -322,6 +343,7 @@ |
1651 @@ -322,6 +347,7 @@ |
569 (given_options && !infos[i].option_given)) |
1652 (given_options && !infos[i].option_given)) |
570 continue; |
1653 continue; |
571 |
1654 |
572 +#if !(defined(__SVR4) && defined(__sun)) |
1655 +#if !(defined(__SVR4) && defined(__sun)) |
573 /* read in the info until we get a full snapshot */ |
1656 /* read in the info until we get a full snapshot */ |
574 while ((each = getsockopt(fd, sol, infos[i].opt_val, data, |
1657 while ((each = getsockopt(fd, sol, infos[i].opt_val, data, |
575 &len)) < 0) { |
1658 &len)) < 0) { |
576 @@ -345,15 +367,47 @@ |
1659 @@ -345,15 +371,47 @@ |
577 return 1; |
1660 return 1; |
578 } |
1661 } |
579 } |
1662 } |
580 +#else |
1663 +#else |
581 + int retcode; |
1664 + int retcode; |
723 .BR rds-rdma (7), |
1806 .BR rds-rdma (7), |
724 .BR socket (2), |
1807 .BR socket (2), |
725 diff -r -u /tmp/rds-tools-2.0.4/rds-info.1 rds-tools-2.0.7/rds-info.1 |
1808 diff -r -u /tmp/rds-tools-2.0.4/rds-info.1 rds-tools-2.0.7/rds-info.1 |
726 --- /tmp/rds-tools-2.0.4/rds-info.1 Wed Aug 4 15:25:11 2010 |
1809 --- /tmp/rds-tools-2.0.4/rds-info.1 Wed Aug 4 15:25:11 2010 |
727 +++ rds-tools-2.0.7/rds-info.1 Thu Feb 24 13:27:51 2011 |
1810 +++ rds-tools-2.0.7/rds-info.1 Thu Feb 24 13:27:51 2011 |
728 @@ -1,162 +1,150 @@ |
1811 @@ -1,162 +1,160 @@ |
729 -.Dd October 30, 2006 |
1812 -.Dd October 30, 2006 |
730 -.Dt RDS-INFO 1 |
1813 -.Dt RDS-INFO 1 |
731 -.Os |
1814 -.Os |
732 -.Sh NAME |
1815 -.Sh NAME |
733 -.Nm rds-info |
1816 -.Nm rds-info |
826 connection establishment. |
1909 connection establishment. |
827 -.It RemoteAddr |
1910 -.It RemoteAddr |
828 +.IP RemoteAddr |
1911 +.IP RemoteAddr |
829 The IP address of the remote end of the connection. |
1912 The IP address of the remote end of the connection. |
830 -.It NextTX |
1913 -.It NextTX |
1914 +.IP Tos |
|
1915 +The type of service value for this connection. |
|
831 +.IP NextTX |
1916 +.IP NextTX |
832 The sequence number that will be given to the next message that is sent |
1917 The sequence number that will be given to the next message that is sent |
833 over the connection. |
1918 over the connection. |
834 -.It NextRX |
1919 -.It NextRX |
835 +.IP NextRX |
1920 +.IP NextRX |
857 + The transport is attempting to connect to the |
1942 + The transport is attempting to connect to the |
858 + remote address. |
1943 + remote address. |
859 +.IP C |
1944 +.IP C |
860 + The connection to the remote host is connected |
1945 + The connection to the remote host is connected |
861 + and active. |
1946 + and active. |
1947 +.IP E |
|
1948 + The connection to the remote host is in error. |
|
862 + |
1949 + |
863 +.TP |
1950 +.TP |
864 +\fB\-r\fR, \fB\-s\fR, \fB\-t\fR |
1951 +\fB\-r\fR, \fB\-s\fR, \fB\-t\fR |
865 Display the messages in the receive, send, or retransmit queues respectively. |
1952 Display the messages in the receive, send, or retransmit queues respectively. |
866 -.Bl -tag -width 4 |
1953 -.Bl -tag -width 4 |
873 -.It RemoteAddr, RPort |
1960 -.It RemoteAddr, RPort |
874 +.IP RemoteAddr, RPort |
1961 +.IP RemoteAddr, RPort |
875 The remote IP address and port associated with the message. For sent messages |
1962 The remote IP address and port associated with the message. For sent messages |
876 this is the destination address, for receive messages it is the source address. |
1963 this is the destination address, for receive messages it is the source address. |
877 -.It Seq |
1964 -.It Seq |
1965 +.IP Tos |
|
1966 +The type of service for this message. |
|
878 +.IP Seq |
1967 +.IP Seq |
879 The sequence number of the message. |
1968 The sequence number of the message. |
880 -.It Bytes |
1969 -.It Bytes |
881 +.IP Bytes |
1970 +.IP Bytes |
882 The number of bytes in the message payload. |
1971 The number of bytes in the message payload. |
899 The local IP address of this connection. |
1988 The local IP address of this connection. |
900 -.It RemoteAddr |
1989 -.It RemoteAddr |
901 +.IP RemoteAddr |
1990 +.IP RemoteAddr |
902 The remote IP address of this connection. |
1991 The remote IP address of this connection. |
903 -.It LocalDev |
1992 -.It LocalDev |
1993 +.IP Tos |
|
1994 +The type of service value for this connection. |
|
1995 +.IP SL |
|
1996 +The QoS Service Level for this connection. |
|
904 +.IP LocalDev |
1997 +.IP LocalDev |
905 The local IB Global Identifier, printed in IPv6 address syntax. |
1998 The local IB Global Identifier, printed in IPv6 address syntax. |
906 -.It RemoteDev |
1999 -.It RemoteDev |
907 +.IP RemoteDev |
2000 +.IP RemoteDev |
908 The remote IB Global Identifier, printed in IPv6 address syntax. |
2001 The remote IB Global Identifier, printed in IPv6 address syntax. |
952 -.El |
2045 -.El |
953 -.Pp |
2046 -.Pp |
954 diff -r -u /tmp/rds-tools-2.0.4/rds-ping.1 rds-tools-2.0.7/rds-ping.1 |
2047 diff -r -u /tmp/rds-tools-2.0.4/rds-ping.1 rds-tools-2.0.7/rds-ping.1 |
955 --- /tmp/rds-tools-2.0.4/rds-ping.1 Wed Aug 4 15:25:11 2010 |
2048 --- /tmp/rds-tools-2.0.4/rds-ping.1 Wed Aug 4 15:25:11 2010 |
956 +++ rds-tools-2.0.7/rds-ping.1 Thu Feb 24 13:27:52 2011 |
2049 +++ rds-tools-2.0.7/rds-ping.1 Thu Feb 24 13:27:52 2011 |
957 @@ -1,69 +1,54 @@ |
2050 @@ -1,69 +1,63 @@ |
958 -.Dd Apr 22, 2008 |
2051 -.Dd Apr 22, 2008 |
959 -.Dt RDS-PING 1 |
2052 -.Dt RDS-PING 1 |
960 -.Os |
2053 -.Os |
961 -.Sh NAME |
2054 -.Sh NAME |
962 -.Nm rds-ping |
2055 -.Nm rds-ping |
977 -.Nm rds-ping |
2070 -.Nm rds-ping |
978 -is used to test whether a remote node is reachable over RDS. |
2071 -is used to test whether a remote node is reachable over RDS. |
979 -Its interface is designed to operate pretty much the standard |
2072 -Its interface is designed to operate pretty much the standard |
980 -.Xr ping 8 |
2073 -.Xr ping 8 |
981 +.SH SYNOPSIS |
2074 +.SH SYNOPSIS |
982 +.B rds-ping [-c count] [-i interval] [-I local_addr] remote_addr |
2075 +.HP |
2076 +.nf |
|
2077 +rds-ping [-c count] [-Q tos] [-i interval] [-I local_addr] |
|
2078 + remote_addr |
|
2079 +.fi |
|
983 + |
2080 + |
984 +.SH DESCRIPTION |
2081 +.SH DESCRIPTION |
985 +.PP |
2082 +.PP |
986 +rds-ping is used to test whether a remote node is reachable over RDS. |
2083 +rds-ping is used to test whether a remote node is reachable over RDS. |
987 +Its interface is designed to operate pretty much the standard ping(1M) |
2084 +Its interface is designed to operate pretty much the standard ping(1M) |
1009 packets. |
2106 packets. |
1010 -.It Fl I Ar address |
2107 -.It Fl I Ar address |
1011 -By default, |
2108 -By default, |
1012 -.Nm rds-ping |
2109 -.Nm rds-ping |
1013 -will pick the local source address for the RDS socket based |
2110 -will pick the local source address for the RDS socket based |
2111 +.TP |
|
2112 +\fB\-Q tos |
|
2113 +By default, rds-ping sends the ping requests on the base (tos = 0) RDS connection. |
|
2114 +With this option, the requests are sent on the RDS connection with the specified tos |
|
2115 +value. Valid values are 0-255. |
|
1014 +.TP |
2116 +.TP |
1015 +\fB\-I address |
2117 +\fB\-I address |
1016 +By default, rds-ping will pick the local source address for the RDS socket based |
2118 +By default, rds-ping will pick the local source address for the RDS socket based |
1017 on routing information for the destination address (i.e. if |
2119 on routing information for the destination address (i.e. if |
1018 packets to the given destination would be routed through interface |
2120 packets to the given destination would be routed through interface |
1070 |
2172 |
1071 AC_SUBST(VERSION) |
2173 AC_SUBST(VERSION) |
1072 diff -r -u /tmp/rds-tools-2.0.4/rds-ping.c rds-tools-2.0.7/rds-ping.c |
2174 diff -r -u /tmp/rds-tools-2.0.4/rds-ping.c rds-tools-2.0.7/rds-ping.c |
1073 --- /tmp/rds-tools-2.0.4/rds-ping.c Wed Aug 4 15:25:10 2010 |
2175 --- /tmp/rds-tools-2.0.4/rds-ping.c Wed Aug 4 15:25:10 2010 |
1074 +++ rds-tools-2.0.7/rds-ping.c Thu Feb 24 13:27:52 2011 |
2176 +++ rds-tools-2.0.7/rds-ping.c Thu Feb 24 13:27:52 2011 |
1075 @@ -48,7 +48,11 @@ |
2177 @@ -48,7 +48,12 @@ |
1076 #include <sys/poll.h> |
2178 #include <sys/poll.h> |
1077 #include <fcntl.h> |
2179 #include <fcntl.h> |
1078 #include <getopt.h> |
2180 #include <getopt.h> |
2181 +#include <sys/ioctl.h> |
|
1079 +#if defined(__SVR4) && defined(__sun) |
2182 +#if defined(__SVR4) && defined(__sun) |
1080 +#include <sys/rds.h> |
2183 +#include <sys/rds.h> |
1081 +#else |
2184 +#else |
1082 #include "rds.h" |
2185 #include "rds.h" |
1083 +#endif |
2186 +#endif |
1084 |
2187 |
1085 #include "pfhack.h" |
2188 #include "pfhack.h" |
1086 |
2189 |
1087 @@ -155,7 +159,12 @@ |
2190 @@ -67,6 +72,7 @@ |
2191 static unsigned long opt_count; |
|
2192 static struct in_addr opt_srcaddr; |
|
2193 static struct in_addr opt_dstaddr; |
|
2194 +static uint8_t opt_tos = 0; |
|
2195 |
|
2196 /* For reasons of simplicity, RDS ping does not use a packet |
|
2197 * payload that is being echoed, the way ICMP does. |
|
2198 @@ -91,6 +97,7 @@ |
|
2199 static int parse_timeval(const char *, struct timeval *); |
|
2200 static int parse_long(const char *ptr, unsigned long *); |
|
2201 static int parse_addr(const char *ptr, struct in_addr *); |
|
2202 +static unsigned long long parse_ull(char *ptr, unsigned long long max); |
|
2203 |
|
2204 int |
|
2205 main(int argc, char **argv) |
|
2206 @@ -97,7 +104,7 @@ |
|
2207 { |
|
2208 int c; |
|
2209 |
|
2210 - while ((c = getopt(argc, argv, "c:i:I:")) != -1) { |
|
2211 + while ((c = getopt(argc, argv, "c:i:I:Q:")) != -1) { |
|
2212 switch (c) { |
|
2213 case 'c': |
|
2214 if (!parse_long(optarg, &opt_count)) |
|
2215 @@ -114,6 +121,9 @@ |
|
2216 die("Bad wait time <%s>\n", optarg); |
|
2217 break; |
|
2218 |
|
2219 + case 'Q': |
|
2220 + opt_tos = parse_ull(optarg, 255); |
|
2221 + break; |
|
2222 default: |
|
2223 usage("Unknown option"); |
|
2224 } |
|
2225 @@ -142,6 +152,9 @@ |
|
2226 struct timeval next_ts; |
|
2227 struct socket socket[NSOCKETS]; |
|
2228 struct pollfd pfd[NSOCKETS]; |
|
2229 +#if !(defined(__SVR4) && defined(__sun)) |
|
2230 + int pending[NSOCKETS]; |
|
2231 +#endif |
|
2232 int i, next = 0; |
|
2233 |
|
2234 for (i = 0; i < NSOCKETS; ++i) { |
|
2235 @@ -152,10 +165,18 @@ |
|
2236 socket[i].fd = fd; |
|
2237 pfd[i].fd = fd; |
|
2238 pfd[i].events = POLLIN; |
|
2239 +#if !(defined(__SVR4) && defined(__sun)) |
|
2240 + pending[i] = 0; |
|
2241 +#endif |
|
1088 } |
2242 } |
1089 |
2243 |
1090 memset(&sin, 0, sizeof(sin)); |
2244 memset(&sin, 0, sizeof(sin)); |
1091 +#if defined(__SVR4) && defined(__sun) |
2245 +#if defined(__SVR4) && defined(__sun) |
1092 + sin.sin_family = AF_INET_OFFLOAD; |
2246 + sin.sin_family = AF_INET_OFFLOAD; |
1095 +#endif |
2249 +#endif |
1096 + |
2250 + |
1097 sin.sin_addr = opt_dstaddr; |
2251 sin.sin_addr = opt_dstaddr; |
1098 |
2252 |
1099 gettimeofday(&next_ts, NULL); |
2253 gettimeofday(&next_ts, NULL); |
1100 @@ -181,7 +190,7 @@ |
2254 @@ -180,14 +201,32 @@ |
2255 if (opt_count && sent >= opt_count) |
|
1101 break; |
2256 break; |
1102 |
2257 |
1103 timeradd(&next_ts, &opt_wait, &next_ts); |
2258 - timeradd(&next_ts, &opt_wait, &next_ts); |
1104 - if (sendto(sp->fd, NULL, 0, 0, (struct sockaddr *) &sin, sizeof(sin))) |
2259 - if (sendto(sp->fd, NULL, 0, 0, (struct sockaddr *) &sin, sizeof(sin))) |
1105 + if (sendto(sp->fd, NULL, 0, 0, (struct sockaddr *) &sin, sizeof(sin)) < 0) |
2260 - err = errno; |
1106 err = errno; |
2261 - sp->sent_id = ++sent; |
1107 sp->sent_id = ++sent; |
2262 - sp->sent_ts = now; |
1108 sp->sent_ts = now; |
2263 - sp->nreplies = 0; |
1109 @@ -258,7 +267,11 @@ |
2264 - next = (next + 1) % NSOCKETS; |
2265 + timeradd(&now, &opt_wait, &next_ts); |
|
2266 +#if !(defined(__SVR4) && defined(__sun)) |
|
2267 + if (!pending[next]) { |
|
2268 +#endif |
|
2269 + memset(&sin, 0, sizeof(sin)); |
|
2270 +#if defined(__SVR4) && defined(__sun) |
|
2271 + sin.sin_family = AF_INET_OFFLOAD; |
|
2272 +#else |
|
2273 + sin.sin_family = AF_INET; |
|
2274 +#endif |
|
2275 + sin.sin_addr = opt_dstaddr; |
|
2276 |
|
2277 + if (sendto(sp->fd, NULL, 0, 0, (struct sockaddr *) &sin, sizeof(sin)) < 0) |
|
2278 + err = errno; |
|
2279 + sp->sent_id = ++sent; |
|
2280 + sp->sent_ts = now; |
|
2281 + sp->nreplies = 0; |
|
2282 +#if !(defined(__SVR4) && defined(__sun)) |
|
2283 + if (!err) |
|
2284 + pending[next] = 1; |
|
2285 +#endif |
|
2286 + next = (next + 1) % NSOCKETS; |
|
2287 +#if !(defined(__SVR4) && defined(__sun)) |
|
2288 + } |
|
2289 +#endif |
|
2290 + |
|
2291 if (err) { |
|
2292 static unsigned int nerrs = 0; |
|
2293 |
|
2294 @@ -223,6 +262,9 @@ |
|
2295 report_packet(sp, &now, NULL, errno); |
|
2296 } else { |
|
2297 report_packet(sp, &now, &from.sin_addr, 0); |
|
2298 +#if !(defined(__SVR4) && defined(__sun)) |
|
2299 + pending[i] = 0; |
|
2300 +#endif |
|
2301 recv++; |
|
2302 } |
|
2303 } |
|
2304 @@ -258,7 +300,11 @@ |
|
1110 int pf; |
2305 int pf; |
1111 |
2306 |
1112 memset(&sin, 0, sizeof(sin)); |
2307 memset(&sin, 0, sizeof(sin)); |
1113 +#if defined(__SVR4) && defined(__sun) |
2308 +#if defined(__SVR4) && defined(__sun) |
1114 + sin.sin_family = AF_INET_OFFLOAD; |
2309 + sin.sin_family = AF_INET_OFFLOAD; |
1116 sin.sin_family = AF_INET; |
2311 sin.sin_family = AF_INET; |
1117 +#endif |
2312 +#endif |
1118 |
2313 |
1119 #ifdef DYNAMIC_PF_RDS |
2314 #ifdef DYNAMIC_PF_RDS |
1120 pf = discover_pf_rds(); |
2315 pf = discover_pf_rds(); |
1121 @@ -278,6 +291,9 @@ |
2316 @@ -278,6 +324,9 @@ |
1122 if (ufd < 0) |
2317 if (ufd < 0) |
1123 die_errno("unable to create UDP socket"); |
2318 die_errno("unable to create UDP socket"); |
1124 sin.sin_addr = *dst; |
2319 sin.sin_addr = *dst; |
1125 +#if defined(__SVR4) && defined(__sun) |
2320 +#if defined(__SVR4) && defined(__sun) |
1126 + sin.sin_family = AF_INET; |
2321 + sin.sin_family = AF_INET; |
1127 +#endif |
2322 +#endif |
1128 sin.sin_port = htons(1); |
2323 sin.sin_port = htons(1); |
1129 if (connect(ufd, (struct sockaddr *) &sin, sizeof(sin)) < 0) |
2324 if (connect(ufd, (struct sockaddr *) &sin, sizeof(sin)) < 0) |
1130 die_errno("unable to connect to %s", |
2325 die_errno("unable to connect to %s", |
1131 @@ -289,6 +305,9 @@ |
2326 @@ -289,6 +338,9 @@ |
1132 |
2327 |
1133 *src = sin.sin_addr; |
2328 *src = sin.sin_addr; |
1134 close(ufd); |
2329 close(ufd); |
1135 +#if defined(__SVR4) && defined(__sun) |
2330 +#if defined(__SVR4) && defined(__sun) |
1136 + sin.sin_family = AF_INET_OFFLOAD; |
2331 + sin.sin_family = AF_INET_OFFLOAD; |
1137 +#endif |
2332 +#endif |
1138 } |
2333 } |
1139 |
2334 |
1140 sin.sin_addr = *src; |
2335 sin.sin_addr = *src; |
2336 @@ -297,6 +349,9 @@ |
|
2337 if (bind(fd, (struct sockaddr *) &sin, sizeof(sin))) |
|
2338 die_errno("bind() failed"); |
|
2339 |
|
2340 + if (opt_tos && ioctl(fd, SIOCRDSSETTOS, &opt_tos)) |
|
2341 + die_errno("ERROR: failed to set TOS\n"); |
|
2342 + |
|
2343 return fd; |
|
2344 } |
|
2345 |
|
2346 @@ -309,7 +364,8 @@ |
|
2347 "%s\nUsage: rds-ping [options] dst_addr\n" |
|
2348 "Options:\n" |
|
2349 " -c count limit packet count\n" |
|
2350 - " -I interface source IP address\n", |
|
2351 + " -I interface source IP address\n" |
|
2352 + " -Q tos type of service\n", |
|
2353 complaint); |
|
2354 exit(1); |
|
2355 } |
|
2356 @@ -384,3 +440,31 @@ |
|
2357 return 0; |
|
2358 } |
|
2359 |
|
2360 +static unsigned long long parse_ull(char *ptr, unsigned long long max) |
|
2361 +{ |
|
2362 + unsigned long long val; |
|
2363 + char *endptr; |
|
2364 + |
|
2365 + val = strtoull(ptr, &endptr, 0); |
|
2366 + switch (*endptr) { |
|
2367 + case 'k': case 'K': |
|
2368 + val <<= 10; |
|
2369 + endptr++; |
|
2370 + break; |
|
2371 + |
|
2372 + case 'm': case 'M': |
|
2373 + val <<= 20; |
|
2374 + endptr++; |
|
2375 + break; |
|
2376 + |
|
2377 + case 'g': case 'G': |
|
2378 + val <<= 30; |
|
2379 + endptr++; |
|
2380 + break; |
|
2381 + } |
|
2382 + |
|
2383 + if (*ptr && !*endptr && val <= max) |
|
2384 + return val; |
|
2385 + |
|
2386 + die("invalid number '%s'\n", ptr); |
|
2387 +} |
|
1141 diff -r -u /tmp/rds-tools-2.0.4/Makefile.in rds-tools-2.0.7/Makefile.in |
2388 diff -r -u /tmp/rds-tools-2.0.4/Makefile.in rds-tools-2.0.7/Makefile.in |
1142 --- /tmp/rds-tools-2.0.4/Makefile.in Wed Aug 4 15:25:11 2010 |
2389 --- /tmp/rds-tools-2.0.4/Makefile.in Wed Aug 4 15:25:11 2010 |
1143 +++ rds-tools-2.0.7/Makefile.in Thu Feb 24 13:27:51 2011 |
2390 +++ rds-tools-2.0.7/Makefile.in Thu Feb 24 13:27:51 2011 |
1144 @@ -4,18 +4,22 @@ |
2391 @@ -4,18 +4,22 @@ |
1145 mandir = $(DESTDIR)@mandir@ |
2392 mandir = $(DESTDIR)@mandir@ |
1288 |
2535 |
1289 |
2536 |
1290 diff -r -u /tmp/rds-tools-2.0.4/rds-stress.1 rds-tools-2.0.7/rds-stress.1 |
2537 diff -r -u /tmp/rds-tools-2.0.4/rds-stress.1 rds-tools-2.0.7/rds-stress.1 |
1291 --- /tmp/rds-tools-2.0.4/rds-stress.1 Wed Aug 4 15:25:11 2010 |
2538 --- /tmp/rds-tools-2.0.4/rds-stress.1 Wed Aug 4 15:25:11 2010 |
1292 +++ rds-tools-2.0.7/rds-stress.1 Thu Feb 24 13:27:52 2011 |
2539 +++ rds-tools-2.0.7/rds-stress.1 Thu Feb 24 13:27:52 2011 |
1293 @@ -1,99 +1,102 @@ |
2540 @@ -1,99 +1,106 @@ |
1294 -.Dd May 15, 2007 |
2541 -.Dd May 15, 2007 |
1295 -.Dt RDS-STRESS 1 |
2542 -.Dt RDS-STRESS 1 |
1296 -.Os |
2543 -.Os |
1297 -.Sh NAME |
2544 -.Sh NAME |
1298 -.Nm rds-stress |
2545 -.Nm rds-stress |
1319 +.PP |
2566 +.PP |
1320 +.SH SYNOPSIS |
2567 +.SH SYNOPSIS |
1321 +.HP |
2568 +.HP |
1322 +.nf |
2569 +.nf |
1323 +rds-stress [-p port_number] -r [receive_address] [-s send_address] |
2570 +rds-stress [-p port_number] -r [receive_address] [-s send_address] |
1324 + [-a ack_bytes] [-q request_bytes] [-D rdma_bytes] |
2571 + [-Q tos] [-a ack_bytes] [-q request_bytes] [-D rdma_bytes] |
1325 + [-d queue_depth] [-t nr_tasks] [-c] [-R] [-V] [-v] |
2572 + [-d queue_depth] [-t nr_tasks] [-T time] [-c] [-R] [-V] [-v] |
1326 +.fi |
2573 +.fi |
1327 |
2574 |
1328 -.Sh DESCRIPTION |
2575 -.Sh DESCRIPTION |
1329 -.Nm rds-stress |
2576 -.Nm rds-stress |
1330 + |
2577 + |
1421 obtain the address once the control connection is established. |
2668 obtain the address once the control connection is established. |
1422 The active process will choose a local address based on the interface through |
2669 The active process will choose a local address based on the interface through |
1423 which it connects to the destination address. |
2670 which it connects to the destination address. |
1424 -.It Fl a Ar ack_bytes |
2671 -.It Fl a Ar ack_bytes |
1425 +.TP |
2672 +.TP |
2673 +\fB\-Q tos |
|
2674 +Uses the RDS connection between IP addresses with the specified tos value. By |
|
2675 +default, the base (tos = 0) RDS connection is used. Valid values are 0-255. |
|
2676 +.TP |
|
1426 +\fB\-a ack_bytes |
2677 +\fB\-a ack_bytes |
1427 This specifies the size of the ack messages, in bytes. There is a minimum size |
2678 This specifies the size of the ack messages, in bytes. There is a minimum size |
1428 which depends on the format of the ack messages, which may change over time. |
2679 which depends on the format of the ack messages, which may change over time. |
1429 See section "Message Sizes" below. |
2680 See section "Message Sizes" below. |
1430 -.It Fl q Ar request_bytes |
2681 -.It Fl q Ar request_bytes |
1437 +.TP |
2688 +.TP |
1438 +\fB\-D rdma_bytes |
2689 +\fB\-D rdma_bytes |
1439 RDSv3 is capable of transmitting part of a message via RDMA directly from |
2690 RDSv3 is capable of transmitting part of a message via RDMA directly from |
1440 application buffer to application buffer. This option enables RDMA support |
2691 application buffer to application buffer. This option enables RDMA support |
1441 in rds-stress: request packets include parameters for an RDMA READ or WRITE |
2692 in rds-stress: request packets include parameters for an RDMA READ or WRITE |
1442 @@ -100,20 +103,25 @@ |
2693 @@ -100,20 +107,25 @@ |
1443 operation, which the receiving process executes at the time the ACK packet |
2694 operation, which the receiving process executes at the time the ACK packet |
1444 is sent. |
2695 is sent. |
1445 See section "Message Sizes" below. |
2696 See section "Message Sizes" below. |
1446 -.It Fl d Ar queue_depth |
2697 -.It Fl d Ar queue_depth |
1447 +.TP |
2698 +.TP |
1468 +.TP |
2719 +.TP |
1469 +\fB\-c |
2720 +\fB\-c |
1470 This causes rds-stress to create child tasks which just consume CPU cycles. |
2721 This causes rds-stress to create child tasks which just consume CPU cycles. |
1471 One task is created for each CPU in the system. First each child observes the |
2722 One task is created for each CPU in the system. First each child observes the |
1472 maximum rate at which it can consume cycles. This means that this option |
2723 maximum rate at which it can consume cycles. This means that this option |
1473 @@ -121,50 +129,67 @@ |
2724 @@ -121,50 +133,67 @@ |
1474 use of the system by observing the lesser rate at which the children consume |
2725 use of the system by observing the lesser rate at which the children consume |
1475 cycles. This option is *not* shared between the active and passive instances. |
2726 cycles. This option is *not* shared between the active and passive instances. |
1476 It must be specified on each rds-stress command line. |
2727 It must be specified on each rds-stress command line. |
1477 -.It Fl R |
2728 -.It Fl R |
1478 +.TP |
2729 +.TP |
1535 +mbi K/s |
2786 +mbi K/s |
1536 +The total number of bytes that are being received via RDMA READs and |
2787 +The total number of bytes that are being received via RDMA READs and |
1537 WRITEs for all children. |
2788 WRITEs for all children. |
1538 -.It tx us/c |
2789 -.It tx us/c |
1539 +.TP |
2790 +.TP |
1540 +mbi K/s |
2791 +mbo K/s |
1541 +The total number of bytes that are being transmited via RDMA READs and |
1541 +The total number of bytes that are being transmitted via RDMA READs and |
2792 +The total number of bytes that are being transmitted via RDMA READs and |
2793 +WRITEs for all children. |
1543 +.TP |
2794 +.TP |
1544 +tx us/c |
2795 +tx us/c |
1545 The average number of microseconds spent in sendmsg() calls. |
2796 The average number of microseconds spent in sendmsg() calls. |
1555 +.TP |
2806 +.TP |
1556 +cpu % |
2807 +cpu % |
1557 This is the percentage of available CPU resources on this machine that are being |
2808 This is the percentage of available CPU resources on this machine that are being |
1558 consumed since rds-stress started running. It will show -1.00 if -c is not |
2809 consumed since rds-stress started running. It will show -1.00 if -c is not |
1559 given. It is calculated based on the amount of CPU resources that CPU soaking |
2810 given. It is calculated based on the amount of CPU resources that CPU soaking |
1560 @@ -171,4 +196,3 @@ |
2811 @@ -171,4 +200,3 @@ |
1561 tasks are able to consume. This lets it measure CPU use by the system, say in |
2812 tasks are able to consume. This lets it measure CPU use by the system, say in |
1562 interrupt handlers, that task-based CPU accounting does not include. |
2813 interrupt handlers, that task-based CPU accounting does not include. |
1563 For this to work rds-stress must be started with -c on an idle system. |
2814 For this to work rds-stress must be started with -c on an idle system. |
1564 -.El |
2815 -.El |
1565 diff -r -u /tmp/rds-tools-2.0.4/include/rds.h rds-tools-2.0.7/include/rds.h |
2816 diff -r -u /tmp/rds-tools-2.0.4/include/rds.h rds-tools-2.0.7/include/rds.h |