83 @@ -63,14 +69,14 @@ |
83 @@ -63,14 +69,14 @@ |
84 */ |
84 */ |
85 static double sample_get_cpu_mhz(void) |
85 static double sample_get_cpu_mhz(void) |
86 { |
86 { |
87 - struct timeval tv1, tv2; |
87 - struct timeval tv1, tv2; |
88 + cycles_t tv1, tv2; |
88 + struct timeval tv1, tv2; |
89 cycles_t start; |
89 cycles_t start; |
90 double sx = 0, sy = 0, sxx = 0, syy = 0, sxy = 0; |
90 double sx = 0, sy = 0, sxx = 0, syy = 0, sxy = 0; |
91 - double tx, ty; |
91 - double tx, ty; |
92 + cycles_t tx, ty; |
92 + cycles_t tx, ty; |
93 int i; |
93 int i; |
96 - long x[MEASUREMENTS]; |
96 - long x[MEASUREMENTS]; |
97 + cycles_t x[MEASUREMENTS]; |
97 + cycles_t x[MEASUREMENTS]; |
98 cycles_t y[MEASUREMENTS]; |
98 cycles_t y[MEASUREMENTS]; |
99 double a; /* system call overhead in cycles */ |
99 double a; /* system call overhead in cycles */ |
100 double b; /* cycles per microsecond */ |
100 double b; /* cycles per microsecond */ |
101 @@ -78,25 +84,16 @@ |
101 @@ -78,7 +84,6 @@ |
102 |
102 |
103 for (i = 0; i < MEASUREMENTS; ++i) { |
103 for (i = 0; i < MEASUREMENTS; ++i) { |
104 start = get_cycles(); |
104 start = get_cycles(); |
105 + tv1 = get_cycles(); |
105 - |
106 |
106 if (gettimeofday(&tv1, NULL)) { |
107 - if (gettimeofday(&tv1, NULL)) { |
107 fprintf(stderr, "gettimeofday failed.\n"); |
108 - fprintf(stderr, "gettimeofday failed.\n"); |
108 return 0; |
109 - return 0; |
109 @@ -86,7 +91,7 @@ |
110 - } |
110 |
111 - |
|
112 do { |
111 do { |
113 - if (gettimeofday(&tv2, NULL)) { |
112 if (gettimeofday(&tv2, NULL)) { |
114 - fprintf(stderr, "gettimeofday failed.\n"); |
113 - fprintf(stderr, "gettimeofday failed.\n"); |
115 - return 0; |
114 + fprintf(stderr, "gettimeofday failed.\n"); |
116 - } |
115 return 0; |
117 - } while ((tv2.tv_sec - tv1.tv_sec) * 1000000 + |
116 } |
118 - (tv2.tv_usec - tv1.tv_usec) < USECSTART + i * USECSTEP); |
117 } while ((tv2.tv_sec - tv1.tv_sec) * 1000000 + |
119 + tv2 = get_cycles(); |
118 @@ -94,9 +99,10 @@ |
120 + } while ((tv2 - tv1) < NSECSTART + i * NSECSTEP); |
119 |
121 |
120 x[i] = (tv2.tv_sec - tv1.tv_sec) * 1000000 + |
122 - x[i] = (tv2.tv_sec - tv1.tv_sec) * 1000000 + |
121 tv2.tv_usec - tv1.tv_usec; |
123 - tv2.tv_usec - tv1.tv_usec; |
122 + |
124 + x[i] = (tv2 - tv1); |
|
125 y[i] = get_cycles() - start; |
123 y[i] = get_cycles() - start; |
126 if (DEBUG_DATA) |
124 if (DEBUG_DATA) |
127 - fprintf(stderr, "x=%ld y=%Ld\n", x[i], (long long)y[i]); |
125 - fprintf(stderr, "x=%ld y=%Ld\n", x[i], (long long)y[i]); |
128 + fprintf(stderr, "x=%lld y=%lld\n", x[i], y[i]); |
126 + fprintf(stderr, "x=%lld y=%lld\n", x[i], y[i]); |
129 } |
127 } |
130 |
128 |
131 for (i = 0; i < MEASUREMENTS; ++i) { |
129 for (i = 0; i < MEASUREMENTS; ++i) { |
132 @@ -134,10 +131,15 @@ |
130 @@ -134,10 +140,15 @@ |
133 |
131 |
134 static double proc_get_cpu_mhz(int no_cpu_freq_fail) |
132 static double proc_get_cpu_mhz(int no_cpu_freq_fail) |
135 { |
133 { |
136 +#if !(defined(__SVR4) && defined(__sun)) |
134 +#if !(defined(__SVR4) && defined(__sun)) |
137 FILE* f; |
135 FILE* f; |
157 + free(info); |
155 + free(info); |
158 +#endif |
156 +#endif |
159 return mhz; |
157 return mhz; |
160 } |
158 } |
161 |
159 |
162 @@ -183,10 +192,10 @@ |
160 @@ -184,8 +202,9 @@ |
163 double sample, proc, delta; |
|
164 sample = sample_get_cpu_mhz(); |
161 sample = sample_get_cpu_mhz(); |
165 proc = proc_get_cpu_mhz(no_cpu_freq_fail); |
162 proc = proc_get_cpu_mhz(no_cpu_freq_fail); |
166 +#if defined(__SVR4) && defined(__sun) |
|
167 + sample = sample * proc; |
|
168 +#endif |
|
169 |
163 |
170 - if (!proc || !sample) |
164 - if (!proc || !sample) |
171 - return 0; |
165 - return 0; |
172 - |
166 +#if defined(__sparc) |
|
167 + return proc; |
|
168 +#endif |
|
169 |
173 delta = proc > sample ? proc - sample : sample - proc; |
170 delta = proc > sample ? proc - sample : sample - proc; |
174 if (delta / proc > 0.01) { |
171 if (delta / proc > 0.01) { |
175 fprintf(stderr, "Warning: measured timestamp frequency " |
172 @@ -194,5 +213,6 @@ |
|
173 sample, proc); |
|
174 return sample; |
|
175 } |
|
176 + |
|
177 return proc; |
|
178 } |
176 diff -r -u /tmp/perftest-1.3.0/get_clock.h perftest-1.3.0/get_clock.h |
179 diff -r -u /tmp/perftest-1.3.0/get_clock.h perftest-1.3.0/get_clock.h |
177 --- /tmp/perftest-1.3.0/get_clock.h Sun Nov 1 03:09:16 2009 |
180 --- /tmp/perftest-1.3.0/get_clock.h Sun Nov 1 03:09:16 2009 |
178 +++ perftest-1.3.0/get_clock.h Fri Feb 11 04:12:46 2011 |
181 +++ perftest-1.3.0/get_clock.h Fri Feb 11 04:12:46 2011 |
179 @@ -36,8 +36,18 @@ |
182 @@ -36,8 +36,18 @@ |
180 |
183 |
181 #ifndef GET_CLOCK_H |
184 #ifndef GET_CLOCK_H |
182 #define GET_CLOCK_H |
185 #define GET_CLOCK_H |
183 +#if defined(__SVR4) && defined(__sun) |
186 +#if defined(__sparc) |
184 +#include <sys/times.h> |
187 +#include <sys/times.h> |
185 +#include <limits.h> |
188 +#include <limits.h> |
186 +#include <sys/time.h> |
189 +#include <sys/time.h> |
187 +typedef hrtime_t cycles_t; |
190 +typedef unsigned long long cycles_t; |
188 |
191 |
189 -#if defined (__x86_64__) || defined(__i386__) |
192 -#if defined (__x86_64__) || defined(__i386__) |
190 +static inline cycles_t get_cycles() |
193 +static inline cycles_t get_cycles() |
191 +{ |
194 +{ |
192 + return (gethrtime()); |
195 + return (gethrtime()); |
193 +} |
196 +} |
194 + |
197 + |
195 +#elif defined (__x86_64__) || defined(__i386__) |
198 +#elif defined (__x86_64__) || defined(__i386__) |
196 /* Note: only x86 CPUs which have rdtsc instruction are supported. */ |
199 /* Note: only x86 CPUs which have rdtsc instruction are supported. */ |
197 typedef unsigned long long cycles_t; |
200 typedef unsigned long long cycles_t; |
1536 #include <time.h> |
1539 #include <time.h> |
1537 +#include <inttypes.h> |
1540 +#include <inttypes.h> |
1538 #include <infiniband/verbs.h> |
1541 #include <infiniband/verbs.h> |
1539 |
1542 |
1540 #include "get_clock.h" |
1543 #include "get_clock.h" |
1541 @@ -358,11 +359,20 @@ |
1544 @@ -358,10 +359,19 @@ |
1542 |
1545 |
1543 |
1546 |
1544 if (user_param->r_flag->cycles) { |
1547 if (user_param->r_flag->cycles) { |
1545 +#if !(defined(__SVR4) && defined(__sun)) |
1548 +#if !(defined(__sparc)) |
1546 cycles_to_units = 1; |
1549 cycles_to_units = 1; |
1547 +#else |
1550 +#else |
1548 + cycles_to_units = |
1551 + cycles_to_units = |
1549 + (1/get_cpu_mhz(user_param->cpu_freq_f)) * 1000; |
1552 + (1/get_cpu_mhz(user_param->cpu_freq_f)) * 1000; |
1550 +#endif |
1553 +#endif |
1551 units = "cycles"; |
1554 units = "cycles"; |
1552 } else { |
1555 } else { |
1553 +#if !(defined(__SVR4) && defined(__sun)) |
1556 +#if !(defined(__sparc)) |
1554 cycles_to_units = get_cpu_mhz(user_param->cpu_freq_f); |
1557 cycles_to_units = get_cpu_mhz(user_param->cpu_freq_f); |
1555 +#else |
1558 +#else |
1556 + cycles_to_units = 1000; |
1559 + cycles_to_units = 1000; |
|
1560 +#endif |
1557 units = "usec"; |
1561 units = "usec"; |
1558 +#endif |
1562 } |
1559 } |
1563 |
1560 |
|
1561 if (user_param->r_flag->unsorted) { |
|
1562 diff -r -u /tmp/perftest-1.3.0/send_bw.c perftest-1.3.0/send_bw.c |
1564 diff -r -u /tmp/perftest-1.3.0/send_bw.c perftest-1.3.0/send_bw.c |
1563 --- /tmp/perftest-1.3.0/send_bw.c Thu Jan 20 07:37:18 2011 |
1565 --- /tmp/perftest-1.3.0/send_bw.c Thu Jan 20 07:37:18 2011 |
1564 +++ perftest-1.3.0/send_bw.c Fri Feb 11 04:12:47 2011 |
1566 +++ perftest-1.3.0/send_bw.c Fri Feb 11 04:12:47 2011 |
1565 @@ -1,1162 +1,1166 @@ |
1567 @@ -1,1162 +1,1166 @@ |
1566 -/* |
1568 -/* |
3968 - ctx->rcq = ibv_create_cq(ctx->context,user_parm->rx_depth*user_parm->num_of_qps,NULL,ctx->channel,0); |
3970 - ctx->rcq = ibv_create_cq(ctx->context,user_parm->rx_depth*user_parm->num_of_qps,NULL,ctx->channel,0); |
3969 + ctx->rcq = ibv_create_cq(ctx->context,user_parm->rx_depth*user_parm->num_of_qps,NULL,ctx->rx_channel,0); |
3971 + ctx->rcq = ibv_create_cq(ctx->context,user_parm->rx_depth*user_parm->num_of_qps,NULL,ctx->rx_channel,0); |
3970 if (!ctx->rcq) { |
3972 if (!ctx->rcq) { |
3971 fprintf(stderr, "Couldn't create CQ\n"); |
3973 fprintf(stderr, "Couldn't create CQ\n"); |
3972 return NULL; |
3974 return NULL; |
3973 @@ -583,10 +598,19 @@ |
3975 @@ -581,12 +596,20 @@ |
3974 |
3976 for (i = 0; i < user_param->iters - 1; ++i) |
3975 |
3977 delta[i] = tstamp[i + 1] - tstamp[i]; |
|
3978 |
|
3979 - |
3976 if (user_param->r_flag->cycles) { |
3980 if (user_param->r_flag->cycles) { |
3977 +#if !(defined(__SVR4) && defined(__sun)) |
3981 +#if !(defined(__sparc)) |
3978 cycles_to_units = 1; |
3982 cycles_to_units = 1; |
3979 +#else |
3983 +#else |
3980 + cycles_to_units = |
3984 + cycles_to_units = |
3981 + (1/get_cpu_mhz(user_param->cpu_freq_f)) * 1000; |
3985 + (1/get_cpu_mhz(user_param->cpu_freq_f)) * 1000; |
3982 +#endif |
3986 +#endif |
3983 units = "cycles"; |
3987 units = "cycles"; |
3984 } else { |
3988 } else { |
3985 +#if !(defined(__SVR4) && defined(__sun)) |
3989 +#if !(defined(__sparc)) |
3986 cycles_to_units = get_cpu_mhz(user_param->cpu_freq_f); |
3990 cycles_to_units = get_cpu_mhz(user_param->cpu_freq_f); |
3987 +#else |
3991 +#else |
3988 + cycles_to_units = 1000; |
3992 + cycles_to_units = 1000; |
3989 +#endif |
3993 +#endif |
3990 units = "usec"; |
3994 units = "usec"; |
3991 } |
3995 } |
3992 |
3996 |
3993 @@ -649,7 +673,7 @@ |
3997 @@ -649,7 +672,7 @@ |
3994 |
3998 |
3995 // Server is polling on recieve first . |
3999 // Server is polling on recieve first . |
3996 if (user_param->use_event) { |
4000 if (user_param->use_event) { |
3997 - if (ctx_notify_events(ctx->rcq,ctx->channel)) { |
4001 - if (ctx_notify_events(ctx->rcq,ctx->channel)) { |
3998 + if (ctx_notify_events(ctx->rcq, ctx->rx_channel)) { |
4002 + if (ctx_notify_events(ctx->rcq, ctx->rx_channel)) { |
3999 fprintf(stderr , " Failed to notify events to CQ"); |
4003 fprintf(stderr , " Failed to notify events to CQ"); |
4000 return 1; |
4004 return 1; |
4001 } |
4005 } |
4002 @@ -701,7 +725,7 @@ |
4006 @@ -701,7 +724,7 @@ |
4003 int s_ne; |
4007 int s_ne; |
4004 |
4008 |
4005 if (user_param->use_event) { |
4009 if (user_param->use_event) { |
4006 - if (ctx_notify_events(ctx->scq,ctx->channel)) { |
4010 - if (ctx_notify_events(ctx->scq,ctx->channel)) { |
4007 + if (ctx_notify_events(ctx->scq, ctx->tx_channel)) { |
4011 + if (ctx_notify_events(ctx->scq, ctx->tx_channel)) { |
4039 - if (is_dev_hermon(ctx->context) != NOT_HERMON && user_parm->inline_size != 0) |
4043 - if (is_dev_hermon(ctx->context) != NOT_HERMON && user_parm->inline_size != 0) |
4040 + if (is_dev_hermon(ctx->context) == NOT_HERMON && user_parm->inline_size != 0) |
4044 + if (is_dev_hermon(ctx->context) == NOT_HERMON && user_parm->inline_size != 0) |
4041 user_parm->inline_size = 0; |
4045 user_parm->inline_size = 0; |
4042 |
4046 |
4043 printf(" Inline data is used up to %d bytes message\n", user_parm->inline_size); |
4047 printf(" Inline data is used up to %d bytes message\n", user_parm->inline_size); |
4044 @@ -384,7 +385,11 @@ |
4048 @@ -368,7 +369,6 @@ |
|
4049 cycles_t t; |
|
4050 int iters = user_param->iters; |
|
4051 |
|
4052 - |
|
4053 opt_delta = tcompleted[opt_posted] - tposted[opt_completed]; |
|
4054 |
|
4055 if (user_param->noPeak == OFF) { |
|
4056 @@ -384,7 +384,11 @@ |
4045 } |
4057 } |
4046 } |
4058 } |
4047 |
4059 |
4048 +#if !(defined(__SVR4) && defined(__sun)) |
4060 +#if !(defined(__sparc)) |
4049 cycles_to_units = get_cpu_mhz(user_param->cpu_freq_f) * 1000000; |
4061 cycles_to_units = get_cpu_mhz(user_param->cpu_freq_f) * 1000000; |
4050 +#else |
4062 +#else |
4051 + cycles_to_units = 1000000000; |
4063 + cycles_to_units = 1000000000; |
4052 +#endif |
4064 +#endif |
4053 |
4065 |
4089 #include "get_clock.h" |
4101 #include "get_clock.h" |
4090 @@ -330,10 +331,19 @@ |
4102 @@ -330,10 +331,19 @@ |
4091 |
4103 |
4092 |
4104 |
4093 if (user_param->r_flag->cycles) { |
4105 if (user_param->r_flag->cycles) { |
4094 +#if !(defined(__SVR4) && defined(__sun)) |
4106 +#if !(defined(__sparc)) |
4095 cycles_to_units = 1; |
4107 cycles_to_units = 1; |
4096 +#else |
4108 +#else |
4097 + cycles_to_units = |
4109 + cycles_to_units = |
4098 + (1/get_cpu_mhz(user_param->cpu_freq_f)) * 1000; |
4110 + (1/get_cpu_mhz(user_param->cpu_freq_f)) * 1000; |
4099 +#endif |
4111 +#endif |
4100 units = "cycles"; |
4112 units = "cycles"; |
4101 } else { |
4113 } else { |
4102 +#if !(defined(__SVR4) && defined(__sun)) |
4114 +#if !(defined(__sparc)) |
4103 cycles_to_units = get_cpu_mhz(user_param->cpu_freq_f); |
4115 cycles_to_units = get_cpu_mhz(user_param->cpu_freq_f); |
4104 +#else |
4116 +#else |
4105 + cycles_to_units = 1000; |
4117 + cycles_to_units = 1000; |
4106 +#endif |
4118 +#endif |
4107 units = "usec"; |
4119 units = "usec"; |