components/open-fabrics/perftest/patches/base.patch
author Sharath M Srinivasan <sharath.srinivasan@oracle.com>
Tue, 05 Feb 2013 19:07:23 -0800
changeset 1144 b219ca1112b6
parent 715 eed3ed08f692
child 1563 33a996257127
permissions -rw-r--r--
15825425 - SUNBT7206756 qperf: sdp_bw doesn't work with IPv6; 15791672 - SUNBT7168806 qperf should not bind to cpu 0 by default; 16075288 - qperf/rds_bw failed with 'connect failed: Connection refused'; 15821409 - SUNBT7203619 ib_write_bw reports peak b/w as -0.00

diff -r -u /tmp/perftest-1.3.0/Makefile perftest-1.3.0/Makefile
--- /tmp/perftest-1.3.0/Makefile	Thu Jan 20 01:37:35 2011
+++ perftest-1.3.0/Makefile	Fri Feb 11 04:12:45 2011
@@ -2,10 +2,11 @@
 MCAST_TESTS = send_bw send_lat
 TESTS = write_bw_postlist write_lat write_bw read_lat read_bw
 UTILS = clock_test
+BINS = ib_write_bw_postlist rdma_lat rdma_bw ib_send_lat ib_send_bw ib_write_lat ib_write_bw ib_read_lat ib_read_bw ib_clock_test
 
 all: ${RDMACM_TESTS} ${MCAST_TESTS} ${TESTS} ${UTILS}
 
-CFLAGS += -Wall -g -D_GNU_SOURCE -O2
+CFLAGS += -Wall -g -D_GNU_SOURCE -O3
 BASIC_FILES = get_clock.c
 EXTRA_FILES = perftest_resources.c
 MCAST_FILES = multicast_resources.c
@@ -12,13 +13,14 @@
 BASIC_HEADERS = get_clock.h
 EXTRA_HEADERS = perftest_resources.h
 MCAST_HEADERS = multicast_resources.h
+CC=gcc
 #The following seems to help GNU make on some platforms
 LOADLIBES += 
 LDFLAGS +=
 
-${RDMACM_TESTS}: LOADLIBES += -libverbs -lrdmacm
-${MCAST_TESTS}: LOADLIBES += -libverbs -libumad -lm
-${TESTS} ${UTILS}: LOADLIBES += -libverbs
+${RDMACM_TESTS}: LOADLIBES += -lsocket -libverbs -lrdmacm -lrt 
+${MCAST_TESTS}: LOADLIBES += -lsocket -libverbs -lrdmacm -lrt -libumad -lm
+${TESTS} ${UTILS}: LOADLIBES += -lsocket -lrt -libverbs
 
 ${RDMACM_TESTS}: %: %.c ${BASIC_FILES} ${BASIC_HEADERS}
 	$(CC) $(CPPFLAGS) $(CFLAGS) $(LDFLAGS) $< ${BASIC_FILES} $(LOADLIBES) $(LDLIBS) -o $@
@@ -27,6 +29,10 @@
 ${TESTS} ${UTILS}: %: %.c ${BASIC_FILES} ${EXTRA_FILES} ${BASIC_HEADERS} ${EXTRA_HEADERS}
 	$(CC) $(CPPFLAGS) $(CFLAGS) $(LDFLAGS) $< ${BASIC_FILES} ${EXTRA_FILES} $(LOADLIBES) $(LDLIBS) -o ib_$@
 
+install: all
+	$(INSTALL) -d $(DESTDIR)$(BINDIR)
+	$(INSTALL) -m 755 -s $(BINS) $(DESTDIR)$(BINDIR)
+
 clean:
 	$(foreach fname,${RDMACM_TESTS}, rm -f ${fname})
 	$(foreach fname,${MCAST_TESTS}, rm -f ib_${fname})
diff -r -u /tmp/perftest-1.3.0/clock_test.c perftest-1.3.0/clock_test.c
--- /tmp/perftest-1.3.0/clock_test.c	Sun Nov  1 03:09:16 2009
+++ perftest-1.3.0/clock_test.c	Fri Sep 30 08:08:29 2011
@@ -20,6 +20,10 @@
 		c1 = get_cycles();
 		sleep(1);
 		c2 = get_cycles();
+#if defined(__sparc)
+		printf("1 sec = %g usec\n", (double)((c2 - c1))/1000);
+#else
 		printf("1 sec = %g usec\n", (c2 - c1) / mhz);
+#endif
 	}
 }
diff -r -u /tmp/perftest-1.3.0/get_clock.c perftest-1.3.0/get_clock.c
--- /tmp/perftest-1.3.0/get_clock.c	Sun Dec 19 06:36:26 2010
+++ perftest-1.3.0/get_clock.c	Fri Feb 11 04:12:46 2011
@@ -45,6 +45,10 @@
 #include <unistd.h>
 #include <stdio.h>
 #include "get_clock.h"
+#if defined(__SVR4) && defined(__sun)
+#include <stdlib.h>
+#include <infiniband/ofa_solaris.h>
+#endif
 
 #ifndef DEBUG
 #define DEBUG 0
@@ -56,6 +60,8 @@
 #define MEASUREMENTS 200
 #define USECSTEP 10
 #define USECSTART 100
+#define NSECSTART 100000
+#define NSECSTEP 10000
 
 /*
  Use linear regression to calculate cycles per microsecond.
@@ -63,14 +69,14 @@
 */
 static double sample_get_cpu_mhz(void)
 {
-	struct timeval tv1, tv2;
+        struct timeval tv1, tv2;
 	cycles_t start;
 	double sx = 0, sy = 0, sxx = 0, syy = 0, sxy = 0;
-	double tx, ty;
+	cycles_t tx, ty;
 	int i;
 
 	/* Regression: y = a + b x */
-	long x[MEASUREMENTS];
+	cycles_t x[MEASUREMENTS];
 	cycles_t y[MEASUREMENTS];
 	double a; /* system call overhead in cycles */
 	double b; /* cycles per microsecond */
@@ -78,7 +84,6 @@
 
 	for (i = 0; i < MEASUREMENTS; ++i) {
 		start = get_cycles();
-
 		if (gettimeofday(&tv1, NULL)) {
 			fprintf(stderr, "gettimeofday failed.\n");
 			return 0;
@@ -86,7 +91,7 @@
 
 		do {
 			if (gettimeofday(&tv2, NULL)) {
-				fprintf(stderr, "gettimeofday failed.\n");
+			        fprintf(stderr, "gettimeofday failed.\n");
 				return 0;
 			}
 		} while ((tv2.tv_sec - tv1.tv_sec) * 1000000 +
@@ -94,9 +99,10 @@
 
 		x[i] = (tv2.tv_sec - tv1.tv_sec) * 1000000 +
 			tv2.tv_usec - tv1.tv_usec;
+
 		y[i] = get_cycles() - start;
 		if (DEBUG_DATA)
-			fprintf(stderr, "x=%ld y=%Ld\n", x[i], (long long)y[i]);
+			fprintf(stderr, "x=%llu y=%llu\n", (unsigned long long)x[i], (unsigned long long)y[i]); /* cycles_t is unsigned: cast so the format matches */
 	}
 
 	for (i = 0; i < MEASUREMENTS; ++i) {
@@ -134,10 +140,15 @@
 
 static double proc_get_cpu_mhz(int no_cpu_freq_fail)
 {
+#if !(defined(__SVR4) && defined(__sun))
 	FILE* f;
 	char buf[256];
+#else
+	sol_cpu_info_t	*info;
+#endif
 	double mhz = 0.0;
 
+#if !(defined(__SVR4) && defined(__sun))
 	f = fopen("/proc/cpuinfo","r");
 	if (!f)
 		return 0.0;
@@ -174,6 +185,13 @@
 		}
 	}
 	fclose(f);
+#else
+	if (sol_get_cpu_info(&info) > 0)
+		mhz = info[0].cpu_mhz;
+	else
+		return (0.0);
+	free(info);
+#endif
 	return mhz;
 }
 
@@ -184,8 +202,9 @@
 	sample = sample_get_cpu_mhz();
 	proc = proc_get_cpu_mhz(no_cpu_freq_fail);
 
-	if (!proc || !sample)
-		return 0;
+#if defined(__sparc)
+	return proc;
+#endif
 
 	delta = proc > sample ? proc - sample : sample - proc;
 	if (delta / proc > 0.01) {
@@ -194,5 +213,6 @@
 					sample, proc);
 			return sample;
 	}
+
 	return proc;
 }
diff -r -u /tmp/perftest-1.3.0/get_clock.h perftest-1.3.0/get_clock.h
--- /tmp/perftest-1.3.0/get_clock.h	Sun Nov  1 03:09:16 2009
+++ perftest-1.3.0/get_clock.h	Fri Feb 11 04:12:46 2011
@@ -36,8 +36,18 @@
 
 #ifndef GET_CLOCK_H
 #define GET_CLOCK_H
+#if defined(__sparc)
+#include <sys/times.h>
+#include <limits.h>
+#include <sys/time.h>
+typedef unsigned long long cycles_t;
 
-#if defined (__x86_64__) || defined(__i386__)
+static inline cycles_t get_cycles()
+{
+        return (gethrtime());
+}
+
+#elif defined (__x86_64__) || defined(__i386__)
 /* Note: only x86 CPUs which have rdtsc instruction are supported. */
 typedef unsigned long long cycles_t;
 static inline cycles_t get_cycles()
diff -r -u /tmp/perftest-1.3.0/rdma_bw.c perftest-1.3.0/rdma_bw.c
--- /tmp/perftest-1.3.0/rdma_bw.c	Wed Apr  7 09:44:56 2010
+++ perftest-1.3.0/rdma_bw.c	Fri Feb 11 04:12:46 2011
@@ -50,8 +50,11 @@
 #include <malloc.h>
 #include <getopt.h>
 #include <arpa/inet.h>
+#if !(defined(__SVR4) && defined(__sun))
 #include <byteswap.h>
+#endif
 #include <time.h>
+#include <inttypes.h>
 
 #include <infiniband/verbs.h>
 #include <rdma/rdma_cma.h>
@@ -144,7 +147,7 @@
 
 	if (n < 0) {
 		fprintf(stderr, "%d:%s: %s for %s:%d\n", 
-				pid, __func__, gai_strerror(n),
+				(int)pid, __func__, gai_strerror(n),
 				data->servername, data->port);
 		goto err4;
 	}
@@ -157,7 +160,7 @@
 		if (rdma_resolve_addr(data->cm_id, NULL,
 					 (struct sockaddr *)&sin, 2000)) {
 			fprintf(stderr, "%d:%s: rdma_resolve_addr failed\n",
-					 pid, __func__ );
+					 (int)pid, __func__ );
 			goto err2;
 		}
 	
@@ -172,7 +175,7 @@
 
 		if (event->event != RDMA_CM_EVENT_ADDR_RESOLVED) {
 			fprintf(stderr, "%d:%s: unexpected CM event %d\n", 
-				pid, __func__, event->event);
+				(int)pid, __func__, event->event);
 			goto err1;
 		}
 		rdma_ack_cm_event(event);
@@ -180,7 +183,7 @@
 retry_route:
 		if (rdma_resolve_route(data->cm_id, 2000)) {
 			fprintf(stderr, "%d:%s: rdma_resolve_route failed\n", 
-						pid, __func__);
+						(int)pid, __func__);
 			goto err2;
 		}
 	
@@ -195,7 +198,7 @@
 
 		if (event->event != RDMA_CM_EVENT_ROUTE_RESOLVED) {
 			fprintf(stderr, "%d:%s: unexpected CM event %d\n", 
-					pid, __func__, event->event);
+					(int)pid, __func__, event->event);
 			rdma_ack_cm_event(event);
 			goto err1;
 		}
@@ -202,7 +205,7 @@
 		rdma_ack_cm_event(event);
 		ctx = pp_init_ctx(data->cm_id, data);
 		if (!ctx) {
-			fprintf(stderr, "%d:%s: pp_init_ctx failed\n", pid, __func__);
+			fprintf(stderr, "%d:%s: pp_init_ctx failed\n", (int)pid, __func__);
 			goto err2;
 		}
 		data->my_dest.psn = lrand48() & 0xffffff;
@@ -218,7 +221,7 @@
 		conn_param.private_data_len = sizeof(data->my_dest);
 
 		if (rdma_connect(data->cm_id, &conn_param)) {
-			fprintf(stderr, "%d:%s: rdma_connect failure\n", pid, __func__);
+			fprintf(stderr, "%d:%s: rdma_connect failure\n", (int)pid, __func__);
 			goto err2;
 		}
 	
@@ -227,13 +230,13 @@
 	
 		if (event->event != RDMA_CM_EVENT_ESTABLISHED) {
 			fprintf(stderr, "%d:%s: unexpected CM event %d\n", 
- 					pid, __func__, event->event);
+ 					(int)pid, __func__, event->event);
 			goto err1;
 		}
 		if (!event->param.conn.private_data || 
 		    (event->param.conn.private_data_len < sizeof(*data->rem_dest))) {
 			fprintf(stderr, "%d:%s: bad private data ptr %p len %d\n",  
-				pid, __func__, event->param.conn.private_data, 
+				(int)pid, __func__, event->param.conn.private_data, 
 				event->param.conn.private_data_len);
 			goto err1;
 		}
@@ -257,7 +260,7 @@
 		}
 		if (sockfd < 0) {
 			fprintf(stderr, "%d:%s: Couldn't connect to %s:%d\n", 
-				 pid, __func__, data->servername, data->port);
+				 (int)pid, __func__, data->servername, data->port);
 			goto err3;
 		}
 		ctx = pp_init_ctx(data->ib_dev, data);
@@ -287,13 +290,13 @@
 	int parsed;
 	
 	if (!data->use_cma) {
-		sprintf(msg, "%04x:%06x:%06x:%08x:%016Lx", data->my_dest.lid, 
+		sprintf(msg, "%04x:%06x:%06x:%08x:%016llx", data->my_dest.lid, 
 				data->my_dest.qpn, data->my_dest.psn,
 				data->my_dest.rkey, data->my_dest.vaddr);
 		if (write(data->sockfd, msg, sizeof msg) != sizeof msg) {
 			perror("client write");
 			fprintf(stderr, "%d:%s: Couldn't send local address\n", 
-					pid, __func__);
+					(int)pid, __func__);
 			goto err;
 		}
 	
@@ -300,7 +303,7 @@
 		if (read(data->sockfd, msg, sizeof msg) != sizeof msg) {
 			perror("client read");
 			fprintf(stderr, "%d:%s: Couldn't read remote address\n", 
-					pid, __func__);
+					(int)pid, __func__);
 			goto err;
 		}
 	
@@ -310,13 +313,13 @@
 		if (!data->rem_dest)
 			goto err;
 	
-		parsed = sscanf(msg, "%x:%x:%x:%x:%Lx", &data->rem_dest->lid,
+		parsed = sscanf(msg, "%x:%x:%x:%x:%llx", &data->rem_dest->lid,
 				&data->rem_dest->qpn, &data->rem_dest->psn,
 				&data->rem_dest->rkey, &data->rem_dest->vaddr);
 	
 		if (parsed != 5) {
 			fprintf(stderr, "%d:%s: Couldn't parse line <%.*s>\n",
-					pid, __func__, (int)sizeof msg, msg);
+					(int)pid, __func__, (int)sizeof msg, msg);
 			free(data->rem_dest);
 			goto err;
 		}
@@ -347,7 +350,7 @@
 		goto err5;
 
 	if ( (n = getaddrinfo(NULL, service, &hints, &res)) < 0 ) {
-		fprintf(stderr, "%d:%s: %s for port %d\n", pid, __func__, 
+		fprintf(stderr, "%d:%s: %s for port %d\n", (int)pid, __func__, 
 					gai_strerror(n), data->port);
 		goto err5;
 	}
@@ -357,12 +360,12 @@
 		sin.sin_family = AF_INET;
 		sin.sin_port = htons(data->port);
 		if (rdma_bind_addr(data->cm_id, (struct sockaddr *)&sin)) {
-			fprintf(stderr, "%d:%s: rdma_bind_addr failed\n", pid, __func__);
+			fprintf(stderr, "%d:%s: rdma_bind_addr failed\n", (int)pid, __func__);
 			goto err3;
 		}
 	
 		if (rdma_listen(data->cm_id, 0)) {
-			fprintf(stderr, "%d:%s: rdma_listen failed\n", pid, __func__);
+			fprintf(stderr, "%d:%s: rdma_listen failed\n", (int)pid, __func__);
 			goto err3;
 		}
 	
@@ -371,13 +374,13 @@
 
 		if (event->event != RDMA_CM_EVENT_CONNECT_REQUEST) {
 			fprintf(stderr, "%d:%s: bad event waiting for connect request %d\n", 
-				pid, __func__, event->event);
+				(int)pid, __func__, event->event);
 			goto err2;
 		}
 	
 		if (!event->param.conn.private_data ||
 		    (event->param.conn.private_data_len < sizeof(*data->rem_dest))) {
-			fprintf(stderr, "%d:%s: bad private data len %d\n", pid,
+			fprintf(stderr, "%d:%s: bad private data len %d\n", (int)pid,
 				__func__, event->param.conn.private_data_len);
 			goto err2;
 		}
@@ -405,18 +408,18 @@
 		conn_param.private_data = &data->my_dest;
 		conn_param.private_data_len = sizeof(data->my_dest);
 		if (rdma_accept(child_cm_id, &conn_param)) {
-			fprintf(stderr, "%d:%s: rdma_accept failed\n", pid, __func__);
+			fprintf(stderr, "%d:%s: rdma_accept failed\n", (int)pid, __func__);
 			goto err1;
 		}	
 		rdma_ack_cm_event(event);
 		if (rdma_get_cm_event(data->cm_channel, &event)) {
-			fprintf(stderr, "%d:%s: rdma_get_cm_event error\n", pid, __func__);
+			fprintf(stderr, "%d:%s: rdma_get_cm_event error\n", (int)pid, __func__);
 			rdma_destroy_id(child_cm_id);
 			goto err3;
 		}
 		if (event->event != RDMA_CM_EVENT_ESTABLISHED) {
 			fprintf(stderr, "%d:%s: bad event waiting for established %d\n", 
-				pid, __func__, event->event);
+				(int)pid, __func__, event->event);
 			goto err1;
 		}
 		rdma_ack_cm_event(event);	
@@ -436,7 +439,7 @@
 		}
 	
 		if (sockfd < 0) {
-			fprintf(stderr, "%d:%s: Couldn't listen to port %d\n", pid,
+			fprintf(stderr, "%d:%s: Couldn't listen to port %d\n", (int)pid,
 						__func__, data->port);
 			goto err4;
 		}
@@ -445,7 +448,7 @@
 		connfd = accept(sockfd, NULL, 0);
 		if (connfd < 0) {
 			perror("server accept");
-			fprintf(stderr, "%d:%s: accept() failed\n", pid, __func__);
+			fprintf(stderr, "%d:%s: accept() failed\n", (int)pid, __func__);
 			close(sockfd);
 			goto err4;
 		}
@@ -485,7 +488,7 @@
 		if (n != sizeof msg) {
 			perror("server read");
 			fprintf(stderr, "%d:%s: %d/%d Couldn't read remote address\n", 
-						pid, __func__, n, (int) sizeof msg);
+						(int)pid, __func__, n, (int) sizeof msg);
 			goto err;
 		}
 	
@@ -495,23 +498,23 @@
 		if (!data->rem_dest)
 			goto err;
 	
-		parsed = sscanf(msg, "%x:%x:%x:%x:%Lx", &data->rem_dest->lid,
+		parsed = sscanf(msg, "%x:%x:%x:%x:%llx", &data->rem_dest->lid,
 			      &data->rem_dest->qpn, &data->rem_dest->psn,
 			      &data->rem_dest->rkey, &data->rem_dest->vaddr);
 		if (parsed != 5) {
-			fprintf(stderr, "%d:%s: Couldn't parse line <%.*s>\n", pid,
+			fprintf(stderr, "%d:%s: Couldn't parse line <%.*s>\n", (int)pid,
 						 __func__, (int)sizeof msg, msg);
 			free(data->rem_dest);
 			goto err;
 		}
 	
-		sprintf(msg, "%04x:%06x:%06x:%08x:%016Lx", data->my_dest.lid,
+		sprintf(msg, "%04x:%06x:%06x:%08x:%016llx", data->my_dest.lid,
 					 data->my_dest.qpn, data->my_dest.psn,
 					 data->my_dest.rkey, data->my_dest.vaddr);
 		if (write(data->sockfd, msg, sizeof msg) != sizeof msg) {
 			perror("server write");
 			fprintf(stderr, "%d:%s: Couldn't send local address\n", 
-					pid, __func__);
+					(int)pid, __func__);
 			free(data->rem_dest);
 			goto err;
 		}
@@ -538,7 +541,7 @@
 	ctx->buf = memalign(page_size, ctx->size * 2);
 	if (!ctx->buf) {
 		fprintf(stderr, "%d:%s: Couldn't allocate work buf.\n",
-					 pid, __func__);
+					 (int)pid, __func__);
 		return NULL;
 	}
 
@@ -548,7 +551,7 @@
 		cm_id = (struct rdma_cm_id *)ptr;
 		ctx->context = cm_id->verbs;
 		if (!ctx->context) {
-			fprintf(stderr, "%d:%s: Unbound cm_id!!\n", pid, 
+			fprintf(stderr, "%d:%s: Unbound cm_id!!\n", (int)pid, 
 							__func__);
 			return NULL;
 		}
@@ -558,7 +561,7 @@
 		ctx->context = ibv_open_device(ib_dev);
 		if (!ctx->context) {
 			fprintf(stderr, "%d:%s: Couldn't get context for %s\n", 
-				pid, __func__, ibv_get_device_name(ib_dev));
+				(int)pid, __func__, ibv_get_device_name(ib_dev));
 			return NULL;
 		}
 	}
@@ -565,7 +568,7 @@
 
 	ctx->pd = ibv_alloc_pd(ctx->context);
 	if (!ctx->pd) {
-		fprintf(stderr, "%d:%s: Couldn't allocate PD\n", pid, __func__);
+		fprintf(stderr, "%d:%s: Couldn't allocate PD\n", (int)pid, __func__);
 		return NULL;
 	}
 
@@ -575,7 +578,7 @@
 	ctx->mr = ibv_reg_mr(ctx->pd, ctx->buf, ctx->size * 2,
 			     IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_LOCAL_WRITE);
 	if (!ctx->mr) {
-		fprintf(stderr, "%d:%s: Couldn't allocate MR\n", pid, __func__);
+		fprintf(stderr, "%d:%s: Couldn't allocate MR\n", (int)pid, __func__);
 		return NULL;
 	}
 
@@ -582,7 +585,7 @@
 
 	ctx->ch = ibv_create_comp_channel(ctx->context);
 	if (!ctx->ch) {
-		fprintf(stderr, "%d:%s: Couldn't create comp channel\n", pid,
+		fprintf(stderr, "%d:%s: Couldn't create comp channel\n", (int)pid,
 								 __func__);
 		return NULL;
 	}
@@ -589,7 +592,7 @@
 
 	ctx->rcq = ibv_create_cq(ctx->context, 1, NULL, NULL, 0);
 	if (!ctx->rcq) {
-		fprintf(stderr, "%d:%s: Couldn't create recv CQ\n", pid,
+		fprintf(stderr, "%d:%s: Couldn't create recv CQ\n", (int)pid,
 								 __func__);
 		return NULL;
 	}
@@ -596,7 +599,7 @@
 
 	ctx->scq = ibv_create_cq(ctx->context, ctx->tx_depth, ctx, ctx->ch, 0);
 	if (!ctx->scq) {
-		fprintf(stderr, "%d:%s: Couldn't create send CQ\n", pid,
+		fprintf(stderr, "%d:%s: Couldn't create send CQ\n", (int)pid,
 								 __func__);
 		return NULL;
 	}
@@ -619,7 +622,7 @@
 
 	if (data->use_cma) {
 		if (rdma_create_qp(cm_id, ctx->pd, &attr)) {
-			fprintf(stderr, "%d:%s: Couldn't create QP\n", pid, __func__);
+			fprintf(stderr, "%d:%s: Couldn't create QP\n", (int)pid, __func__);
 			return NULL;
 		}
 		ctx->qp = cm_id->qp;
@@ -628,7 +631,7 @@
 	} else {
 		ctx->qp = ibv_create_qp(ctx->pd, &attr);
 		if (!ctx->qp)  {
-			fprintf(stderr, "%d:%s: Couldn't create QP\n", pid, __func__);
+			fprintf(stderr, "%d:%s: Couldn't create QP\n", (int)pid, __func__);
 			return NULL;
 		}
 		{
@@ -645,7 +648,7 @@
 					IBV_QP_PORT               |
 					IBV_QP_ACCESS_FLAGS)) {
 				fprintf(stderr, "%d:%s: Failed to modify QP to INIT\n", 
-						pid, __func__);
+						(int)pid, __func__);
 				return NULL;
 			}
 		}
@@ -679,7 +682,7 @@
 			  IBV_QP_RQ_PSN             |
 			  IBV_QP_MAX_DEST_RD_ATOMIC |
 			  IBV_QP_MIN_RNR_TIMER)) {
-		fprintf(stderr, "%d:%s: Failed to modify QP to RTR\n", pid, __func__);
+		fprintf(stderr, "%d:%s: Failed to modify QP to RTR\n", (int)pid, __func__);
 		return 1;
 	}
 
@@ -696,7 +699,7 @@
 			  IBV_QP_RNR_RETRY          |
 			  IBV_QP_SQ_PSN             |
 			  IBV_QP_MAX_QP_RD_ATOMIC)) {
-		fprintf(stderr, "%d:%s: Failed to modify QP to RTS\n", pid, __func__);
+		fprintf(stderr, "%d:%s: Failed to modify QP to RTS\n", (int)pid, __func__);
 		return 1;
 	}
 
@@ -720,7 +723,7 @@
         rc = ibv_post_recv(ctx->qp, &wr, &bad_wr);
         if (rc) {
                 perror("ibv_post_recv");
-                fprintf(stderr, "%d:%s: ibv_post_recv failed %d\n", pid,
+                fprintf(stderr, "%d:%s: ibv_post_recv failed %d\n", (int)pid,
 				 __func__, rc);
         }
 }
@@ -736,13 +739,13 @@
 	} while (ne == 0);
 
 	if (wc.status) 
-		fprintf(stderr, "%d:%s: bad wc status %d\n", pid, __func__,
+		fprintf(stderr, "%d:%s: bad wc status %d\n", (int)pid, __func__,
 					 wc.status);
 	if (!(wc.opcode & IBV_WC_RECV))
-		fprintf(stderr, "%d:%s: bad wc opcode %d\n", pid, __func__,
+		fprintf(stderr, "%d:%s: bad wc opcode %d\n", (int)pid, __func__,
 					 wc.opcode);
 	if (wc.wr_id != 0xdeadbeef) 
-		fprintf(stderr, "%d:%s: bad wc wr_id 0x%x\n", pid, __func__,
+		fprintf(stderr, "%d:%s: bad wc wr_id 0x%x\n", (int)pid, __func__,
 					 (int)wc.wr_id);
 }
 
@@ -762,7 +765,7 @@
 	ctx->wr.send_flags = IBV_SEND_SIGNALED;
 	ctx->wr.next       = NULL;
 	if (ibv_post_send(ctx->qp, &ctx->wr, &bad_wr)) {
-		fprintf(stderr, "%d:%s: ibv_post_send failed\n", pid, __func__);
+		fprintf(stderr, "%d:%s: ibv_post_send failed\n", (int)pid, __func__);
 		return;
 	}
 	do {
@@ -771,13 +774,13 @@
 	} while (ne == 0);
 
 	if (wc.status) 
-		fprintf(stderr, "%d:%s: bad wc status %d\n", pid, __func__,
+		fprintf(stderr, "%d:%s: bad wc status %d\n", (int)pid, __func__,
 						wc.status);
 	if (wc.opcode != IBV_WC_SEND)
-		fprintf(stderr, "%d:%s: bad wc opcode %d\n", pid, __func__, 
+		fprintf(stderr, "%d:%s: bad wc opcode %d\n", (int)pid, __func__, 
 						wc.opcode);
 	if (wc.wr_id != 0xcafebabe) 
-		fprintf(stderr, "%d:%s: bad wc wr_id 0x%x\n", pid, __func__,
+		fprintf(stderr, "%d:%s: bad wc wr_id 0x%x\n", (int)pid, __func__,
 						(int)wc.wr_id);
 }
 
@@ -792,13 +795,13 @@
 	} while (ne == 0);
 
 	if (wc.status) 
-		fprintf(stderr, "%d:%s: bad wc status %d\n", pid, __func__,
+		fprintf(stderr, "%d:%s: bad wc status %d\n", (int)pid, __func__,
 					 wc.status);
 	if (!(wc.opcode & IBV_WC_RECV))
-		fprintf(stderr, "%d:%s: bad wc opcode %d\n", pid, __func__,
+		fprintf(stderr, "%d:%s: bad wc opcode %d\n", (int)pid, __func__,
 					 wc.opcode);
 	if (wc.wr_id != 0xdeadbeef) 
-		fprintf(stderr, "%d:%s: bad wc wr_id 0x%x\n", pid, __func__,
+		fprintf(stderr, "%d:%s: bad wc wr_id 0x%x\n", (int)pid, __func__,
 					 (int)wc.wr_id);
 	pp_post_recv(ctx);
 }
@@ -819,7 +822,7 @@
 	ctx->wr.send_flags = IBV_SEND_SIGNALED;
 	ctx->wr.next       = NULL;
 	if (ibv_post_send(ctx->qp, &ctx->wr, &bad_wr)) {
-		fprintf(stderr, "%d:%s: ibv_post_send failed\n", pid, __func__);
+		fprintf(stderr, "%d:%s: ibv_post_send failed\n", (int)pid, __func__);
 		return;
 	}
 	do {
@@ -828,13 +831,13 @@
 	} while (ne == 0);
 
 	if (wc.status) 
-		fprintf(stderr, "%d:%s: bad wc status %d\n", pid, __func__,
+		fprintf(stderr, "%d:%s: bad wc status %d\n", (int)pid, __func__,
 					 wc.status);
 	if (wc.opcode != IBV_WC_SEND)
-		fprintf(stderr, "%d:%s: bad wc opcode %d\n", pid, __func__,
+		fprintf(stderr, "%d:%s: bad wc opcode %d\n", (int)pid, __func__,
 					 wc.opcode);
 	if (wc.wr_id != 0xabbaabba) 
-		fprintf(stderr, "%d:%s: bad wc wr_id 0x%x\n", pid, __func__,
+		fprintf(stderr, "%d:%s: bad wc wr_id 0x%x\n", (int)pid, __func__,
 					 (int)wc.wr_id);
 }
 
@@ -847,7 +850,7 @@
                 rc = rdma_disconnect(data.cm_id);
                 if (rc) {
 			perror("rdma_disconnect");
-			fprintf(stderr, "%d:%s: rdma disconnect error\n", pid,
+			fprintf(stderr, "%d:%s: rdma disconnect error\n", (int)pid,
 								 __func__);
 			return;
                 }
@@ -856,7 +859,7 @@
         rdma_get_cm_event(data.cm_channel, &event);
         if (event->event != RDMA_CM_EVENT_DISCONNECTED)
                 fprintf(stderr, "%d:%s: unexpected event during disconnect %d\n", 
-			pid, __func__, event->event);
+			(int)pid, __func__, event->event);
         rdma_ack_cm_event(event);
         rdma_destroy_id(data.cm_id);
         rdma_destroy_event_channel(data.cm_channel);
@@ -904,22 +907,26 @@
 			}
 		}
 
+#if !defined(__sparc)	/* only the SPARC get_cycles() returns gethrtime() nanoseconds; Solaris x86 still counts TSC ticks */
 	cycles_to_units = get_cpu_mhz(0) * 1000000;
+#else
+	cycles_to_units = 1000000000;
+#endif
 
 	tsize = duplex ? 2 : 1;
 	tsize = tsize * size;
 
-	printf("\n%d: Bandwidth peak (#%d to #%d): %g MB/sec\n", pid, 
+	printf("\n%d: Bandwidth peak (#%d to #%d): %g MB/sec\n", (int)pid, 
 			 opt_posted, opt_completed,
 			 tsize * cycles_to_units / opt_delta / 0x100000);
-	printf("%d: Bandwidth average: %g MB/sec\n", pid, 
+	printf("%d: Bandwidth average: %g MB/sec\n", (int)pid, 
 			 tsize * iters * cycles_to_units /
 			 (tcompleted[iters - 1] - tposted[0]) / 0x100000);
 
-	printf("%d: Service Demand peak (#%d to #%d): %ld cycles/KB\n", pid, 
+	printf("%d: Service Demand peak (#%d to #%d): %ld cycles/KB\n", (int)pid, 
 			 opt_posted, opt_completed,
 			 (unsigned long)opt_delta * 1024 / tsize);
-	printf("%d: Service Demand Avg  : %ld cycles/KB\n", pid, 
+	printf("%d: Service Demand Avg  : %ld cycles/KB\n", (int)pid, 
 			 (unsigned long)(tcompleted[iters - 1] - tposted[0]) *
 			 1024 / (tsize * iters));	
 }
@@ -1046,7 +1053,7 @@
 	pid = getpid();
 
 	printf("%d: | port=%d | ib_port=%d | size=%d | tx_depth=%d | sl=%d | iters=%d | duplex=%d | cma=%d |\n",
-		 pid, data.port, data.ib_port, data.size, data.tx_depth,
+		 (int)pid, data.port, data.ib_port, data.size, data.tx_depth,
 		 sl, iters, duplex, data.use_cma);
 		
 	/* Done with parameter parsing. Perform setup. */
@@ -1059,12 +1066,12 @@
 		data.cm_channel = rdma_create_event_channel();
 		if (!data.cm_channel) {
 			fprintf(stderr, "%d:%s: rdma_create_event_channel failed\n",
-							 pid, __func__);
+							 (int)pid, __func__);
 			return 1;
 		}
 		if (rdma_create_id(data.cm_channel, &data.cm_id, NULL, RDMA_PS_TCP)) {
 			fprintf(stderr, "%d:%s: rdma_create_id failed\n",
-							 pid, __func__);
+							 (int)pid, __func__);
 			return 1;
 		}
 	
@@ -1079,12 +1086,17 @@
 		}
 	} else {
 		dev_list = ibv_get_device_list(NULL);
+		if (!dev_list) {
+			fprintf(stderr, "%d:%s: No IB devices found\n",
+			     (int)pid, __func__);
+			return 1;
+		}
 	
 		if (!ib_devname) {
 			data.ib_dev = dev_list[0];
 			if (!data.ib_dev) {
 				fprintf(stderr, "%d:%s: No IB devices found\n",
-							 pid, __func__);
+							 (int)pid, __func__);
 				return 1;
 			}
 		} else {
@@ -1093,7 +1105,7 @@
 					break;
 			if (!data.ib_dev) {
 				fprintf(stderr, "%d:%s: IB device %s not found\n",
-						 pid, __func__, ib_devname);
+						 (int)pid, __func__, ib_devname);
 				return 1;
 			}
 		}
@@ -1109,7 +1121,7 @@
 		data.my_dest.lid = pp_get_local_lid(ctx, data.ib_port);
 		if (!data.my_dest.lid) {
 			fprintf(stderr, "%d:%s: Local lid 0x0 detected. Is an SM running?\n",
-						pid, __func__);
+						(int)pid, __func__);
 			return 1;
 		}
 		data.my_dest.qpn = ctx->qp->qp_num;
@@ -1129,12 +1141,12 @@
 	}
 
 	printf("%d: Local address:  LID %#04x, QPN %#06x, PSN %#06x "
-			"RKey %#08x VAddr %#016Lx\n", pid, 
+			"RKey %#08x VAddr %#016llx\n", (int)pid, 
 			data.my_dest.lid, data.my_dest.qpn, data.my_dest.psn,
 			data.my_dest.rkey, data.my_dest.vaddr);	
 
 	printf("%d: Remote address: LID %#04x, QPN %#06x, PSN %#06x, "
-			"RKey %#08x VAddr %#016Lx\n\n", pid, 
+			"RKey %#08x VAddr %#016llx\n\n", (int)pid, 
 			data.rem_dest->lid, data.rem_dest->qpn, data.rem_dest->psn,
 			data.rem_dest->rkey, data.rem_dest->vaddr);
 
@@ -1219,7 +1231,7 @@
 
 			if (ibv_post_send(qp, &ctx->wr, &bad_wr)) {
 				fprintf(stderr, "%d:%s: Couldn't post send: scnt=%d\n",
-					pid, __func__, scnt);
+					(int)pid, __func__, scnt);
 				return 1;
 			}
 			++scnt;
@@ -1235,17 +1247,17 @@
 			tcompleted[ccnt] = get_cycles();
 
 			if (ne < 0) {
-				fprintf(stderr, "%d:%s: poll CQ failed %d\n", pid, 
+				fprintf(stderr, "%d:%s: poll CQ failed %d\n", (int)pid, 
 					__func__, ne);
 				return 1;
 			}
 			if (wc.status != IBV_WC_SUCCESS) {
 				fprintf(stderr, "%d:%s: Completion with error at %s:\n",
-					pid, __func__, data.servername ? "client" : "server");
+					(int)pid, __func__, data.servername ? "client" : "server");
 				fprintf(stderr, "%d:%s: Failed status %d: wr_id %d\n",
-					pid, __func__, wc.status, (int) wc.wr_id);
+					(int)pid, __func__, wc.status, (int) wc.wr_id);
 				fprintf(stderr, "%d:%s: scnt=%d, ccnt=%d\n",
-					pid, __func__, scnt, ccnt);
+					(int)pid, __func__, scnt, ccnt);
 				return 1;
 			}
 			ccnt += 1;
diff -r -u /tmp/perftest-1.3.0/rdma_lat.c perftest-1.3.0/rdma_lat.c
--- /tmp/perftest-1.3.0/rdma_lat.c	Wed Apr  7 09:44:33 2010
+++ perftest-1.3.0/rdma_lat.c	Fri Feb 11 04:12:46 2011
@@ -50,8 +50,11 @@
 #include <malloc.h>
 #include <getopt.h>
 #include <arpa/inet.h>
+#if !(defined(__SVR4) && defined(__sun))
 #include <byteswap.h>
+#endif
 #include <time.h>
+#include <inttypes.h>
 
 #include <infiniband/verbs.h>
 #include <rdma/rdma_cma.h>
@@ -138,6 +141,11 @@
 	struct ibv_device *ib_dev = NULL;
 
 	dev_list = ibv_get_device_list(NULL);
+	if (!dev_list) {
+		fprintf(stderr, "%d:%s: No IB devices found\n",
+		     (int)pid, __func__);
+		return NULL;
+	}
 
 	if (!ib_devname) {
 		ib_dev = dev_list[0];
@@ -155,7 +163,8 @@
 }
 
 #define KEY_MSG_SIZE (sizeof "0000:000000:000000:00000000:0000000000000000")
-#define KEY_PRINT_FMT "%04x:%06x:%06x:%08x:%016Lx"
+#define KEY_PRINT_FMT "%04x:%06x:%06x:%08x:%016" "llx"
+#define KEY_SCAN_FMT "%04x:%06x:%06x:%08x:%016" "llx"
 
 static int pp_write_keys(int sockfd, const struct pingpong_dest *my_dest)
 {
@@ -185,7 +194,7 @@
 		return -1;
 	}
 
-	parsed = sscanf(msg, KEY_PRINT_FMT, &rem_dest->lid, &rem_dest->qpn,
+	parsed = sscanf(msg, KEY_SCAN_FMT, &rem_dest->lid, &rem_dest->qpn,
 			&rem_dest->psn, &rem_dest->rkey, &rem_dest->vaddr);
 
 	if (parsed != 5) {
@@ -220,7 +229,7 @@
 
 	if (n < 0) {
 		fprintf(stderr, "%d:%s: %s for %s:%d\n", 
-				pid, __func__, gai_strerror(n),
+				(int)pid, __func__, gai_strerror(n),
 				data->servername, data->port);
 		goto err4;
 	}
@@ -233,7 +242,7 @@
 		if (rdma_resolve_addr(data->cm_id, NULL,
 					 (struct sockaddr *)&sin, 2000)) {
 			fprintf(stderr, "%d:%s: rdma_resolve_addr failed\n",
-					 pid, __func__ );
+					 (int)pid, __func__ );
 			goto err2;
 		}
 	
@@ -248,7 +257,7 @@
 
 		if (event->event != RDMA_CM_EVENT_ADDR_RESOLVED) {
 			fprintf(stderr, "%d:%s: unexpected CM event %d\n", 
-				pid, __func__, event->event);
+				(int)pid, __func__, event->event);
 			goto err1;
 		}
 		rdma_ack_cm_event(event);
@@ -256,7 +265,7 @@
 retry_route:
 		if (rdma_resolve_route(data->cm_id, 2000)) {
 			fprintf(stderr, "%d:%s: rdma_resolve_route failed\n", 
-						pid, __func__);
+						(int)pid, __func__);
 			goto err2;
 		}
 	
@@ -271,7 +280,7 @@
 
 		if (event->event != RDMA_CM_EVENT_ROUTE_RESOLVED) {
 			fprintf(stderr, "%d:%s: unexpected CM event %d\n", 
-					pid, __func__, event->event);
+					(int)pid, __func__, event->event);
 			rdma_ack_cm_event(event);
 			goto err1;
 		}
@@ -278,7 +287,7 @@
 		rdma_ack_cm_event(event);
 		ctx = pp_init_ctx(data->cm_id, data);
 		if (!ctx) {
-			fprintf(stderr, "%d:%s: pp_init_ctx failed\n", pid, __func__);
+			fprintf(stderr, "%d:%s: pp_init_ctx failed\n", (int)pid, __func__);
 			goto err2;
 		}
 		data->my_dest.psn = lrand48() & 0xffffff;
@@ -294,7 +303,7 @@
 		conn_param.private_data_len = sizeof(data->my_dest);
 
 		if (rdma_connect(data->cm_id, &conn_param)) {
-			fprintf(stderr, "%d:%s: rdma_connect failure\n", pid, __func__);
+			fprintf(stderr, "%d:%s: rdma_connect failure\n", (int)pid, __func__);
 			goto err2;
 		}
 	
@@ -303,13 +312,13 @@
 	
 		if (event->event != RDMA_CM_EVENT_ESTABLISHED) {
 			fprintf(stderr, "%d:%s: unexpected CM event %d\n", 
- 					pid, __func__, event->event);
+ 					(int)pid, __func__, event->event);
 			goto err1;
 		}
 		if (!event->param.conn.private_data || 
 		    (event->param.conn.private_data_len < sizeof(*data->rem_dest))) {
 			fprintf(stderr, "%d:%s: bad private data ptr %p len %d\n",  
-				pid, __func__, event->param.conn.private_data, 
+				(int)pid, __func__, event->param.conn.private_data, 
 				event->param.conn.private_data_len);
 			goto err1;
 		}
@@ -332,7 +341,7 @@
 		}
 		if (sockfd < 0) {
 			fprintf(stderr, "%d:%s: Couldn't connect to %s:%d\n", 
-				 pid, __func__, data->servername, data->port);
+				 (int)pid, __func__, data->servername, data->port);
 			goto err3;
 		}
 		ctx = pp_init_ctx(data->ib_dev, data);
@@ -393,7 +402,7 @@
 		goto err5;
 
 	if ( (n = getaddrinfo(NULL, service, &hints, &res)) < 0 ) {
-		fprintf(stderr, "%d:%s: %s for port %d\n", pid, __func__, 
+		fprintf(stderr, "%d:%s: %s for port %d\n", (int)pid, __func__, 
 					gai_strerror(n), data->port);
 		goto err5;
 	}
@@ -403,12 +412,12 @@
 		sin.sin_family = AF_INET;
 		sin.sin_port = htons(data->port);
 		if (rdma_bind_addr(data->cm_id, (struct sockaddr *)&sin)) {
-			fprintf(stderr, "%d:%s: rdma_bind_addr failed\n", pid, __func__);
+			fprintf(stderr, "%d:%s: rdma_bind_addr failed\n", (int)pid, __func__);
 			goto err3;
 		}
 	
 		if (rdma_listen(data->cm_id, 0)) {
-			fprintf(stderr, "%d:%s: rdma_listen failed\n", pid, __func__);
+			fprintf(stderr, "%d:%s: rdma_listen failed\n", (int)pid, __func__);
 			goto err3;
 		}
 	
@@ -417,13 +426,13 @@
 
 		if (event->event != RDMA_CM_EVENT_CONNECT_REQUEST) {
 			fprintf(stderr, "%d:%s: bad event waiting for connect request %d\n", 
-				pid, __func__, event->event);
+				(int)pid, __func__, event->event);
 			goto err2;
 		}
 	
 		if (!event->param.conn.private_data ||
 		    (event->param.conn.private_data_len < sizeof(*data->rem_dest))) {
-			fprintf(stderr, "%d:%s: bad private data len %d\n", pid,
+			fprintf(stderr, "%d:%s: bad private data len %d\n", (int)pid,
 				__func__, event->param.conn.private_data_len);
 			goto err2;
 		}
@@ -451,18 +460,18 @@
 		conn_param.private_data = &data->my_dest;
 		conn_param.private_data_len = sizeof(data->my_dest);
 		if (rdma_accept(child_cm_id, &conn_param)) {
-			fprintf(stderr, "%d:%s: rdma_accept failed\n", pid, __func__);
+			fprintf(stderr, "%d:%s: rdma_accept failed\n", (int)pid, __func__);
 			goto err1;
 		}	
 		rdma_ack_cm_event(event);
 		if (rdma_get_cm_event(data->cm_channel, &event)) {
-			fprintf(stderr, "%d:%s: rdma_get_cm_event error\n", pid, __func__);
+			fprintf(stderr, "%d:%s: rdma_get_cm_event error\n", (int)pid, __func__);
 			rdma_destroy_id(child_cm_id);
 			goto err3;
 		}
 		if (event->event != RDMA_CM_EVENT_ESTABLISHED) {
 			fprintf(stderr, "%d:%s: bad event waiting for established %d\n", 
-				pid, __func__, event->event);
+				(int)pid, __func__, event->event);
 			goto err1;
 		}
 		rdma_ack_cm_event(event);	
@@ -482,7 +491,7 @@
 		}
 	
 		if (sockfd < 0) {
-			fprintf(stderr, "%d:%s: Couldn't listen to port %d\n", pid,
+			fprintf(stderr, "%d:%s: Couldn't listen to port %d\n", (int)pid,
 						__func__, data->port);
 			goto err4;
 		}
@@ -491,7 +500,7 @@
 		connfd = accept(sockfd, NULL, 0);
 		if (connfd < 0) {
 			perror("server accept");
-			fprintf(stderr, "%d:%s: accept() failed\n", pid, __func__);
+			fprintf(stderr, "%d:%s: accept() failed\n", (int)pid, __func__);
 			close(sockfd);
 			goto err4;
 		}
@@ -551,7 +560,7 @@
 	ctx->buf = memalign(page_size, ctx->size * 2);
 	if (!ctx->buf) {
 		fprintf(stderr, "%d:%s: Couldn't allocate work buf.\n",
-					 pid, __func__);
+					 (int)pid, __func__);
 		return NULL;
 	}
 
@@ -565,7 +574,7 @@
 		cm_id = (struct rdma_cm_id *)ptr;
 		ctx->context = cm_id->verbs;
 		if (!ctx->context) {
-			fprintf(stderr, "%d:%s: Unbound cm_id!!\n", pid, 
+			fprintf(stderr, "%d:%s: Unbound cm_id!!\n", (int)pid, 
 							__func__);
 			return NULL;
 		}
@@ -575,7 +584,7 @@
 		ctx->context = ibv_open_device(ib_dev);
 		if (!ctx->context) {
 			fprintf(stderr, "%d:%s: Couldn't get context for %s\n", 
-				pid, __func__, ibv_get_device_name(ib_dev));
+				(int)pid, __func__, ibv_get_device_name(ib_dev));
 			return NULL;
 		}
 	}
@@ -582,7 +591,7 @@
 
 	ctx->pd = ibv_alloc_pd(ctx->context);
 	if (!ctx->pd) {
-		fprintf(stderr, "%d:%s: Couldn't allocate PD\n", pid, __func__);
+		fprintf(stderr, "%d:%s: Couldn't allocate PD\n", (int)pid, __func__);
 		return NULL;
 	}
 
@@ -592,13 +601,13 @@
 	ctx->mr = ibv_reg_mr(ctx->pd, ctx->buf, ctx->size * 2,
 			     IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_LOCAL_WRITE);
 	if (!ctx->mr) {
-		fprintf(stderr, "%d:%s: Couldn't allocate MR\n", pid, __func__);
+		fprintf(stderr, "%d:%s: Couldn't allocate MR\n", (int)pid, __func__);
 		return NULL;
 	}
 
 	ctx->rcq = ibv_create_cq(ctx->context, 1, NULL, NULL, 0);
 	if (!ctx->rcq) {
-		fprintf(stderr, "%d:%s: Couldn't create recv CQ\n", pid,
+		fprintf(stderr, "%d:%s: Couldn't create recv CQ\n", (int)pid,
 								 __func__);
 		return NULL;
 	}
@@ -605,7 +614,7 @@
 
 	ctx->scq = ibv_create_cq(ctx->context, ctx->tx_depth, ctx, NULL, 0);
 	if (!ctx->scq) {
-		fprintf(stderr, "%d:%s: Couldn't create send CQ\n", pid,
+		fprintf(stderr, "%d:%s: Couldn't create send CQ\n", (int)pid,
 								 __func__);
 		return NULL;
 	}
@@ -628,7 +637,7 @@
 
 	if (data->use_cma) {
 		if (rdma_create_qp(cm_id, ctx->pd, &attr)) {
-			fprintf(stderr, "%d:%s: Couldn't create QP\n", pid, __func__);
+			fprintf(stderr, "%d:%s: Couldn't create QP\n", (int)pid, __func__);
 			return NULL;
 		}
 		ctx->qp = cm_id->qp;
@@ -636,7 +645,7 @@
 	} else {
 		ctx->qp = ibv_create_qp(ctx->pd, &attr);
 		if (!ctx->qp)  {
-			fprintf(stderr, "%d:%s: Couldn't create QP\n", pid, __func__);
+			fprintf(stderr, "%d:%s: Couldn't create QP\n", (int)pid, __func__);
 			return NULL;
 		}
 		{
@@ -653,7 +662,7 @@
 					IBV_QP_PORT               |
 					IBV_QP_ACCESS_FLAGS)) {
 				fprintf(stderr, "%d:%s: Failed to modify QP to INIT\n", 
-						pid, __func__);
+						(int)pid, __func__);
 				return NULL;
 			}
 		}
@@ -710,10 +719,10 @@
 	return 0;
 }
 
+#define ADDR_FMT "%8s address: LID %#04x QPN %#06x PSN %#06x RKey %#08x VAddr %#016" "llx" "\n"
+
 static int pp_open_port(struct pingpong_context *ctx, struct pp_data *data )
 {
-	char addr_fmt[] = "%8s address: LID %#04x QPN %#06x PSN %#06x RKey %#08x VAddr %#016Lx\n";
-
 	/* Create connection between client and server.
 	 * We do it by exchanging data over a TCP socket connection. */
 
@@ -727,7 +736,7 @@
 	data->my_dest.rkey = ctx->mr->rkey;
 	data->my_dest.vaddr = (uintptr_t)ctx->buf + ctx->size;
 
-	printf(addr_fmt, "local", data->my_dest.lid, data->my_dest.qpn, data->my_dest.psn,
+	printf(ADDR_FMT, "local", data->my_dest.lid, data->my_dest.qpn, data->my_dest.psn,
 			data->my_dest.rkey, data->my_dest.vaddr);
 
 	if (data->servername) {
@@ -738,7 +747,7 @@
 			return 1;
 	}
 
-	printf(addr_fmt, "remote", data->rem_dest->lid, data->rem_dest->qpn,
+	printf(ADDR_FMT, "remote", data->rem_dest->lid, data->rem_dest->qpn,
 			data->rem_dest->psn, data->rem_dest->rkey, 
 			data->rem_dest->vaddr);
 
@@ -783,7 +792,7 @@
         rc = ibv_post_recv(ctx->qp, &wr, &bad_wr);
         if (rc) {
                 perror("ibv_post_recv");
-                fprintf(stderr, "%d:%s: ibv_post_recv failed %d\n", pid,
+                fprintf(stderr, "%d:%s: ibv_post_recv failed %d\n", (int)pid,
 				 __func__, rc);
         }
 }
@@ -799,13 +808,13 @@
 	} while (ne == 0);
 
 	if (wc.status) 
-		fprintf(stderr, "%d:%s: bad wc status %d\n", pid, __func__,
+		fprintf(stderr, "%d:%s: bad wc status %d\n", (int)pid, __func__,
 					 wc.status);
 	if (!(wc.opcode & IBV_WC_RECV))
-		fprintf(stderr, "%d:%s: bad wc opcode %d\n", pid, __func__,
+		fprintf(stderr, "%d:%s: bad wc opcode %d\n", (int)pid, __func__,
 					 wc.opcode);
 	if (wc.wr_id != 0xdeadbeef) 
-		fprintf(stderr, "%d:%s: bad wc wr_id 0x%x\n", pid, __func__,
+		fprintf(stderr, "%d:%s: bad wc wr_id 0x%x\n", (int)pid, __func__,
 					 (int)wc.wr_id);
 }
 
@@ -825,7 +834,7 @@
 	ctx->wr.send_flags = IBV_SEND_SIGNALED;
 	ctx->wr.next       = NULL;
 	if (ibv_post_send(ctx->qp, &ctx->wr, &bad_wr)) {
-		fprintf(stderr, "%d:%s: ibv_post_send failed\n", pid, __func__);
+		fprintf(stderr, "%d:%s: ibv_post_send failed\n", (int)pid, __func__);
 		return;
 	}
 	do {
@@ -834,13 +843,13 @@
 	} while (ne == 0);
 
 	if (wc.status) 
-		fprintf(stderr, "%d:%s: bad wc status %d\n", pid, __func__,
+		fprintf(stderr, "%d:%s: bad wc status %d\n", (int)pid, __func__,
 						wc.status);
 	if (wc.opcode != IBV_WC_SEND)
-		fprintf(stderr, "%d:%s: bad wc opcode %d\n", pid, __func__, 
+		fprintf(stderr, "%d:%s: bad wc opcode %d\n", (int)pid, __func__, 
 						wc.opcode);
 	if (wc.wr_id != 0xcafebabe) 
-		fprintf(stderr, "%d:%s: bad wc wr_id 0x%x\n", pid, __func__,
+		fprintf(stderr, "%d:%s: bad wc wr_id 0x%x\n", (int)pid, __func__,
 						(int)wc.wr_id);
 }
 
@@ -855,13 +864,13 @@
 	} while (ne == 0);
 
 	if (wc.status) 
-		fprintf(stderr, "%d:%s: bad wc status %d\n", pid, __func__,
+		fprintf(stderr, "%d:%s: bad wc status %d\n", (int)pid, __func__,
 					 wc.status);
 	if (!(wc.opcode & IBV_WC_RECV))
-		fprintf(stderr, "%d:%s: bad wc opcode %d\n", pid, __func__,
+		fprintf(stderr, "%d:%s: bad wc opcode %d\n", (int)pid, __func__,
 					 wc.opcode);
 	if (wc.wr_id != 0xdeadbeef) 
-		fprintf(stderr, "%d:%s: bad wc wr_id 0x%x\n", pid, __func__,
+		fprintf(stderr, "%d:%s: bad wc wr_id 0x%x\n", (int)pid, __func__,
 					 (int)wc.wr_id);
 	pp_post_recv(ctx);
 }
@@ -882,7 +891,7 @@
 	ctx->wr.send_flags = IBV_SEND_SIGNALED;
 	ctx->wr.next       = NULL;
 	if (ibv_post_send(ctx->qp, &ctx->wr, &bad_wr)) {
-		fprintf(stderr, "%d:%s: ibv_post_send failed\n", pid, __func__);
+		fprintf(stderr, "%d:%s: ibv_post_send failed\n", (int)pid, __func__);
 		return;
 	}
 	do {
@@ -891,13 +900,13 @@
 	} while (ne == 0);
 
 	if (wc.status) 
-		fprintf(stderr, "%d:%s: bad wc status %d\n", pid, __func__,
+		fprintf(stderr, "%d:%s: bad wc status %d\n", (int)pid, __func__,
 					 wc.status);
 	if (wc.opcode != IBV_WC_SEND)
-		fprintf(stderr, "%d:%s: bad wc opcode %d\n", pid, __func__,
+		fprintf(stderr, "%d:%s: bad wc opcode %d\n", (int)pid, __func__,
 					 wc.opcode);
 	if (wc.wr_id != 0xabbaabba) 
-		fprintf(stderr, "%d:%s: bad wc wr_id 0x%x\n", pid, __func__,
+		fprintf(stderr, "%d:%s: bad wc wr_id 0x%x\n", (int)pid, __func__,
 					 (int)wc.wr_id);
 }
 
@@ -910,7 +919,7 @@
                 rc = rdma_disconnect(data.cm_id);
                 if (rc) {
 			perror("rdma_disconnect");
-			fprintf(stderr, "%d:%s: rdma disconnect error\n", pid,
+			fprintf(stderr, "%d:%s: rdma disconnect error\n", (int)pid,
 								 __func__);
 			return;
                 }
@@ -919,7 +928,7 @@
         rdma_get_cm_event(data.cm_channel, &event);
         if (event->event != RDMA_CM_EVENT_DISCONNECTED)
                 fprintf(stderr, "%d:%s: unexpected event during disconnect %d\n", 
-			pid, __func__, event->event);
+			(int)pid, __func__, event->event);
         rdma_ack_cm_event(event);
         rdma_destroy_id(data.cm_id);
         rdma_destroy_event_channel(data.cm_channel);
@@ -989,10 +998,18 @@
 
 
 	if (options->cycles) {
+#if !(defined(__SVR4) && defined(__sun))
 		cycles_to_units = 1;
+#else
+		cycles_to_units = (1/get_cpu_mhz(0)) * 1000;
+#endif
 		units = "cycles";
 	} else {
+#if !(defined(__SVR4) && defined(__sun))
 		cycles_to_units = get_cpu_mhz(0);
+#else
+		cycles_to_units = 1000;
+#endif
 		units = "usec";
 	}
 
@@ -1164,12 +1181,12 @@
 		data.cm_channel = rdma_create_event_channel();
 		if (!data.cm_channel) {
 			fprintf(stderr, "%d:%s: rdma_create_event_channel failed\n",
-							 pid, __func__);
+							 (int)pid, __func__);
 			return 1;
 		}
 		if (rdma_create_id(data.cm_channel, &data.cm_id, NULL, RDMA_PS_TCP)) {
 			fprintf(stderr, "%d:%s: rdma_create_id failed\n",
-							 pid, __func__);
+							 (int)pid, __func__);
 			return 1;
 		}
 	
@@ -1184,12 +1201,12 @@
 		}
 
 		printf("%d: Local address:  LID %#04x, QPN %#06x, PSN %#06x "
-                        "RKey %#08x VAddr %#016Lx\n", pid,
+                        "RKey %#08x VAddr %#016llx\n", (int)pid,
                         data.my_dest.lid, data.my_dest.qpn, data.my_dest.psn,
                         data.my_dest.rkey, data.my_dest.vaddr);
 
         	printf("%d: Remote address: LID %#04x, QPN %#06x, PSN %#06x, "
-                        "RKey %#08x VAddr %#016Lx\n\n", pid,
+                        "RKey %#08x VAddr %#016llx\n\n", (int)pid,
                         data.rem_dest->lid, data.rem_dest->qpn, data.rem_dest->psn,
                         data.rem_dest->rkey, data.rem_dest->vaddr);
 
diff -r -u /tmp/perftest-1.3.0/multicast_resources.h perftest-1.3.0/multicast_resources.h
--- /tmp/perftest-1.3.0/multicast_resources.h	Wed Mar  2 11:01:36 2011
+++ perftest-1.3.0/multicast_resources.h	Fri Aug 26 05:14:56 2011
@@ -68,7 +68,7 @@
 #define DEF_PKEY_IDX        		0
 #define DEF_SLL              		0
 #define MAX_POLL_ITERATION_TIMEOUT  1000000
-#define MCG_GID {255,1,0,0,0,2,201,133,0,0,0,0,0,0,0,0}
+#define MCG_GID {255,21,0,0,0,2,201,133,0,0,0,0,0,0,0,0}
 
 //  Definitions section for MADs 
 #define SUBN_ADM_ATTR_MC_MEMBER_RECORD 0x38
@@ -80,10 +80,11 @@
 #define DEF_TCLASS                     0
 #define DEF_FLOW_LABLE                 0
 
+#if !(defined(__SVR4) && defined(__sun))
 // Macro for 64 bit variables to switch to from net 
 #define ntohll(x) (((u_int64_t)(ntohl((int)((x << 32) >> 32))) << 32) | (unsigned int)ntohl(((int)(x >> 32)))) 
 #define htonll(x) ntohll(x)
-
+#endif
 // generate a bit mask S bits width 
 #define MASK32(S)  ( ((u_int32_t) ~0L) >> (32-(S)) )
 
diff -r -u /tmp/perftest-1.3.0/perftest_resources.c perftest-1.3.0/perftest_resources.c
--- /tmp/perftest-1.3.0/perftest_resources.c	Tue Jan 25 23:31:57 2011
+++ perftest-1.3.0/perftest_resources.c	Fri Feb 11 04:12:48 2011
@@ -11,7 +11,9 @@
 #include <sys/socket.h>
 #include <netdb.h>
 #include <math.h>
+#if !(defined(__SVR4) && defined(__sun))
 // #include <byteswap.h>
+#endif
 #include "perftest_resources.h"
 
 
@@ -566,8 +568,22 @@
 
 	// User did not ask for specific mtu.
 	if (params->mtu == 0) {
+#if !(defined(__SVR4) && defined(__sun))
 		params->curr_mtu = port_attr.active_mtu;
+#else
+		struct ibv_device_attr device_attr;
 
+		if (ibv_query_device(context, &device_attr)) {
+			fprintf(stderr, "Failed to query device props\n");
+			return -1;
+		}
+
+		if (device_attr.vendor_part_id == 23108) {
+			params->curr_mtu = IBV_MTU_1024;
+		} else {
+			params->curr_mtu = IBV_MTU_2048;
+		}
+#endif
 	} else {
 
 		switch (params->mtu) {
@@ -869,7 +885,6 @@
 		close(sockfd);
 		return connfd;
 	}
-
 	close(sockfd);
 	return connfd;
 }
@@ -882,6 +897,8 @@
 				   struct pingpong_dest *my_dest,
 				   struct pingpong_dest *rem_dest) {
 
+    int	temp_reads = 0;
+
     // Client.
     if (params->machine == CLIENT) {
 		if (ctx_write_keys(my_dest,params)) {
@@ -904,6 +921,18 @@
 			return -1;
 		}
     }
+    // We could have tavor at one end and hermon at the other.
+    // To avoid a modify QP error set max_rd_atomic to lowest
+    // on either side of connection.
+    temp_reads = my_dest->out_reads;
+    if (rem_dest->out_reads < temp_reads)
+	temp_reads = rem_dest->out_reads;
+
+    // Apply the common minimum to both ends unconditionally so
+    // that a minimum of zero is propagated as well.
+    my_dest->out_reads = temp_reads;
+    rem_dest->out_reads = temp_reads;
+
     return 0;
 }
 
diff -r -u /tmp/perftest-1.3.0/perftest_resources.h perftest-1.3.0/perftest_resources.h
--- /tmp/perftest-1.3.0/perftest_resources.h	Tue Jan 25 23:31:57 2011
+++ perftest-1.3.0/perftest_resources.h	Fri Feb 11 04:12:48 2011
@@ -128,10 +128,10 @@
 #define KEY_MSG_SIZE_GID    98 // Message size with gid (MGID as well).
 
 // The Format of the message we pass through sockets , without passing Gid.
-#define KEY_PRINT_FMT "%04x:%04x:%06x:%06x:%08x:%016Lx"
+#define KEY_PRINT_FMT "%04x:%04x:%06x:%06x:%08x:%016llx"
 
 // The Format of the message we pass through sockets (With Gid).
-#define KEY_PRINT_FMT_GID "%04x:%04x:%06x:%06x:%08x:%016Lx:%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x"
+#define KEY_PRINT_FMT_GID "%04x:%04x:%06x:%06x:%08x:%016llx:%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x"
 
 // The Basic print format for all verbs.
 #define BASIC_ADDR_FMT " %s address: LID %#04x QPN %#06x PSN %#06x"
@@ -140,7 +140,7 @@
 #define READ_FMT       " OUT %#04x"
 
 // The print format of the pingpong_dest element for RDMA verbs.
-#define RDMA_FMT       " RKey %#08x VAddr %#016Lx"
+#define RDMA_FMT       " RKey %#08x VAddr %#016llx"
 
 // The print format of a global address or a multicast address.
 #define GID_FMT " %s: %02d:%02d:%02d:%02d:%02d:%02d:%02d:%02d:%02d:%02d:%02d:%02d:%02d:%02d:%02d:%02d\n"
@@ -154,10 +154,10 @@
 #define RESULT_FMT_LAT " #bytes #iterations    t_min[usec]    t_max[usec]  t_typical[usec]\n"
 
 // Result print format
-#define REPORT_FMT     " %-7lu   %d           %-7.2f            %-7.2f\n"
+#define REPORT_FMT     " %-7"PRIu64"   %d           %-7.2f            %-7.2f\n"
 
 // Result print format for latency tests.
-#define REPORT_FMT_LAT " %-7lu %d          %-7.2f        %-7.2f      %-7.2f\n"
+#define REPORT_FMT_LAT " %-7"PRIu64" %d          %-7.2f        %-7.2f      %-7.2f\n"
 
 // Macro for allocating.
 #define ALLOCATE(var,type,size)                                  \
diff -r -u /tmp/perftest-1.3.0/read_bw.c perftest-1.3.0/read_bw.c
--- /tmp/perftest-1.3.0/read_bw.c	Tue Jan 25 23:31:57 2011
+++ perftest-1.3.0/read_bw.c	Fri Feb 11 04:12:47 2011
@@ -44,6 +44,7 @@
 // #include <limits.h>
 #include <malloc.h>
 // #include <getopt.h>
+#include <inttypes.h>
 #include <time.h>
 #include <infiniband/verbs.h>
 
@@ -336,7 +337,11 @@
 			}
 	}
 
+#if !(defined(__sparc))
 	cycles_to_units = get_cpu_mhz(user_param->cpu_freq_f) * 1000000;
+#else
+	cycles_to_units = 1000000000;
+#endif
 	tsize = user_param->duplex ? 2 : 1;
 	tsize = tsize * user_param->size;
 	
@@ -443,6 +448,7 @@
  ******************************************************************************/
 int main(int argc, char *argv[]) {
 
+	int				ret = 0;
 	int                        i = 0;
 	struct ibv_device		   *ib_dev = NULL;
 	struct pingpong_context    *ctx;
@@ -553,8 +559,10 @@
 
 		for (i = 1; i < 24 ; ++i) {
 			user_param.size = 1 << i;
-			if(run_iter(ctx,&user_param,&rem_dest))
-				return 17;
+			if(run_iter(ctx,&user_param,&rem_dest)) {
+				ret = 17;
+				goto exit;
+			}
 			print_report(&user_param);
 		}
 
@@ -562,11 +570,13 @@
 
 	else {
 
-		if(run_iter(ctx,&user_param,&rem_dest))
-			return 17;
-		
+		if(run_iter(ctx,&user_param,&rem_dest)) {
+			ret = 17;
+			goto exit;
+		}
 		print_report(&user_param);
 	}
+exit:
 
 	if (ctx_close_connection(&user_param,&my_dest,&rem_dest)) {
 		fprintf(stderr,"Failed to close connection between server and client\n");
@@ -575,6 +585,9 @@
 	
 	printf(RESULT_LINE);
 
-	return destroy_ctx_resources(ctx);
+	if (destroy_ctx_resources(ctx))
+		return 1;
+	else
+		return ret; 
 	
 }
diff -r -u /tmp/perftest-1.3.0/read_lat.c perftest-1.3.0/read_lat.c
--- /tmp/perftest-1.3.0/read_lat.c	Tue Jan 25 23:31:57 2011
+++ perftest-1.3.0/read_lat.c	Fri Feb 11 04:12:47 2011
@@ -46,6 +46,7 @@
 #include <malloc.h>
 #include <getopt.h>
 #include <time.h>
+#include <inttypes.h>
 #include <infiniband/verbs.h>
 
 #include "get_clock.h"
@@ -358,10 +359,19 @@
 
 
 	if (user_param->r_flag->cycles) {
+#if !(defined(__sparc))
 		cycles_to_units = 1;
+#else
+		cycles_to_units =
+		   (1/get_cpu_mhz(user_param->cpu_freq_f)) * 1000;
+#endif
 		units = "cycles";
 	} else {
+#if !(defined(__sparc))
 		cycles_to_units = get_cpu_mhz(user_param->cpu_freq_f);
+#else
+		cycles_to_units = 1000;
+#endif
 		units = "usec";
 	}
 
diff -r -u /tmp/perftest-1.3.0/send_bw.c perftest-1.3.0/send_bw.c
--- /tmp/perftest-1.3.0/send_bw.c	Thu Jan 20 07:37:18 2011
+++ perftest-1.3.0/send_bw.c	Fri Feb 11 04:12:47 2011
@@ -1,1162 +1,1166 @@
-/*
- * Copyright (c) 2005 Topspin Communications.  All rights reserved.
- * Copyright (c) 2005 Mellanox Technologies Ltd.  All rights reserved.
- * Copyright (c) 2009 HNR Consulting.  All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- * $Id$
- */
-
-#if HAVE_CONFIG_H
-#  include <config.h>
-#endif /* HAVE_CONFIG_H */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <string.h>
-#include <limits.h>
-#include <malloc.h>
-#include <getopt.h>
-#include <time.h>
-#include <errno.h>
-#include <infiniband/verbs.h>
-
-#include "get_clock.h"
-#include "multicast_resources.h"
-#include "perftest_resources.h"
-
-#define VERSION 2.1
-
-static int page_size;
-cycles_t	*tposted;
-cycles_t	*tcompleted;
-
-struct pingpong_context {
-	struct ibv_context 		*context;
-	struct ibv_comp_channel *channel;
-	struct ibv_pd      		*pd;
-	struct ibv_mr     		**mr;
-	struct ibv_cq      		*cq;
-	struct ibv_qp      		**qp;
-	struct ibv_sge      	list;
-	struct ibv_send_wr  	wr;
-	struct ibv_sge 			*sge_list;
-	struct ibv_recv_wr  	*rwr;
-	struct ibv_ah			*ah;
-	void               		**buf;
-	unsigned            	size;
-	uint64_t				*my_addr;
-};
-
-/****************************************************************************** 
- *
- ******************************************************************************/
-static int set_mcast_group(struct pingpong_context *ctx,
-						   struct perftest_parameters *user_parm,
-						   struct mcast_parameters *mcg_params) {
-
-	struct ibv_port_attr port_attr;
-
-	if (ibv_query_gid(ctx->context,user_parm->ib_port,user_parm->gid_index,&mcg_params->port_gid)) {
-			return 1;
-	}
-		
-	if (ibv_query_pkey(ctx->context,user_parm->ib_port,DEF_PKEY_IDX,&mcg_params->pkey)) {
-		return 1;
-	}
-
-	if (ibv_query_port(ctx->context,user_parm->ib_port,&port_attr)) {
-		return 1;
-	}
-	mcg_params->sm_lid  = port_attr.sm_lid;
-	mcg_params->sm_sl   = port_attr.sm_sl;
-	mcg_params->ib_port = user_parm->ib_port;
-	
-	if (!strcmp(link_layer_str(user_parm->link_type),"IB")) {
-		// Request for Mcast group create registery in SM.
-		if (join_multicast_group(SUBN_ADM_METHOD_SET,mcg_params)) {
-			fprintf(stderr,"Couldn't Register the Mcast group on the SM\n");
-			return 1;
-		}
-	}
-	return 0;
-}
-
-/****************************************************************************** 
- *
- ******************************************************************************/
-static int set_up_connection(struct pingpong_context *ctx,
-							 struct perftest_parameters *user_parm,
-							 struct pingpong_dest *my_dest,
-							 struct mcast_parameters *mcg_params) {
-
-	int i = (user_parm->duplex) ? 1 : 0;
-
-	if (user_parm->use_mcg && (user_parm->duplex || user_parm->machine == SERVER)) {
-
-		set_multicast_gid(mcg_params,ctx->qp[0]->qp_num,(int)user_parm->machine);
-		if (set_mcast_group(ctx,user_parm,mcg_params)) {
-			return 1;
-		}
-		
-		while (i < user_parm->num_of_qps) {
-			if (ibv_attach_mcast(ctx->qp[i],&mcg_params->mgid,mcg_params->mlid)) {
-				fprintf(stderr, "Couldn't attach QP to MultiCast group");
-				return 1;
-			}
-			i++;
-		}
-
-		mcg_params->mcast_state |= MCAST_IS_ATTACHED;
-		my_dest->gid = mcg_params->mgid;
-		my_dest->lid = mcg_params->mlid;
-		my_dest->qpn = QPNUM_MCAST;
-
-	} else {
-		if (user_parm->gid_index != -1) {
-			if (ibv_query_gid(ctx->context,user_parm->ib_port,user_parm->gid_index,&my_dest->gid)) {
-				return -1;
-			}
-		}
-		my_dest->lid = ctx_get_local_lid(ctx->context,user_parm->ib_port);
-		my_dest->qpn = ctx->qp[0]->qp_num;
-	}
-	my_dest->psn  = lrand48() & 0xffffff;
-
-	// We do not fail test upon lid above RoCE.
-
-	if (user_parm->gid_index < 0) {
-		if (!my_dest->lid) {
-			fprintf(stderr," Local lid 0x0 detected,without any use of gid. Is SM running?\n");
-			return -1;
-		}
-	}
-	return 0;
-}
-
-/****************************************************************************** 
- *
- ******************************************************************************/
-static int init_connection(struct perftest_parameters *params,
- 						   struct pingpong_dest *my_dest) {
-
-	params->side = LOCAL;
-	ctx_print_pingpong_data(my_dest,params);
-	
-	if (params->machine == CLIENT) 
-		params->sockfd = ctx_client_connect(params->servername,params->port);
-	else 
-		params->sockfd = ctx_server_connect(params->port);
-	
-		
-	if(params->sockfd < 0) {
-		fprintf(stderr,"Unable to open file descriptor for socket connection");
-		return 1;
-	}
-	return 0;
-}
-
-/****************************************************************************** 
- *
- ******************************************************************************/
-static int destroy_ctx_resources(struct pingpong_context    *ctx, 
-								 struct perftest_parameters *user_parm,
-								 struct pingpong_dest		*my_dest,
-								 struct pingpong_dest		*rem_dest,
-								 struct mcast_parameters    *mcg_params)  {
-
-	int test_result = 0;
-	int i = (user_parm->duplex) ? 1 : 0;
-
-	if (user_parm->use_mcg) {
-
-		if (user_parm->machine == SERVER || user_parm->duplex) {
-			
-			while (i < user_parm->num_of_qps) {
-				if (ibv_detach_mcast(ctx->qp[i],&my_dest->gid,my_dest->lid)) {
-					fprintf(stderr, "Couldn't deattach QP from MultiCast group\n");
-					return 1;
-				}
-				i++;
-			}
-			mcg_params->mgid = my_dest->gid;
-			if (!strcmp(link_layer_str(user_parm->link_type),"IB")) {
-				if (join_multicast_group(SUBN_ADM_METHOD_DELETE,mcg_params)) {
-					fprintf(stderr,"Couldn't Unregister the Mcast group on the SM\n");
-					return 1;
-				}
-			}
-		}
-
-		if (user_parm->machine == CLIENT || user_parm->duplex) {
-
-			mcg_params->mgid = rem_dest->gid;
-			if (!strcmp(link_layer_str(user_parm->link_type),"IB")) {
-				if (join_multicast_group(SUBN_ADM_METHOD_DELETE,mcg_params)) {
-					fprintf(stderr,"Couldn't Unregister the Mcast group on the SM\n");
-					return 1;
-				}
-			}
-
-		}
-	}	
-
-	if (ctx->ah) {
-		if (ibv_destroy_ah(ctx->ah)) {
-			fprintf(stderr, "failed to destroy AH\n");
-			test_result = 1;
-		}
-	}
-
-	for(i = 0; i < user_parm->num_of_qps; i++) {
-		if (ibv_destroy_qp(ctx->qp[i])) {
-			test_result = 1;
-		}
-	}
-	free(ctx->qp);
-
-	if (ibv_destroy_cq(ctx->cq)) {
-		test_result = 1;
-	}
-
-	for(i = 0; i < user_parm->num_of_qps; i++) {
-
-		if (ibv_dereg_mr(ctx->mr[i])) {
-			test_result = 1;
-		}
-		free(ctx->buf[i]);
-	}
-	
-	if (ibv_dealloc_pd(ctx->pd)) {
-		test_result = 1;
-	}
-
-	if (ctx->channel) {
-		if (ibv_destroy_comp_channel(ctx->channel)) {
-			test_result = 1;
-		}
-	}
-	
-	if (ibv_close_device(ctx->context)) {
-		test_result = 1;
-	}
-
-	if (user_parm->machine == SERVER || user_parm->duplex) {
-		free(ctx->rwr);
-		free(ctx->sge_list);
-		free(ctx->my_addr);
-	}
-
-	free(ctx->mr);
-	free(ctx->buf);
-	free(ctx);
-	free(tposted);
-    free(tcompleted);
-	return test_result;
-}
-
-/****************************************************************************** 
- *
- ******************************************************************************/
-static struct pingpong_context *pp_init_ctx(struct ibv_device *ib_dev,
-											struct perftest_parameters *user_parm) {
-
-	int i,m_size;
-	int duplex_ind;
-	struct pingpong_context *ctx;
-
-	ALLOCATE(ctx,struct pingpong_context,1);
-	ALLOCATE(ctx->buf,void*,user_parm->num_of_qps);
-	ALLOCATE(ctx->mr,struct ibv_mr*,user_parm->num_of_qps);
-
-	ctx->ah       = NULL;
-	ctx->channel  = NULL;
-
-	duplex_ind = (user_parm->duplex && !user_parm->use_mcg) ? 2 : 1;
-
-	ctx->context = ibv_open_device(ib_dev);
-	if (!ctx->context) {
-		fprintf(stderr, "Couldn't get context for %s\n",
-			ibv_get_device_name(ib_dev));
-		return NULL;
-	}
-
-	// Configure the Link MTU acoording to the user or the active mtu.
-	if (ctx_set_mtu(ctx->context,user_parm)) {
-		fprintf(stderr, "Couldn't set the link layer\n");
-		return NULL;
-	}
-
-	if (user_parm->connection_type == UD && user_parm->size > MTU_SIZE(user_parm->curr_mtu)) {	 
-		printf(" Max msg size in UD is MTU - %d . changing to MTU\n",MTU_SIZE(user_parm->curr_mtu));
-		user_parm->size = MTU_SIZE(user_parm->curr_mtu);
-	}
-
-	if (is_dev_hermon(ctx->context) != NOT_HERMON && user_parm->inline_size != 0)
-		user_parm->inline_size = 0;
-
-	printf(" Inline data is used up to %d bytes message\n", user_parm->inline_size);
-
-	ctx->size = user_parm->size;
-
-	// Finds the link type and configure the HCA accordingly.
-	if (ctx_set_link_layer(ctx->context,user_parm)) {
-		fprintf(stderr, " Couldn't set the link layer\n");
-		return NULL;
-	}
-	
-	if (user_parm->use_event) {
-		ctx->channel = ibv_create_comp_channel(ctx->context);
-		if (!ctx->channel) {
-			fprintf(stderr, "Couldn't create completion channel\n");
-			return NULL;
-		}
-	} else
-		ctx->channel = NULL;                  
-
-	ctx->pd = ibv_alloc_pd(ctx->context);
-	if (!ctx->pd) {
-		fprintf(stderr, "Couldn't allocate PD\n");
-		return NULL;
-	}
-
-	for (i = 0; i < user_parm->num_of_qps; i++) {
-
-		m_size = (BUFF_SIZE(user_parm->size) + IF_UD_ADD(user_parm->connection_type))*duplex_ind;
-		ctx->buf[i] = memalign(page_size,m_size);
-		if (!ctx->buf[i]) {
-			fprintf(stderr, "Couldn't allocate work buf.\n");
-			return NULL;
-		}
-		memset(ctx->buf[i],0,m_size);
-
-		// We dont really want IBV_ACCESS_LOCAL_WRITE, but IB spec says :
-		// The Consumer is not allowed to assign Remote Write or Remote Atomic to
-		// a Memory Region that has not been assigned Local Write. 
-		ctx->mr[i] = ibv_reg_mr(ctx->pd,
-								ctx->buf[i],
-								m_size,
-								IBV_ACCESS_REMOTE_WRITE | 
-								IBV_ACCESS_LOCAL_WRITE);
-
-		if (!ctx->mr[i]) {
-			fprintf(stderr, "Couldn't allocate MR\n");
-			return NULL;
-		}
-	}
-
-	// Create the CQ according to Client/Server or Duplex setting.
-	ctx->cq = ctx_cq_create(ctx->context,ctx->channel,user_parm);
-	if (ctx->cq == NULL) {
-		fprintf(stderr, "Couldn't create CQ \n");
-		return NULL;
-	}
-
-	ALLOCATE(ctx->qp,struct ibv_qp*,user_parm->num_of_qps);
-	
-	for(i=0; i < user_parm->num_of_qps; i++) {
-		ctx->qp[i] = ctx_qp_create(ctx->pd,ctx->cq,ctx->cq,user_parm);
-		if (ctx->qp[i] == NULL) {
-			return NULL;
-		}
-
-		if(ctx_modify_qp_to_init(ctx->qp[i],user_parm)) {
-			return NULL;
-		}
-	}
-
-	return ctx;
-}
-
-/****************************************************************************** 
- *
- ******************************************************************************/
-static int pp_connect_ctx(struct pingpong_context *ctx,int my_psn,
-			              struct pingpong_dest *dest, 
-						  struct perftest_parameters *user_parm)
-{
-	struct ibv_qp_attr attr;
-	memset(&attr, 0, sizeof attr);
-	int i;
-
-	attr.qp_state 		= IBV_QPS_RTR;
-	attr.path_mtu       = user_parm->curr_mtu;
-    attr.dest_qp_num    = dest->qpn;
-	attr.rq_psn         = dest->psn;
-	attr.ah_attr.dlid   = dest->lid;
-	if (user_parm->connection_type == RC) {
-		attr.max_dest_rd_atomic     = 1;
-		attr.min_rnr_timer          = 12;
-	}
-	if (user_parm->gid_index < 0) {
-		attr.ah_attr.is_global  = 0;
-		attr.ah_attr.sl         = user_parm->sl;
-	} else {
-		attr.ah_attr.is_global  = 1;
-		attr.ah_attr.grh.dgid   = dest->gid;
-		attr.ah_attr.grh.sgid_index = user_parm->gid_index;
-		attr.ah_attr.grh.hop_limit = 1;
-		attr.ah_attr.sl         = 0;
-	}
-	attr.ah_attr.src_path_bits = 0;
-	attr.ah_attr.port_num   = user_parm->ib_port;
-	
-	if (user_parm->connection_type == RC) {
-		if (ibv_modify_qp(ctx->qp[0], &attr,
-				  IBV_QP_STATE              |
-				  IBV_QP_AV                 |
-				  IBV_QP_PATH_MTU           |
-				  IBV_QP_DEST_QPN           |
-				  IBV_QP_RQ_PSN             |
-				  IBV_QP_MIN_RNR_TIMER      |
-				  IBV_QP_MAX_DEST_RD_ATOMIC)) {
-			fprintf(stderr, "Failed to modify RC QP to RTR\n");
-			return 1;
-		}
-		attr.timeout            = user_parm->qp_timeout;
-		attr.retry_cnt          = 7;
-		attr.rnr_retry          = 7;
-	} else if (user_parm->connection_type == UC) {
-		if (ibv_modify_qp(ctx->qp[0], &attr,
-				  IBV_QP_STATE              |
-				  IBV_QP_AV                 |
-				  IBV_QP_PATH_MTU           |
-				  IBV_QP_DEST_QPN           |
-				  IBV_QP_RQ_PSN)) {
-			fprintf(stderr, "Failed to modify UC QP to RTR\n");
-			return 1;
-		}
-	} 
-	 
-	else {
-		for (i = 0; i < user_parm->num_of_qps; i++) {
-			if (ibv_modify_qp(ctx->qp[i],&attr,IBV_QP_STATE )) {
-				fprintf(stderr, "Failed to modify UD QP to RTR\n");
-				return 1;
-			}
-		}
-		if (user_parm->machine == CLIENT || user_parm->duplex) {
-			ctx->ah = ibv_create_ah(ctx->pd,&attr.ah_attr);
-			if (!ctx->ah) {
-				fprintf(stderr, "Failed to create AH for UD\n");
-				return 1;
-			}
-		}
-	}
-
-	if (user_parm->machine == CLIENT || user_parm->duplex) {
-
-		attr.qp_state 	    = IBV_QPS_RTS;
-		attr.sq_psn 	    = my_psn;
-		if (user_parm->connection_type == RC) {
-			attr.max_rd_atomic  = 1;
-			if (ibv_modify_qp(ctx->qp[0], &attr,
-					IBV_QP_STATE              |
-					IBV_QP_SQ_PSN             |
-					IBV_QP_TIMEOUT            |
-					IBV_QP_RETRY_CNT          |
-					IBV_QP_RNR_RETRY          |
-					IBV_QP_MAX_QP_RD_ATOMIC)) {
-				fprintf(stderr, "Failed to modify RC QP to RTS\n");
-				return 1;
-			}
-
-		} else {
-			if(ibv_modify_qp(ctx->qp[0],&attr,IBV_QP_STATE |IBV_QP_SQ_PSN)) {
-				fprintf(stderr, "Failed to modify UC QP to RTS\n");
-				return 1;
-			}
-		}
-	}
-
-	return 0;
-}
-
-/****************************************************************************** 
- *
- ******************************************************************************/
-static int set_recv_wqes(struct pingpong_context *ctx,
-						 struct perftest_parameters *user_param) {
-						
-	int					i,j,buff_size;
-	int 				duplex_ind;
-	struct ibv_recv_wr  *bad_wr_recv;
-
-	i = (user_param->duplex && user_param->use_mcg) ? 1 : 0;
-	duplex_ind = (user_param->duplex && !user_param->use_mcg) ? 1 : 0;
-
-	buff_size = BUFF_SIZE(ctx->size) + IF_UD_ADD(user_param->connection_type);
-
-	while (i < user_param->num_of_qps) {
-
-		ctx->sge_list[i].addr   = (uintptr_t)ctx->buf[i] + duplex_ind*buff_size;
-
-		if (user_param->connection_type == UD) 
-			ctx->sge_list[i].addr += (CACHE_LINE_SIZE - UD_ADDITION);
-
-		ctx->sge_list[i].length = SIZE(user_param->connection_type,user_param->size);
-		ctx->sge_list[i].lkey   = ctx->mr[i]->lkey;
-		ctx->rwr[i].sg_list     = &ctx->sge_list[i];
-		ctx->rwr[i].wr_id       = i;
-		ctx->rwr[i].next        = NULL;
-		ctx->rwr[i].num_sge	    = MAX_RECV_SGE;
-		ctx->my_addr[i]		    = (uintptr_t)ctx->buf[i] + duplex_ind*buff_size;
-		
-		for (j = 0; j < user_param->rx_depth; ++j) {
-
-			if (ibv_post_recv(ctx->qp[i],&ctx->rwr[i],&bad_wr_recv)) {
-				fprintf(stderr, "Couldn't post recv Qp = %d: counter=%d\n",i,j);
-				return 1;
-			}
-
-			if (user_param->size <= (CYCLE_BUFFER / 2))
-				increase_loc_addr(&ctx->sge_list[i],user_param->size,j,ctx->my_addr[i],user_param->connection_type);
-		}
-		i++;
-	}
-	return 0;
-}
-
-/****************************************************************************** 
- *
- ******************************************************************************/
-static void set_send_wqe(struct pingpong_context *ctx,int rem_qpn,
-						 struct perftest_parameters *user_param) {
-
-	ctx->list.addr     = (uintptr_t)ctx->buf[0];
-	ctx->list.lkey 	   = ctx->mr[0]->lkey;
-
-	ctx->wr.sg_list    = &ctx->list;
-	ctx->wr.num_sge    = 1;
-	ctx->wr.opcode     = IBV_WR_SEND;
-	ctx->wr.next       = NULL;
-	ctx->wr.wr_id      = PINGPONG_SEND_WRID;
-	ctx->wr.send_flags = IBV_SEND_SIGNALED;
-
-	if (user_param->connection_type == UD) {
-		ctx->wr.wr.ud.ah          = ctx->ah;
-		ctx->wr.wr.ud.remote_qkey = DEF_QKEY;
-		ctx->wr.wr.ud.remote_qpn  = rem_qpn;
-	}
-}
-
-/****************************************************************************** 
- *
- ******************************************************************************/
-static int pp_drain_qp(struct pingpong_context *ctx,
-						struct perftest_parameters *user_param,
-						int psn,struct pingpong_dest *dest,
-						struct mcast_parameters *mcg_params) {
-
-	struct ibv_qp_attr attr;
-	struct ibv_wc      wc;
-	int                i;
-
-	memset(&attr, 0, sizeof attr);
-	attr.qp_state = IBV_QPS_ERR;
-
-	for (i = 0; i <  user_param->num_of_qps; i++) {
-
-		if (ibv_modify_qp(ctx->qp[i],&attr,IBV_QP_STATE)) {
-			fprintf(stderr, "Failed to modify RC QP to ERR\n");
-			return 1;
-		}
-
-		while (ibv_poll_cq(ctx->cq,1,&wc));
-   
-		attr.qp_state = IBV_QPS_RESET;
-
-		if (ibv_modify_qp(ctx->qp[i],&attr,IBV_QP_STATE)) {
-			fprintf(stderr, "Failed to modify RC QP to RESET\n");
-			return 1;
-		}
-
-		if(ctx_modify_qp_to_init(ctx->qp[i],user_param)) {
-			return 1;
-		}
-
-		if (user_param->use_mcg) {
-
-			if ((!user_param->duplex && user_param->machine == SERVER) || (user_param->duplex && i > 0)) {
-				if (ibv_attach_mcast(ctx->qp[i],&mcg_params->mgid,mcg_params->mlid)) {
-					fprintf(stderr, "Couldn't attach QP to MultiCast group");
-					return 1;
-				}
-			}
-		}
-	}
-
-	if (pp_connect_ctx(ctx,psn,dest,user_param)) {
-		return 1;
-	}
-
-	return 0;
-}
-
-/****************************************************************************** 
- *
- ******************************************************************************/
-static void print_report(struct perftest_parameters *user_param) {
-
-	double cycles_to_units;
-	unsigned long tsize;	/* Transferred size, in megabytes */
-	int i, j;
-	int opt_posted = 0, opt_completed = 0;
-	cycles_t opt_delta;
-	cycles_t t;
-
-
-	opt_delta = tcompleted[opt_posted] - tposted[opt_completed];
-
-	if (user_param->noPeak == OFF) {
-		/* Find the peak bandwidth, unless asked not to in command line */
-		for (i = 0; i < user_param->iters; ++i)
-			for (j = i; j < user_param->iters; ++j) {
-				t = (tcompleted[j] - tposted[i]) / (j - i + 1);
-				if (t < opt_delta) {
-					opt_delta  = t;
-					opt_posted = i;
-					opt_completed = j;
-				}
-			}
-	}
-
-	cycles_to_units = get_cpu_mhz(user_param->cpu_freq_f) * 1000000;
-
-	tsize = user_param->duplex ? 2 : 1;
-	tsize = tsize * user_param->size;
-	printf(REPORT_FMT,user_param->size,user_param->iters,(user_param->noPeak == OFF) * tsize * cycles_to_units / opt_delta / 0x100000,
-	       tsize * user_param->iters * cycles_to_units /(tcompleted[user_param->iters - 1] - tposted[0]) / 0x100000);
-}
-
-/****************************************************************************** 
- * Important note :															  
- * In case of UD/UC this is NOT the way to measureBW since we are running with 
- * loop on the send side , while we should run on the recieve side or enable 
- * retry in SW , Since the sender may be faster than the reciver.
- * Although	we had posted recieve it is not enough and might end this will
- * result in deadlock of test since both sides are stuck on poll cq.
- * In this test i do not solve this for the general test ,need to write
- * seperate test for UC/UD but in case the tx_depth is ~1/3 from the
- * number of iterations this should be ok .
- * Also note that the sender is limited in the number of send, ans
- * i try to make the reciver full .
- ******************************************************************************/
-int run_iter_bi(struct pingpong_context *ctx, 
-				struct perftest_parameters *user_param)  {
-
-	int                     scnt    = 0;
-	int 					ccnt    = 0;
-	int 					rcnt    = 0;
-	int 					i       = 0;
-	int 					num_of_qps = user_param->num_of_qps;
-	int 					ne;
-	struct ibv_wc 			*wc          = NULL;
-	int 					*rcnt_for_qp = NULL;
-	struct ibv_recv_wr      *bad_wr_recv = NULL;
-	struct ibv_send_wr 		*bad_wr      = NULL;
-
-	ALLOCATE(rcnt_for_qp,int,user_param->num_of_qps);
-	ALLOCATE(wc,struct ibv_wc,DEF_WC_SIZE);
-	memset(rcnt_for_qp,0,sizeof(int)*user_param->num_of_qps);
-
-	if (user_param->use_mcg)
-		num_of_qps--; 
-	
-	// Set the length of the scatter in case of ALL option.
-	ctx->list.length = user_param->size;
-	ctx->list.addr   = (uintptr_t)ctx->buf[0];
-	ctx->wr.send_flags = IBV_SEND_SIGNALED;
-	
-	if (user_param->size <= user_param->inline_size) 
-		ctx->wr.send_flags |= IBV_SEND_INLINE; 
-
-	while (ccnt < user_param->iters || rcnt < user_param->iters) {
-                
-		while (scnt < user_param->iters && (scnt - ccnt) < user_param->tx_depth / 2) {
-
-			if (scnt %  CQ_MODERATION == 0 && CQ_MODERATION > 1)
-				ctx->wr.send_flags &= ~IBV_SEND_SIGNALED;
-
-			tposted[scnt] = get_cycles();
-			if (ibv_post_send(ctx->qp[0],&ctx->wr, &bad_wr)) {
-				fprintf(stderr, "Couldn't post send: scnt=%d\n",scnt);
-				return 1;
-			}
-
-			if (user_param->size <= (CYCLE_BUFFER / 2))
-				increase_loc_addr(&ctx->list,user_param->size,scnt,(uintptr_t)ctx->buf[0],0);
-
-			++scnt;
-
-			if ((scnt % CQ_MODERATION) == (CQ_MODERATION - 1) || scnt == (user_param->iters - 1)) 
-				ctx->wr.send_flags |= IBV_SEND_SIGNALED;
-		}
-
-		if (user_param->use_event) {
-
-			if (ctx_notify_events(ctx->cq,ctx->channel)) {
-				fprintf(stderr,"Failed to notify events to CQ");
-				return 1;
-			}
-		}
-
-		do {
-			ne = ibv_poll_cq(ctx->cq,DEF_WC_SIZE,wc);
-			if (ne > 0) {
-				for (i = 0; i < ne; i++) {
-					
-					if (wc[i].status != IBV_WC_SUCCESS)
-						 NOTIFY_COMP_ERROR_SEND(wc[i],scnt,ccnt);
-
-					if ((int) wc[i].wr_id == PINGPONG_SEND_WRID) {
-						ccnt += CQ_MODERATION;
-						if (ccnt >= user_param->iters - 1) 
-							tcompleted[user_param->iters - 1] = get_cycles();
-
-						else 
-							tcompleted[ccnt - 1] = get_cycles();
-					}
-
-					else {
-
-						rcnt_for_qp[wc[i].wr_id]++;
-						rcnt++;
-						if (ibv_post_recv(ctx->qp[wc[i].wr_id],&ctx->rwr[wc[i].wr_id],&bad_wr_recv)) {
-							fprintf(stderr, "Couldn't post recv Qp=%d rcnt=%d\n",(int)wc[i].wr_id , rcnt_for_qp[wc[i].wr_id]);
-							return 15;
-						}
-
-						if (user_param->size <= (CYCLE_BUFFER / 2))
-							increase_loc_addr(&ctx->sge_list[wc[i].wr_id],
-							  user_param->size,rcnt_for_qp[wc[i].wr_id] + user_param->rx_depth - 1,
-							  ctx->my_addr[wc[i].wr_id],user_param->connection_type);	
-					}
-				}
-			}
-		} while (ne > 0);
-
-		if (ne < 0) {
-			fprintf(stderr, "poll CQ failed %d\n", ne);
-			return 1;
-		}
-	}
-	
-	if (user_param->size <= user_param->inline_size) 
-		ctx->wr.send_flags &= ~IBV_SEND_INLINE;
-	
-	free(rcnt_for_qp);
-	free(wc);
-	return 0;
-}
-
-/****************************************************************************** 
- *
- ******************************************************************************/
-int run_iter_uni_server(struct pingpong_context *ctx, 
-						struct perftest_parameters *user_param) {
-
-	int 				rcnt = 0;
-	int 				ne,i;
-	int                 *rcnt_for_qp = NULL;
-	struct ibv_wc 		*wc          = NULL;
-	struct ibv_recv_wr  *bad_wr_recv = NULL;
-
-	ALLOCATE(wc,struct ibv_wc,DEF_WC_SIZE);
-	ALLOCATE(rcnt_for_qp,int,user_param->num_of_qps);
-
-	memset(rcnt_for_qp,0,sizeof(int)*user_param->num_of_qps);
-
-	while (rcnt < user_param->iters) {
-
-		if (user_param->use_event) {
-			if (ctx_notify_events(ctx->cq,ctx->channel)) {
-				fprintf(stderr ," Failed to notify events to CQ");
-				return 1;
-			}
-		}
-		
-		do {
-			ne = ibv_poll_cq(ctx->cq,DEF_WC_SIZE,wc);
-			if (ne > 0) {
-				for (i = 0; i < ne; i++) {
-					
-					if (wc[i].status != IBV_WC_SUCCESS) 
-						NOTIFY_COMP_ERROR_RECV(wc[i],rcnt_for_qp[wc[i].wr_id]);
-						
-					rcnt_for_qp[wc[i].wr_id]++;
-					tcompleted[rcnt++] = get_cycles();
-
-				   	if (ibv_post_recv(ctx->qp[wc[i].wr_id],&ctx->rwr[wc[i].wr_id],&bad_wr_recv)) {
-						fprintf(stderr, "Couldn't post recv Qp=%d rcnt=%d\n",(int)wc[i].wr_id,rcnt_for_qp[wc[i].wr_id]);
-						return 15;
-					}
-
-					if (user_param->size <= (CYCLE_BUFFER / 2))
-						increase_loc_addr(&ctx->sge_list[wc[i].wr_id],user_param->size,
-										  rcnt_for_qp[wc[i].wr_id] + user_param->rx_depth,
-										  ctx->my_addr[wc[i].wr_id],user_param->connection_type);						
-				}
-			}
-		} while (ne > 0);
-
-		if (ne < 0) {
-			fprintf(stderr, "Poll Recieve CQ failed %d\n", ne);
-			return 1;
-		}
-	}
-
-	tposted[0] = tcompleted[0];
-	free(wc);
-	free(rcnt_for_qp);
-	return 0;
-}
-
-/****************************************************************************** 
- *
- ******************************************************************************/
-int run_iter_uni_client(struct pingpong_context *ctx, 
-						struct perftest_parameters *user_param) {
-
-	int 		       ne;
-	int 			   i    = 0;
-	int                scnt = 0;
-	int                ccnt = 0;
-	struct ibv_wc      *wc     = NULL;
-	struct ibv_send_wr *bad_wr = NULL;
-
-	ALLOCATE(wc,struct ibv_wc,DEF_WC_SIZE);
-
-	// Set the lenght of the scatter in case of ALL option.
-	ctx->list.length = user_param->size;
-	ctx->list.addr   = (uintptr_t)ctx->buf[0];
-	ctx->wr.send_flags = IBV_SEND_SIGNALED; 
-
-	if (user_param->size <= user_param->inline_size) 
-		ctx->wr.send_flags |= IBV_SEND_INLINE; 
-	
-
-	while (scnt < user_param->iters || ccnt < user_param->iters) {
-		while (scnt < user_param->iters && (scnt - ccnt) < user_param->tx_depth ) {
-
-			if (scnt %  CQ_MODERATION == 0 && CQ_MODERATION > 1)
-				ctx->wr.send_flags &= ~IBV_SEND_SIGNALED;
-
-			tposted[scnt] = get_cycles();
-			if (ibv_post_send(ctx->qp[0], &ctx->wr, &bad_wr)) {
-				fprintf(stderr, "Couldn't post send: scnt=%d\n",scnt);
-				return 1;
-			}
-
-			if (user_param->size <= (CYCLE_BUFFER / 2))
-				increase_loc_addr(&ctx->list,user_param->size,scnt,(uintptr_t)ctx->buf[0],0);
-
-			scnt++;
-
-			if ((scnt % CQ_MODERATION) == (CQ_MODERATION - 1) || scnt == (user_param->iters - 1)) 
-				ctx->wr.send_flags |= IBV_SEND_SIGNALED;
-		}
-
-		if (ccnt < user_param->iters) {	
-			
-			if (user_param->use_event) {
-				if (ctx_notify_events(ctx->cq,ctx->channel)) {
-					fprintf(stderr , " Failed to notify events to CQ");
-					return 1;
-				}
-			} 
-			do {
-				ne = ibv_poll_cq(ctx->cq,DEF_WC_SIZE,wc);
-				if (ne > 0) {
-					for (i = 0; i < DEF_WC_SIZE; i++) {
-
-						if (wc[i].status != IBV_WC_SUCCESS) 
-							NOTIFY_COMP_ERROR_SEND(wc[i],scnt,ccnt);
-			
-						ccnt += CQ_MODERATION;
-						if (ccnt >= user_param->iters - 1) 
-							tcompleted[user_param->iters - 1] = get_cycles();
-
-						else 
-							tcompleted[ccnt - 1] = get_cycles();
-					}
-				}
-                         
-					
-			} while (ne > 0);
-
-			if (ne < 0) {
-				fprintf(stderr, "poll CQ failed\n");
-				return 1;
-			}
-		}
-	}
-
-	if (user_param->size <= user_param->inline_size) 
-		ctx->wr.send_flags &= ~IBV_SEND_INLINE;
-
-	free(wc);
-	return 0;
-}
-
-/****************************************************************************** 
- *
- ******************************************************************************/
-int main(int argc, char *argv[])
-{
-	struct ibv_device		 	*ib_dev = NULL;
-	struct pingpong_context  	*ctx;
-	struct pingpong_dest	 	my_dest,rem_dest;
-	struct perftest_parameters  user_param;
-	struct mcast_parameters     mcg_params;
-	int                      	i = 0;
-	int                      	size_max_pow = 24;
-	int							size_of_arr;
-
-	// Pointer to The relevent function of run_iter according to machine type.
-	int (*ptr_to_run_iter_uni)(struct pingpong_context*,struct perftest_parameters*);
-
-	/* init default values to user's parameters */
-	memset(&user_param, 0 , sizeof(struct perftest_parameters));
-	memset(&mcg_params, 0 , sizeof(struct mcast_parameters));
-	memset(&my_dest   , 0 , sizeof(struct pingpong_dest));
-	memset(&rem_dest   , 0 , sizeof(struct pingpong_dest));
- 
-	user_param.verb    = SEND;
-	user_param.tst     = BW;
-	user_param.version = VERSION;
-
-	if (parser(&user_param,argv,argc)) 
-		return 1;
-
-	printf(RESULT_LINE);
-
-	user_param.rx_depth = (user_param.iters < user_param.rx_depth) ? user_param.iters : user_param.rx_depth ;
-
-    if (user_param.use_mcg) {
-
-		user_param.connection_type = UD;
-		if (user_param.duplex) {
-			user_param.num_of_qps++;
-			printf("                    Send Bidirectional BW  -  Multicast Test\n");
-		}
-		else {
-			printf("                    Send BW  -  Multicast Test\n");
-			if (user_param.machine == CLIENT)
-				user_param.num_of_qps = 1;
-		}
-    }
-
-	else if (user_param.duplex) {
-		    printf("                    Send Bidirectional BW Test\n");
-	} else 
-		    printf("                    Send BW Test\n");
-
-	if (user_param.use_event) 
-		printf(" Test with events.\n");
-
-	if (user_param.connection_type == RC)
-		printf(" Connection type : RC\n");
-	else if (user_param.connection_type == UC)
-		printf(" Connection type : UC\n");
-	else{
-		printf(" Connection type : UD\n");
-	}
-	
-	// Done with parameter parsing. Perform setup.
-	if (user_param.all == ON) {
-		// since we run all sizes 
-		user_param.size = MAX_SIZE;
-	}
-
-	srand48(getpid() * time(NULL));
-	page_size = sysconf(_SC_PAGESIZE);
-
-	ib_dev = ctx_find_dev(user_param.ib_devname);
-	if (!ib_dev)
-		return 7;
-
-	mcg_params.ib_devname = ibv_get_device_name(ib_dev);
-
-	ctx = pp_init_ctx(ib_dev,&user_param);
-	if (!ctx)
-		return 1;
-
-	// Set up the Connection.
-	if (set_up_connection(ctx,&user_param,&my_dest,&mcg_params)) {
-		fprintf(stderr," Unable to set up socket connection\n");
-		return 1;
-	}	
-
-	// Init the connection and print the local data.
-	if (init_connection(&user_param,&my_dest)) {
-		fprintf(stderr," Unable to init the socket connection\n");
-		return 1;
-	}
-
-	// shaking hands and gather the other side info.
-    if (ctx_hand_shake(&user_param,&my_dest,&rem_dest)) {
-        fprintf(stderr,"Failed to exchange date between server and clients\n");
-        return 1;
-        
-    }
-	// For printing only MGID in the remote side.
-	user_param.side = REMOTE;
-	ctx_print_pingpong_data(&rem_dest,&user_param);
-
-	// Joining the Send side port the Mcast gid
-	if (user_param.use_mcg && (user_param.machine == CLIENT || user_param.duplex)) {
-		memcpy(mcg_params.mgid.raw, rem_dest.gid.raw, 16);
-		if (set_mcast_group(ctx,&user_param,&mcg_params)) {
-			fprintf(stderr," Unable to Join Sender to Mcast gid\n");
-			return 1;
-		}
-	}
-
-	// Prepare IB resources for rtr/rts.
-	if (pp_connect_ctx(ctx,my_dest.psn,&rem_dest,&user_param)) {
-		fprintf(stderr," Unable to Connect the HCA's through the link\n");
-		return 1;
-	}
-	
-	// shaking hands and gather the other side info.
-    if (ctx_hand_shake(&user_param,&my_dest,&rem_dest)) {
-        fprintf(stderr,"Failed to exchange date between server and clients\n");
-        return 1;
-        
-    }
-
-	if (user_param.use_event) {
-		if (ibv_req_notify_cq(ctx->cq, 0)) {
-			fprintf(stderr, " Couldn't request CQ notification\n");
-			return 1;
-		} 
-	}
-
-	printf(RESULT_LINE);
-	printf(RESULT_FMT);
-
-	size_of_arr = (user_param.duplex) ? 1 : user_param.num_of_qps;
-
-	ALLOCATE(tposted,cycles_t,user_param.iters*size_of_arr);
-	ALLOCATE(tcompleted,cycles_t,user_param.iters*size_of_arr);
-
-	if (user_param.machine == SERVER || user_param.duplex) {
-		ALLOCATE(ctx->rwr,struct ibv_recv_wr,user_param.num_of_qps);
-		ALLOCATE(ctx->sge_list,struct ibv_sge,user_param.num_of_qps);
-		ALLOCATE(ctx->my_addr ,uint64_t ,user_param.num_of_qps);
-	}
-
-	ptr_to_run_iter_uni = (user_param.machine == CLIENT) ?	&run_iter_uni_client : &run_iter_uni_server;
-	
-	if (user_param.machine == SERVER && !user_param.duplex) {
-		user_param.noPeak = ON;
-	}
-
-	if (user_param.machine == CLIENT || user_param.duplex) {
-		set_send_wqe(ctx,rem_dest.qpn,&user_param);
-	}
-
-	if (user_param.all == ON) {
-
-		if (user_param.connection_type == UD) 
-		   size_max_pow =  (int)UD_MSG_2_EXP(MTU_SIZE(user_param.curr_mtu)) + 1;
-
-		for (i = 1; i < size_max_pow ; ++i) {
-			user_param.size = 1 << i;
-
-			if (user_param.machine == SERVER || user_param.duplex) {
-				if (set_recv_wqes(ctx,&user_param)) {
-					fprintf(stderr," Failed to post receive recv_wqes\n");
-					return 1;
-				}
-			}
-
-			if (ctx_hand_shake(&user_param,&my_dest,&rem_dest)) {
-				fprintf(stderr,"Failed to exchange date between server and clients\n");
-				return 1;
-			}
-
-			if (user_param.duplex) {
-				if(run_iter_bi(ctx,&user_param))
-					return 17;
-			} else {
-				if((*ptr_to_run_iter_uni)(ctx,&user_param))
-					return 17;
-			}
-			print_report(&user_param);
-
-			if (pp_drain_qp(ctx,&user_param,my_dest.psn,&rem_dest,&mcg_params)) {
-				fprintf(stderr,"Failed to drain Recv queue (performance optimization)\n");
-				return 1;
-			}
-
-			if (ctx_hand_shake(&user_param,&my_dest,&rem_dest)) {
-				fprintf(stderr,"Failed to exchange date between server and clients\n");
-				return 1;
-			}
-        
-		}
-
-	} else {
-
-		if (user_param.machine == SERVER || user_param.duplex) {
-			if (set_recv_wqes(ctx,&user_param)) {
-				fprintf(stderr," Failed to post receive recv_wqes\n");
-				return 1;
-			}
-		}
-
-		if (ctx_hand_shake(&user_param,&my_dest,&rem_dest)) {
-			fprintf(stderr,"Failed to exchange date between server and clients\n");
-			return 1;
-		}
-
-		if (user_param.duplex) {
-			if(run_iter_bi(ctx,&user_param))
-				return 18;
-
-		} else {
-			if((*ptr_to_run_iter_uni)(ctx,&user_param))
-				return 18;
-		}
-
-		print_report(&user_param);	
-	}
-		
-	if (ctx_close_connection(&user_param,&my_dest,&rem_dest)) {
-		fprintf(stderr," Failed to close connection between server and client\n");
-		return 1;
-	}
-
-	printf(RESULT_LINE);
-	return destroy_ctx_resources(ctx,&user_param,&my_dest,&rem_dest,&mcg_params);
-}
+/*
+ * Copyright (c) 2005 Topspin Communications.  All rights reserved.
+ * Copyright (c) 2005 Mellanox Technologies Ltd.  All rights reserved.
+ * Copyright (c) 2009 HNR Consulting.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * $Id$
+ */
+
+#if HAVE_CONFIG_H
+#  include <config.h>
+#endif /* HAVE_CONFIG_H */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include <limits.h>
+#include <malloc.h>
+#include <getopt.h>
+#include <time.h>
+#include <errno.h>
+#include <infiniband/verbs.h>
+
+#include "get_clock.h"
+#include "multicast_resources.h"
+#include "perftest_resources.h"
+
+#define VERSION 2.1
+
+static int page_size;
+cycles_t	*tposted;
+cycles_t	*tcompleted;
+
+struct pingpong_context {					/* All verbs resources used by one run of the test. */
+	struct ibv_context 		*context;	/* Device context returned by ibv_open_device(). */
+	struct ibv_comp_channel *channel;	/* Completion channel; NULL unless use_event is set. */
+	struct ibv_pd      		*pd;		/* Protection domain from ibv_alloc_pd(). */
+	struct ibv_mr     		**mr;		/* One memory region per QP; mr[i] registers buf[i]. */
+	struct ibv_cq      		*cq;		/* Single CQ, handed to ctx_qp_create() for every QP. */
+	struct ibv_qp      		**qp;		/* Array of num_of_qps queue pairs. */
+	struct ibv_sge      	list;		/* NOTE(review): presumably the send-side SGE - confirm. */
+	struct ibv_send_wr  	wr;		/* NOTE(review): presumably the send work request - confirm. */
+	struct ibv_sge 			*sge_list;	/* Freed at teardown for SERVER or duplex only. */
+	struct ibv_recv_wr  	*rwr;		/* Freed at teardown for SERVER or duplex only. */
+	struct ibv_ah			*ah;		/* NULL after init; destroyed at teardown when set. */
+	void               		**buf;		/* One page_size-aligned buffer per QP. */
+	unsigned            	size;		/* Copy of user_parm->size taken in pp_init_ctx(). */
+	uint64_t				*my_addr;	/* Freed at teardown for SERVER or duplex only. */
+};
+
+/****************************************************************************** 
+ * Fill mcg_params from the local port (GID, P_Key, SM LID/SL) and, on an IB link, register the group with the SM. Returns 0 on success, 1 on failure.
+ ******************************************************************************/
+static int set_mcast_group(struct pingpong_context *ctx,
+						   struct perftest_parameters *user_parm,
+						   struct mcast_parameters *mcg_params) {
+
+	struct ibv_port_attr port_attr;
+	// GID of the local port at the user-selected gid_index.
+	if (ibv_query_gid(ctx->context,user_parm->ib_port,user_parm->gid_index,&mcg_params->port_gid)) {
+			return 1;
+	}
+	// P_Key stored at index DEF_PKEY_IDX of the same port.
+	if (ibv_query_pkey(ctx->context,user_parm->ib_port,DEF_PKEY_IDX,&mcg_params->pkey)) {
+		return 1;
+	}
+	// The SM's LID and SL are taken from the port attributes.
+	if (ibv_query_port(ctx->context,user_parm->ib_port,&port_attr)) {
+		return 1;
+	}
+	mcg_params->sm_lid  = port_attr.sm_lid;
+	mcg_params->sm_sl   = port_attr.sm_sl;
+	mcg_params->ib_port = user_parm->ib_port;
+	// The SM is contacted only when the link layer string is "IB".
+	if (!strcmp(link_layer_str(user_parm->link_type),"IB")) {
+		// Register the Mcast group with the SM (SubnAdm Set method).
+		if (join_multicast_group(SUBN_ADM_METHOD_SET,mcg_params)) {
+			fprintf(stderr,"Couldn't Register the Mcast group on the SM\n");
+			return 1;
+		}
+	}
+	return 0;
+}
+
+/****************************************************************************** 
+ * Fill my_dest (gid, lid, qpn, psn) with the data sent to the peer. Returns 0 on success and non-zero (1 or -1) on failure.
+ ******************************************************************************/
+static int set_up_connection(struct pingpong_context *ctx,
+							 struct perftest_parameters *user_parm,
+							 struct pingpong_dest *my_dest,
+							 struct mcast_parameters *mcg_params) {
+
+	int i = (user_parm->duplex) ? 1 : 0;	// In duplex mode QP 0 is not attached to the group.
+
+	if (user_parm->use_mcg && (user_parm->duplex || user_parm->machine == SERVER)) {
+		// Multicast receiver: advertise the group's GID/LID instead of the port's.
+		set_multicast_gid(mcg_params,ctx->qp[0]->qp_num,(int)user_parm->machine);
+		if (set_mcast_group(ctx,user_parm,mcg_params)) {
+			return 1;
+		}
+		
+		while (i < user_parm->num_of_qps) {
+			if (ibv_attach_mcast(ctx->qp[i],&mcg_params->mgid,mcg_params->mlid)) {
+				fprintf(stderr, "Couldn't attach QP to MultiCast group\n");	// Newline keeps the caller's follow-up message on its own line.
+				return 1;
+			}
+			i++;
+		}
+
+		mcg_params->mcast_state |= MCAST_IS_ATTACHED;
+		my_dest->gid = mcg_params->mgid;
+		my_dest->lid = mcg_params->mlid;
+		my_dest->qpn = QPNUM_MCAST;
+
+	} else {
+		if (user_parm->gid_index != -1) {
+			if (ibv_query_gid(ctx->context,user_parm->ib_port,user_parm->gid_index,&my_dest->gid)) {
+				return -1;
+			}
+		}
+		my_dest->lid = ctx_get_local_lid(ctx->context,user_parm->ib_port);
+		my_dest->qpn = ctx->qp[0]->qp_num;
+	}
+	my_dest->psn  = lrand48() & 0xffffff;	// Random starting PSN, masked to 24 bits.
+
+	// A zero LID is fatal only when no GID index is in use; with a GID (RoCE) it is tolerated.
+
+	if (user_parm->gid_index < 0) {
+		if (!my_dest->lid) {
+			fprintf(stderr," Local lid 0x0 detected,without any use of gid. Is SM running?\n");
+			return -1;
+		}
+	}
+	return 0;
+}
+
+/****************************************************************************** 
+ * Print the local connection data, then open the control socket through ctx_client_connect() or ctx_server_connect(). Returns 0 on success, 1 on failure.
+ ******************************************************************************/
+static int init_connection(struct perftest_parameters *params,
+ 						   struct pingpong_dest *my_dest) {
+
+	params->side = LOCAL;
+	ctx_print_pingpong_data(my_dest,params);
+	// The role decides which helper yields the socket descriptor.
+	if (params->machine == CLIENT) 
+		params->sockfd = ctx_client_connect(params->servername,params->port);
+	else 
+		params->sockfd = ctx_server_connect(params->port);
+	
+	// A negative descriptor is the only failure this function detects.
+	if(params->sockfd < 0) {
+		fprintf(stderr,"Unable to open file descriptor for socket connection\n");
+		return 1;
+	}
+	return 0;
+}
+
+/****************************************************************************** 
+ * Leave the multicast group (if used) and release every resource held by ctx, plus tposted/tcompleted. Returns 0 when all steps succeed, 1 otherwise.
+ ******************************************************************************/
+static int destroy_ctx_resources(struct pingpong_context    *ctx, 
+								 struct perftest_parameters *user_parm,
+								 struct pingpong_dest		*my_dest,
+								 struct pingpong_dest		*rem_dest,
+								 struct mcast_parameters    *mcg_params)  {
+
+	int test_result = 0;
+	int i = (user_parm->duplex) ? 1 : 0;	// Same start index as the attach loop in set_up_connection().
+
+	if (user_parm->use_mcg) {
+		// NOTE: any failure inside this multicast section returns immediately and skips the teardown below.
+		if (user_parm->machine == SERVER || user_parm->duplex) {
+			
+			while (i < user_parm->num_of_qps) {
+				if (ibv_detach_mcast(ctx->qp[i],&my_dest->gid,my_dest->lid)) {
+					fprintf(stderr, "Couldn't deattach QP from MultiCast group\n");
+					return 1;
+				}
+				i++;
+			}
+			mcg_params->mgid = my_dest->gid;
+			if (!strcmp(link_layer_str(user_parm->link_type),"IB")) {
+				if (join_multicast_group(SUBN_ADM_METHOD_DELETE,mcg_params)) {
+					fprintf(stderr,"Couldn't Unregister the Mcast group on the SM\n");
+					return 1;
+				}
+			}
+		}
+
+		if (user_parm->machine == CLIENT || user_parm->duplex) {
+			// Same SM delete request, this time for the remote side's group GID.
+			mcg_params->mgid = rem_dest->gid;
+			if (!strcmp(link_layer_str(user_parm->link_type),"IB")) {
+				if (join_multicast_group(SUBN_ADM_METHOD_DELETE,mcg_params)) {
+					fprintf(stderr,"Couldn't Unregister the Mcast group on the SM\n");
+					return 1;
+				}
+			}
+
+		}
+	}	
+	// From here on a failure is only recorded in test_result and teardown continues.
+	if (ctx->ah) {
+		if (ibv_destroy_ah(ctx->ah)) {
+			fprintf(stderr, "failed to destroy AH\n");
+			test_result = 1;
+		}
+	}
+
+	for(i = 0; i < user_parm->num_of_qps; i++) {
+		if (ibv_destroy_qp(ctx->qp[i])) {
+			test_result = 1;
+		}
+	}
+	free(ctx->qp);
+
+	if (ibv_destroy_cq(ctx->cq)) {
+		test_result = 1;
+	}
+
+	for(i = 0; i < user_parm->num_of_qps; i++) {
+		// Deregister each MR before freeing the buffer it covers.
+		if (ibv_dereg_mr(ctx->mr[i])) {
+			test_result = 1;
+		}
+		free(ctx->buf[i]);
+	}
+	
+	if (ibv_dealloc_pd(ctx->pd)) {
+		test_result = 1;
+	}
+
+	if (ctx->channel) {
+		if (ibv_destroy_comp_channel(ctx->channel)) {
+			test_result = 1;
+		}
+	}
+	
+	if (ibv_close_device(ctx->context)) {
+		test_result = 1;
+	}
+	// The receive-side arrays are freed only for SERVER or duplex runs.
+	if (user_parm->machine == SERVER || user_parm->duplex) {
+		free(ctx->rwr);
+		free(ctx->sge_list);
+		free(ctx->my_addr);
+	}
+
+	free(ctx->mr);
+	free(ctx->buf);
+	free(ctx);
+	free(tposted);
+    free(tcompleted);
+	return test_result;
+}
+
+/****************************************************************************** 
+ * Open ib_dev and build the test context: MTU, link layer, optional completion channel, PD, one buffer+MR per QP, CQ and QPs moved to INIT.
+ ******************************************************************************/
+static struct pingpong_context *pp_init_ctx(struct ibv_device *ib_dev,
+											struct perftest_parameters *user_parm) {
+	// Returns the new context, or NULL on failure; the NULL paths release nothing acquired so far.
+	int i,m_size;
+	int duplex_ind;
+	struct pingpong_context *ctx;
+
+	ALLOCATE(ctx,struct pingpong_context,1);
+	ALLOCATE(ctx->buf,void*,user_parm->num_of_qps);
+	ALLOCATE(ctx->mr,struct ibv_mr*,user_parm->num_of_qps);
+
+	ctx->ah       = NULL;
+	ctx->channel  = NULL;
+	// Non-multicast duplex runs double the size of every buffer.
+	duplex_ind = (user_parm->duplex && !user_parm->use_mcg) ? 2 : 1;
+
+	ctx->context = ibv_open_device(ib_dev);
+	if (!ctx->context) {
+		fprintf(stderr, "Couldn't get context for %s\n",
+			ibv_get_device_name(ib_dev));
+		return NULL;
+	}
+
+	// Configure the Link MTU according to the user or the active mtu.
+	if (ctx_set_mtu(ctx->context,user_parm)) {
+		fprintf(stderr, "Couldn't set the MTU\n");
+		return NULL;
+	}
+	// May shrink user_parm->size: a UD message is capped at the MTU.
+	if (user_parm->connection_type == UD && user_parm->size > MTU_SIZE(user_parm->curr_mtu)) {	 
+		printf(" Max msg size in UD is MTU - %d . changing to MTU\n",MTU_SIZE(user_parm->curr_mtu));
+		user_parm->size = MTU_SIZE(user_parm->curr_mtu);
+	}
+	// Inline sends are disabled when is_dev_hermon() reports NOT_HERMON.
+	if (is_dev_hermon(ctx->context) == NOT_HERMON && user_parm->inline_size != 0)
+		user_parm->inline_size = 0;
+
+	printf(" Inline data is used up to %d bytes message\n", user_parm->inline_size);
+
+	ctx->size = user_parm->size;
+
+	// Finds the link type and configure the HCA accordingly.
+	if (ctx_set_link_layer(ctx->context,user_parm)) {
+		fprintf(stderr, " Couldn't set the link layer\n");
+		return NULL;
+	}
+	
+	if (user_parm->use_event) {
+		ctx->channel = ibv_create_comp_channel(ctx->context);
+		if (!ctx->channel) {
+			fprintf(stderr, "Couldn't create completion channel\n");
+			return NULL;
+		}
+	} else
+		ctx->channel = NULL;                  
+
+	ctx->pd = ibv_alloc_pd(ctx->context);
+	if (!ctx->pd) {
+		fprintf(stderr, "Couldn't allocate PD\n");
+		return NULL;
+	}
+
+	for (i = 0; i < user_parm->num_of_qps; i++) {
+		// One zeroed, page_size-aligned buffer per QP, registered as its own MR.
+		m_size = (BUFF_SIZE(user_parm->size) + IF_UD_ADD(user_parm->connection_type))*duplex_ind;
+		ctx->buf[i] = memalign(page_size,m_size);
+		if (!ctx->buf[i]) {
+			fprintf(stderr, "Couldn't allocate work buf.\n");
+			return NULL;
+		}
+		memset(ctx->buf[i],0,m_size);
+
+		// We dont really want IBV_ACCESS_LOCAL_WRITE, but IB spec says :
+		// The Consumer is not allowed to assign Remote Write or Remote Atomic to
+		// a Memory Region that has not been assigned Local Write. 
+		ctx->mr[i] = ibv_reg_mr(ctx->pd,
+								ctx->buf[i],
+								m_size,
+								IBV_ACCESS_REMOTE_WRITE | 
+								IBV_ACCESS_LOCAL_WRITE);
+
+		if (!ctx->mr[i]) {
+			fprintf(stderr, "Couldn't allocate MR\n");
+			return NULL;
+		}
+	}
+
+	// Create the CQ according to Client/Server or Duplex setting.
+	ctx->cq = ctx_cq_create(ctx->context,ctx->channel,user_parm);
+	if (ctx->cq == NULL) {
+		fprintf(stderr, "Couldn't create CQ \n");
+		return NULL;
+	}
+
+	ALLOCATE(ctx->qp,struct ibv_qp*,user_parm->num_of_qps);
+	// Every QP gets the same CQ for both CQ arguments of ctx_qp_create().
+	for(i=0; i < user_parm->num_of_qps; i++) {
+		ctx->qp[i] = ctx_qp_create(ctx->pd,ctx->cq,ctx->cq,user_parm);
+		if (ctx->qp[i] == NULL) {
+			return NULL;
+		}
+
+		if(ctx_modify_qp_to_init(ctx->qp[i],user_parm)) {
+			return NULL;
+		}
+	}
+
+	return ctx;
+}
+
+/****************************************************************************** 
+ * Move the QP(s) to RTR and, for a client or duplex run, qp[0] to RTS (0 = ok).
+ ******************************************************************************/
+static int pp_connect_ctx(struct pingpong_context *ctx,int my_psn,
+			              struct pingpong_dest *dest, 
+						  struct perftest_parameters *user_parm)
+{
+	struct ibv_qp_attr attr;
+	memset(&attr, 0, sizeof attr);
+	int i;
+
+	attr.qp_state 		= IBV_QPS_RTR;
+	attr.path_mtu       = user_parm->curr_mtu;
+    attr.dest_qp_num    = dest->qpn;
+	attr.rq_psn         = dest->psn;
+	attr.ah_attr.dlid   = dest->lid;
+	if (user_parm->connection_type == RC) {
+		attr.max_dest_rd_atomic     = 1;
+		attr.min_rnr_timer          = 12;
+	}
+	if (user_parm->gid_index < 0) {	// negative gid_index: no GRH, use the user's SL
+		attr.ah_attr.is_global  = 0;
+		attr.ah_attr.sl         = user_parm->sl;
+	} else {
+		attr.ah_attr.is_global  = 1;
+		attr.ah_attr.grh.dgid   = dest->gid;
+		attr.ah_attr.grh.sgid_index = user_parm->gid_index;
+		attr.ah_attr.grh.hop_limit = 1;
+		attr.ah_attr.sl         = 0;
+	}
+	attr.ah_attr.src_path_bits = 0;
+	attr.ah_attr.port_num   = user_parm->ib_port;
+	
+	if (user_parm->connection_type == RC) {
+		if (ibv_modify_qp(ctx->qp[0], &attr,
+				  IBV_QP_STATE              |
+				  IBV_QP_AV                 |
+				  IBV_QP_PATH_MTU           |
+				  IBV_QP_DEST_QPN           |
+				  IBV_QP_RQ_PSN             |
+				  IBV_QP_MIN_RNR_TIMER      |
+				  IBV_QP_MAX_DEST_RD_ATOMIC)) {
+			fprintf(stderr, "Failed to modify RC QP to RTR\n");
+			return 1;
+		}
+		attr.timeout            = user_parm->qp_timeout;	// these three are applied by the RC RTS call below
+		attr.retry_cnt          = 7;
+		attr.rnr_retry          = 7;
+	} else if (user_parm->connection_type == UC) {
+		if (ibv_modify_qp(ctx->qp[0], &attr,
+				  IBV_QP_STATE              |
+				  IBV_QP_AV                 |
+				  IBV_QP_PATH_MTU           |
+				  IBV_QP_DEST_QPN           |
+				  IBV_QP_RQ_PSN)) {
+			fprintf(stderr, "Failed to modify UC QP to RTR\n");
+			return 1;
+		}
+	} 
+	 
+	else {	// neither RC nor UC: every QP is moved to RTR with the state attribute only
+		for (i = 0; i < user_parm->num_of_qps; i++) {
+			if (ibv_modify_qp(ctx->qp[i],&attr,IBV_QP_STATE )) {
+				fprintf(stderr, "Failed to modify UD QP to RTR\n");
+				return 1;
+			}
+		}
+		if (user_parm->machine == CLIENT || user_parm->duplex) {	// only a sending side needs an address handle
+			ctx->ah = ibv_create_ah(ctx->pd,&attr.ah_attr);
+			if (!ctx->ah) {
+				fprintf(stderr, "Failed to create AH for UD\n");
+				return 1;
+			}
+		}
+	}
+
+	if (user_parm->machine == CLIENT || user_parm->duplex) {
+
+		attr.qp_state 	    = IBV_QPS_RTS;
+		attr.sq_psn 	    = my_psn;
+		if (user_parm->connection_type == RC) {
+			attr.max_rd_atomic  = 1;
+			if (ibv_modify_qp(ctx->qp[0], &attr,
+					IBV_QP_STATE              |
+					IBV_QP_SQ_PSN             |
+					IBV_QP_TIMEOUT            |
+					IBV_QP_RETRY_CNT          |
+					IBV_QP_RNR_RETRY          |
+					IBV_QP_MAX_QP_RD_ATOMIC)) {
+				fprintf(stderr, "Failed to modify RC QP to RTS\n");
+				return 1;
+			}
+
+		} else {	// UC and UD share this path; only qp[0] is moved to RTS
+			if(ibv_modify_qp(ctx->qp[0],&attr,IBV_QP_STATE |IBV_QP_SQ_PSN)) {
+				fprintf(stderr, "Failed to modify UC QP to RTS\n");
+				return 1;
+			}
+		}
+	}
+
+	return 0;
+}
+
+/****************************************************************************** 
+ * Build one receive WR per QP and post it rx_depth times (returns 0, or 1 on error).
+ ******************************************************************************/
+static int set_recv_wqes(struct pingpong_context *ctx,
+						 struct perftest_parameters *user_param) {
+						
+	int					i,j,buff_size;
+	int 				duplex_ind;
+	struct ibv_recv_wr  *bad_wr_recv;
+
+	i = (user_param->duplex && user_param->use_mcg) ? 1 : 0;	// duplex multicast: QP 0 gets no receives
+	duplex_ind = (user_param->duplex && !user_param->use_mcg) ? 1 : 0;	// 1 => receive area starts buff_size bytes into buf[i]
+
+	buff_size = BUFF_SIZE(ctx->size) + IF_UD_ADD(user_param->connection_type);
+
+	while (i < user_param->num_of_qps) {
+
+		ctx->sge_list[i].addr   = (uintptr_t)ctx->buf[i] + duplex_ind*buff_size;
+
+		if (user_param->connection_type == UD) 
+			ctx->sge_list[i].addr += (CACHE_LINE_SIZE - UD_ADDITION);
+
+		ctx->sge_list[i].length = SIZE(user_param->connection_type,user_param->size);
+		ctx->sge_list[i].lkey   = ctx->mr[i]->lkey;
+		ctx->rwr[i].sg_list     = &ctx->sge_list[i];
+		ctx->rwr[i].wr_id       = i;	// the run loops use wr_id as the QP index
+		ctx->rwr[i].next        = NULL;
+		ctx->rwr[i].num_sge	    = MAX_RECV_SGE;
+		ctx->my_addr[i]		    = (uintptr_t)ctx->buf[i] + duplex_ind*buff_size;
+		
+		for (j = 0; j < user_param->rx_depth; ++j) {
+
+			if (ibv_post_recv(ctx->qp[i],&ctx->rwr[i],&bad_wr_recv)) {
+				fprintf(stderr, "Couldn't post recv Qp = %d: counter=%d\n",i,j);
+				return 1;
+			}
+
+			if (user_param->size <= (CYCLE_BUFFER / 2))	// small messages: let the helper adjust the SGE address between posts
+				increase_loc_addr(&ctx->sge_list[i],user_param->size,j,ctx->my_addr[i],user_param->connection_type);
+		}
+		i++;
+	}
+	return 0;
+}
+
+/****************************************************************************** 
+ * Fill ctx->wr and ctx->list as a signaled SEND from buf[0]; adds UD addressing.
+ ******************************************************************************/
+static void set_send_wqe(struct pingpong_context *ctx,int rem_qpn,
+						 struct perftest_parameters *user_param) {
+
+	ctx->list.addr     = (uintptr_t)ctx->buf[0];
+	ctx->list.lkey 	   = ctx->mr[0]->lkey;	// list.length is not set here; the run_iter_* loops set it
+
+	ctx->wr.sg_list    = &ctx->list;
+	ctx->wr.num_sge    = 1;
+	ctx->wr.opcode     = IBV_WR_SEND;
+	ctx->wr.next       = NULL;
+	ctx->wr.wr_id      = PINGPONG_SEND_WRID;
+	ctx->wr.send_flags = IBV_SEND_SIGNALED;
+
+	if (user_param->connection_type == UD) {
+		ctx->wr.wr.ud.ah          = ctx->ah;
+		ctx->wr.wr.ud.remote_qkey = DEF_QKEY;
+		ctx->wr.wr.ud.remote_qpn  = rem_qpn;
+	}
+}
+
+/****************************************************************************** 
+ * Flush and reset every QP, re-initialise it and reconnect (returns 0, or 1 on error).
+ ******************************************************************************/
+static int pp_drain_qp(struct pingpong_context *ctx,
+						struct perftest_parameters *user_param,
+						int psn,struct pingpong_dest *dest,
+						struct mcast_parameters *mcg_params) {
+
+	struct ibv_qp_attr attr;
+	struct ibv_wc      wc;
+	int                i;
+
+	memset(&attr, 0, sizeof attr);
+
+	// Per QP: ERR -> poll the CQ until empty -> RESET -> ctx_modify_qp_to_init().
+	for (i = 0; i <  user_param->num_of_qps; i++) {
+		attr.qp_state = IBV_QPS_ERR;	// set per QP: the previous pass left RESET in attr
+		if (ibv_modify_qp(ctx->qp[i],&attr,IBV_QP_STATE)) {
+			fprintf(stderr, "Failed to modify RC QP to ERR\n");
+			return 1;
+		}
+
+		while (ibv_poll_cq(ctx->cq,1,&wc) > 0);	// stop on empty (0) and on poll error (<0)
+   
+		attr.qp_state = IBV_QPS_RESET;
+
+		if (ibv_modify_qp(ctx->qp[i],&attr,IBV_QP_STATE)) {
+			fprintf(stderr, "Failed to modify RC QP to RESET\n");
+			return 1;
+		}
+
+		if(ctx_modify_qp_to_init(ctx->qp[i],user_param)) {
+			return 1;
+		}
+
+		if (user_param->use_mcg) {
+
+			// Re-attach the receiving QPs: all of them on a one-way server, QPs 1..n-1 in duplex.
+			if ((!user_param->duplex && user_param->machine == SERVER) || (user_param->duplex && i > 0)) {
+				if (ibv_attach_mcast(ctx->qp[i],&mcg_params->mgid,mcg_params->mlid)) {
+					fprintf(stderr, "Couldn't attach QP to MultiCast group");
+					return 1;
+				}
+			}
+		}
+	}
+
+	if (pp_connect_ctx(ctx,psn,dest,user_param)) {
+		return 1;
+	}
+
+	return 0;
+}
+
+/****************************************************************************** 
+ * Print one REPORT_FMT line: message size, iterations, peak and average BW.
+ ******************************************************************************/
+static void print_report(struct perftest_parameters *user_param) {
+
+	double cycles_to_units;
+	unsigned long tsize;	/* Transferred size per iteration, in bytes */
+	int i, j;
+	int opt_posted = 0, opt_completed = 0;
+	cycles_t opt_delta;
+	cycles_t t;
+	double peak_bw = 0.0;	/* stays 0 when peak is disabled or cannot be computed */
+
+	opt_delta = tcompleted[opt_posted] - tposted[opt_completed];
+
+	if (user_param->noPeak == OFF) {
+		/* Find the peak bandwidth, unless asked not to in command line */
+		for (i = 0; i < user_param->iters; ++i)
+			for (j = i; j < user_param->iters; ++j) {
+				t = (tcompleted[j] - tposted[i]) / (j - i + 1);
+				if (t < opt_delta) {
+					opt_delta  = t;
+					opt_posted = i;
+					opt_completed = j;
+				}
+			}
+	}
+
+#if !(defined(__sparc))
+	cycles_to_units = get_cpu_mhz(user_param->cpu_freq_f) * 1000000;
+#else
+	cycles_to_units = 1000000000;
+#endif
+	tsize = (user_param->duplex ? 2UL : 1UL) * user_param->size;
+	if (user_param->noPeak == OFF && opt_delta != 0)	/* a zero delta would yield 0/0 (NaN) or inf */
+		peak_bw = tsize * cycles_to_units / opt_delta / 0x100000;
+	printf(REPORT_FMT,user_param->size,user_param->iters,peak_bw,
+	       tsize * user_param->iters * cycles_to_units /(tcompleted[user_param->iters - 1] - tposted[0]) / 0x100000);
+}
+
+/****************************************************************************** 
+ * Bidirectional loop: posts sends on qp[0] and re-posts receives until both
+ * the send-completion and receive counters reach user_param->iters.
+ * Important note: for UD/UC this is NOT the right way to measure BW, since the
+ * loop is driven from the send side while it should be driven from the receive
+ * side (or retries enabled in SW): the sender may be faster than the receiver.
+ * The receives posted in advance may not be enough, and the test can then
+ * deadlock with both sides stuck polling the CQ.
+ * This is not solved here for the general case (a separate UC/UD test is
+ * needed), but it should be OK when tx_depth is ~1/3 of the iteration count.
+ * Also note that the sender is limited in the number of outstanding sends,
+ * while the receiver is kept as full as possible.
+ ******************************************************************************/
+int run_iter_bi(struct pingpong_context *ctx, 
+				struct perftest_parameters *user_param)  {
+
+	int                     scnt    = 0;
+	int 					ccnt    = 0;
+	int 					rcnt    = 0;
+	int 					i       = 0;
+	int 					num_of_qps = user_param->num_of_qps;
+	int 					ne;
+	struct ibv_wc 			*wc          = NULL;
+	int 					*rcnt_for_qp = NULL;
+	struct ibv_recv_wr      *bad_wr_recv = NULL;
+	struct ibv_send_wr 		*bad_wr      = NULL;
+
+	ALLOCATE(rcnt_for_qp,int,user_param->num_of_qps);
+	ALLOCATE(wc,struct ibv_wc,DEF_WC_SIZE);
+	memset(rcnt_for_qp,0,sizeof(int)*user_param->num_of_qps);
+
+	if (user_param->use_mcg)
+		num_of_qps--; 	// NOTE(review): the local num_of_qps is not read again in this function
+	
+	// Set the length of the scatter in case of ALL option.
+	ctx->list.length = user_param->size;
+	ctx->list.addr   = (uintptr_t)ctx->buf[0];
+	ctx->wr.send_flags = IBV_SEND_SIGNALED;
+	
+	if (user_param->size <= user_param->inline_size) 
+		ctx->wr.send_flags |= IBV_SEND_INLINE; 
+
+	while (ccnt < user_param->iters || rcnt < user_param->iters) {
+                
+		while (scnt < user_param->iters && (scnt - ccnt) < user_param->tx_depth / 2) {	// at most tx_depth/2 sends in flight
+
+			if (scnt %  CQ_MODERATION == 0 && CQ_MODERATION > 1)
+				ctx->wr.send_flags &= ~IBV_SEND_SIGNALED;
+
+			tposted[scnt] = get_cycles();
+			if (ibv_post_send(ctx->qp[0],&ctx->wr, &bad_wr)) {
+				fprintf(stderr, "Couldn't post send: scnt=%d\n",scnt);
+				return 1;
+			}
+
+			if (user_param->size <= (CYCLE_BUFFER / 2))
+				increase_loc_addr(&ctx->list,user_param->size,scnt,(uintptr_t)ctx->buf[0],0);
+
+			++scnt;
+
+			if ((scnt % CQ_MODERATION) == (CQ_MODERATION - 1) || scnt == (user_param->iters - 1)) 
+				ctx->wr.send_flags |= IBV_SEND_SIGNALED;	// request a completion for the next send posted
+		}
+
+		if (user_param->use_event) {
+
+			if (ctx_notify_events(ctx->cq,ctx->channel)) {
+				fprintf(stderr,"Failed to notify events to CQ");
+				return 1;
+			}
+		}
+
+		do {
+			ne = ibv_poll_cq(ctx->cq,DEF_WC_SIZE,wc);
+			if (ne > 0) {
+				for (i = 0; i < ne; i++) {
+					
+					if (wc[i].status != IBV_WC_SUCCESS)
+						 NOTIFY_COMP_ERROR_SEND(wc[i],scnt,ccnt);
+
+					if ((int) wc[i].wr_id == PINGPONG_SEND_WRID) {
+						ccnt += CQ_MODERATION;	// each send completion is counted as CQ_MODERATION sends
+						if (ccnt >= user_param->iters - 1) 
+							tcompleted[user_param->iters - 1] = get_cycles();
+
+						else 
+							tcompleted[ccnt - 1] = get_cycles();
+					}
+
+					else {	// any other wr_id is used as the index of the receiving QP
+
+						rcnt_for_qp[wc[i].wr_id]++;
+						rcnt++;
+						if (ibv_post_recv(ctx->qp[wc[i].wr_id],&ctx->rwr[wc[i].wr_id],&bad_wr_recv)) {
+							fprintf(stderr, "Couldn't post recv Qp=%d rcnt=%d\n",(int)wc[i].wr_id , rcnt_for_qp[wc[i].wr_id]);
+							return 15;
+						}
+
+						if (user_param->size <= (CYCLE_BUFFER / 2))
+							increase_loc_addr(&ctx->sge_list[wc[i].wr_id],
+							  user_param->size,rcnt_for_qp[wc[i].wr_id] + user_param->rx_depth - 1,
+							  ctx->my_addr[wc[i].wr_id],user_param->connection_type);	
+					}
+				}
+			}
+		} while (ne > 0);
+
+		if (ne < 0) {
+			fprintf(stderr, "poll CQ failed %d\n", ne);
+			return 1;
+		}
+	}
+	
+	if (user_param->size <= user_param->inline_size) 
+		ctx->wr.send_flags &= ~IBV_SEND_INLINE;
+	
+	free(rcnt_for_qp);
+	free(wc);
+	return 0;
+}
+
+/****************************************************************************** 
+ * Receive-side loop: poll the CQ and re-post receives until iters completions.
+ ******************************************************************************/
+int run_iter_uni_server(struct pingpong_context *ctx, 
+						struct perftest_parameters *user_param) {
+
+	int 				rcnt = 0;
+	int 				ne,i;
+	int                 *rcnt_for_qp = NULL;
+	struct ibv_wc 		*wc          = NULL;
+	struct ibv_recv_wr  *bad_wr_recv = NULL;
+
+	ALLOCATE(wc,struct ibv_wc,DEF_WC_SIZE);
+	ALLOCATE(rcnt_for_qp,int,user_param->num_of_qps);
+
+	memset(rcnt_for_qp,0,sizeof(int)*user_param->num_of_qps);
+
+	while (rcnt < user_param->iters) {
+
+		if (user_param->use_event) {
+			if (ctx_notify_events(ctx->cq,ctx->channel)) {
+				fprintf(stderr ," Failed to notify events to CQ");
+				return 1;
+			}
+		}
+		
+		do {
+			ne = ibv_poll_cq(ctx->cq,DEF_WC_SIZE,wc);
+			if (ne > 0) {
+				for (i = 0; i < ne; i++) {
+					
+					if (wc[i].status != IBV_WC_SUCCESS) 
+						NOTIFY_COMP_ERROR_RECV(wc[i],rcnt_for_qp[wc[i].wr_id]);
+						
+					rcnt_for_qp[wc[i].wr_id]++;	// wr_id is used as the index of the receiving QP
+					tcompleted[rcnt++] = get_cycles();
+
+				   	if (ibv_post_recv(ctx->qp[wc[i].wr_id],&ctx->rwr[wc[i].wr_id],&bad_wr_recv)) {
+						fprintf(stderr, "Couldn't post recv Qp=%d rcnt=%d\n",(int)wc[i].wr_id,rcnt_for_qp[wc[i].wr_id]);
+						return 15;
+					}
+
+					if (user_param->size <= (CYCLE_BUFFER / 2))
+						increase_loc_addr(&ctx->sge_list[wc[i].wr_id],user_param->size,
+										  rcnt_for_qp[wc[i].wr_id] + user_param->rx_depth,
+										  ctx->my_addr[wc[i].wr_id],user_param->connection_type);						
+				}
+			}
+		} while (ne > 0);
+
+		if (ne < 0) {
+			fprintf(stderr, "Poll Recieve CQ failed %d\n", ne);
+			return 1;
+		}
+	}
+
+	tposted[0] = tcompleted[0];	// no sends here: print_report measures from the first receive completion
+	free(wc);
+	free(rcnt_for_qp);
+	return 0;
+}
+
+/****************************************************************************** 
+ * Send-side loop: post iters sends on qp[0] and collect their completions.
+ ******************************************************************************/
+int run_iter_uni_client(struct pingpong_context *ctx, 
+						struct perftest_parameters *user_param) {
+
+	int 		       ne;
+	int 			   i    = 0;
+	int                scnt = 0;
+	int                ccnt = 0;
+	struct ibv_wc      *wc     = NULL;
+	struct ibv_send_wr *bad_wr = NULL;
+
+	ALLOCATE(wc,struct ibv_wc,DEF_WC_SIZE);
+
+	// Set the length of the scatter in case of ALL option.
+	ctx->list.length = user_param->size;
+	ctx->list.addr   = (uintptr_t)ctx->buf[0];
+	ctx->wr.send_flags = IBV_SEND_SIGNALED; 
+
+	if (user_param->size <= user_param->inline_size) 
+		ctx->wr.send_flags |= IBV_SEND_INLINE; 
+	
+
+	while (scnt < user_param->iters || ccnt < user_param->iters) {
+		while (scnt < user_param->iters && (scnt - ccnt) < user_param->tx_depth ) {	// at most tx_depth sends in flight
+
+			if (scnt %  CQ_MODERATION == 0 && CQ_MODERATION > 1)
+				ctx->wr.send_flags &= ~IBV_SEND_SIGNALED;
+
+			tposted[scnt] = get_cycles();
+			if (ibv_post_send(ctx->qp[0], &ctx->wr, &bad_wr)) {
+				fprintf(stderr, "Couldn't post send: scnt=%d\n",scnt);
+				return 1;
+			}
+
+			if (user_param->size <= (CYCLE_BUFFER / 2))
+				increase_loc_addr(&ctx->list,user_param->size,scnt,(uintptr_t)ctx->buf[0],0);
+
+			scnt++;
+
+			if ((scnt % CQ_MODERATION) == (CQ_MODERATION - 1) || scnt == (user_param->iters - 1)) 
+				ctx->wr.send_flags |= IBV_SEND_SIGNALED;	// request a completion for the next send posted
+		}
+
+		if (ccnt < user_param->iters) {	
+			
+			if (user_param->use_event) {
+				if (ctx_notify_events(ctx->cq,ctx->channel)) {
+					fprintf(stderr , " Failed to notify events to CQ");
+					return 1;
+				}
+			} 
+			do {
+				ne = ibv_poll_cq(ctx->cq,DEF_WC_SIZE,wc);
+				if (ne > 0) {
+					for (i = 0; i < ne; i++) {	// only the first ne entries of wc were filled by the poll
+
+						if (wc[i].status != IBV_WC_SUCCESS) 
+							NOTIFY_COMP_ERROR_SEND(wc[i],scnt,ccnt);
+			
+						ccnt += CQ_MODERATION;	// each completion is counted as CQ_MODERATION sends
+						if (ccnt >= user_param->iters - 1) 
+							tcompleted[user_param->iters - 1] = get_cycles();
+
+						else 
+							tcompleted[ccnt - 1] = get_cycles();
+					}
+				}
+                         
+					
+			} while (ne > 0);
+
+			if (ne < 0) {
+				fprintf(stderr, "poll CQ failed\n");
+				return 1;
+			}
+		}
+	}
+
+	if (user_param->size <= user_param->inline_size) 
+		ctx->wr.send_flags &= ~IBV_SEND_INLINE;
+
+	free(wc);
+	return 0;
+}
+
+/****************************************************************************** 
+ * Entry point: parse options, set up IB resources, run the send BW test, report.
+ ******************************************************************************/
+int main(int argc, char *argv[])
+{
+	struct ibv_device		 	*ib_dev = NULL;
+	struct pingpong_context  	*ctx;
+	struct pingpong_dest	 	my_dest,rem_dest;
+	struct perftest_parameters  user_param;
+	struct mcast_parameters     mcg_params;
+	int                      	i = 0;
+	int                      	size_max_pow = 24;
+	int							size_of_arr;
+
+	// Pointer to the relevant run_iter function according to machine type.
+	int (*ptr_to_run_iter_uni)(struct pingpong_context*,struct perftest_parameters*);
+
+	/* init default values to user's parameters */
+	memset(&user_param, 0 , sizeof(struct perftest_parameters));
+	memset(&mcg_params, 0 , sizeof(struct mcast_parameters));
+	memset(&my_dest   , 0 , sizeof(struct pingpong_dest));
+	memset(&rem_dest   , 0 , sizeof(struct pingpong_dest));
+ 
+	user_param.verb    = SEND;
+	user_param.tst     = BW;
+	user_param.version = VERSION;
+
+	if (parser(&user_param,argv,argc)) 
+		return 1;
+
+	printf(RESULT_LINE);
+
+	user_param.rx_depth = (user_param.iters < user_param.rx_depth) ? user_param.iters : user_param.rx_depth ;	// cap rx_depth at iters
+
+    if (user_param.use_mcg) {
+
+		user_param.connection_type = UD;	// multicast forces UD
+		if (user_param.duplex) {
+			user_param.num_of_qps++;
+			printf("                    Send Bidirectional BW  -  Multicast Test\n");
+		}
+		else {
+			printf("                    Send BW  -  Multicast Test\n");
+			if (user_param.machine == CLIENT)
+				user_param.num_of_qps = 1;
+		}
+    }
+
+	else if (user_param.duplex) {
+		    printf("                    Send Bidirectional BW Test\n");
+	} else 
+		    printf("                    Send BW Test\n");
+
+	if (user_param.use_event) 
+		printf(" Test with events.\n");
+
+	if (user_param.connection_type == RC)
+		printf(" Connection type : RC\n");
+	else if (user_param.connection_type == UC)
+		printf(" Connection type : UC\n");
+	else{
+		printf(" Connection type : UD\n");
+	}
+	
+	// Done with parameter parsing. Perform setup.
+	if (user_param.all == ON) {
+		// since we run all sizes 
+		user_param.size = MAX_SIZE;
+	}
+
+	srand48(getpid() * time(NULL));
+	page_size = sysconf(_SC_PAGESIZE);
+
+	ib_dev = ctx_find_dev(user_param.ib_devname);
+	if (!ib_dev)
+		return 7;
+
+	mcg_params.ib_devname = ibv_get_device_name(ib_dev);
+
+	ctx = pp_init_ctx(ib_dev,&user_param);
+	if (!ctx)
+		return 1;
+
+	// Set up the Connection.
+	if (set_up_connection(ctx,&user_param,&my_dest,&mcg_params)) {
+		fprintf(stderr," Unable to set up socket connection\n");
+		return 1;
+	}	
+
+	// Init the connection and print the local data.
+	if (init_connection(&user_param,&my_dest)) {
+		fprintf(stderr," Unable to init the socket connection\n");
+		return 1;
+	}
+
+	// Shake hands and gather the other side's info.
+    if (ctx_hand_shake(&user_param,&my_dest,&rem_dest)) {
+        fprintf(stderr,"Failed to exchange date between server and clients\n");
+        return 1;
+        
+    }
+	// For printing only MGID in the remote side.
+	user_param.side = REMOTE;
+	ctx_print_pingpong_data(&rem_dest,&user_param);
+
+	// Join the send-side port to the multicast GID.
+	if (user_param.use_mcg && (user_param.machine == CLIENT || user_param.duplex)) {
+		memcpy(mcg_params.mgid.raw, rem_dest.gid.raw, 16);
+		if (set_mcast_group(ctx,&user_param,&mcg_params)) {
+			fprintf(stderr," Unable to Join Sender to Mcast gid\n");
+			return 1;
+		}
+	}
+
+	// Prepare IB resources for rtr/rts.
+	if (pp_connect_ctx(ctx,my_dest.psn,&rem_dest,&user_param)) {
+		fprintf(stderr," Unable to Connect the HCA's through the link\n");
+		return 1;
+	}
+	
+	// Second handshake, after the QPs have been connected.
+    if (ctx_hand_shake(&user_param,&my_dest,&rem_dest)) {
+        fprintf(stderr,"Failed to exchange date between server and clients\n");
+        return 1;
+        
+    }
+
+	if (user_param.use_event) {
+		if (ibv_req_notify_cq(ctx->cq, 0)) {
+			fprintf(stderr, " Couldn't request CQ notification\n");
+			return 1;
+		} 
+	}
+
+	printf(RESULT_LINE);
+	printf(RESULT_FMT);
+
+	size_of_arr = (user_param.duplex) ? 1 : user_param.num_of_qps;	// timestamp arrays hold iters*size_of_arr entries
+
+	ALLOCATE(tposted,cycles_t,user_param.iters*size_of_arr);
+	ALLOCATE(tcompleted,cycles_t,user_param.iters*size_of_arr);
+
+	if (user_param.machine == SERVER || user_param.duplex) {	// receive-side bookkeeping, one entry per QP
+		ALLOCATE(ctx->rwr,struct ibv_recv_wr,user_param.num_of_qps);
+		ALLOCATE(ctx->sge_list,struct ibv_sge,user_param.num_of_qps);
+		ALLOCATE(ctx->my_addr ,uint64_t ,user_param.num_of_qps);
+	}
+
+	ptr_to_run_iter_uni = (user_param.machine == CLIENT) ?	&run_iter_uni_client : &run_iter_uni_server;
+	
+	if (user_param.machine == SERVER && !user_param.duplex) {
+		user_param.noPeak = ON;	// the one-way server loop records only tposted[0], so no peak search
+	}
+
+	if (user_param.machine == CLIENT || user_param.duplex) {
+		set_send_wqe(ctx,rem_dest.qpn,&user_param);
+	}
+
+	if (user_param.all == ON) {
+
+		if (user_param.connection_type == UD) 
+		   size_max_pow =  (int)UD_MSG_2_EXP(MTU_SIZE(user_param.curr_mtu)) + 1;
+
+		for (i = 1; i < size_max_pow ; ++i) {	// message sizes 2, 4, ..., 2^(size_max_pow-1)
+			user_param.size = 1 << i;
+
+			if (user_param.machine == SERVER || user_param.duplex) {
+				if (set_recv_wqes(ctx,&user_param)) {
+					fprintf(stderr," Failed to post receive recv_wqes\n");
+					return 1;
+				}
+			}
+
+			if (ctx_hand_shake(&user_param,&my_dest,&rem_dest)) {
+				fprintf(stderr,"Failed to exchange date between server and clients\n");
+				return 1;
+			}
+
+			if (user_param.duplex) {
+				if(run_iter_bi(ctx,&user_param))
+					return 17;
+			} else {
+				if((*ptr_to_run_iter_uni)(ctx,&user_param))
+					return 17;
+			}
+			print_report(&user_param);
+
+			if (pp_drain_qp(ctx,&user_param,my_dest.psn,&rem_dest,&mcg_params)) {	// reset the QPs before the next size
+				fprintf(stderr,"Failed to drain Recv queue (performance optimization)\n");
+				return 1;
+			}
+
+			if (ctx_hand_shake(&user_param,&my_dest,&rem_dest)) {
+				fprintf(stderr,"Failed to exchange date between server and clients\n");
+				return 1;
+			}
+        
+		}
+
+	} else {
+
+		if (user_param.machine == SERVER || user_param.duplex) {
+			if (set_recv_wqes(ctx,&user_param)) {
+				fprintf(stderr," Failed to post receive recv_wqes\n");
+				return 1;
+			}
+		}
+
+		if (ctx_hand_shake(&user_param,&my_dest,&rem_dest)) {
+			fprintf(stderr,"Failed to exchange date between server and clients\n");
+			return 1;
+		}
+
+		if (user_param.duplex) {
+			if(run_iter_bi(ctx,&user_param))
+				return 18;
+
+		} else {
+			if((*ptr_to_run_iter_uni)(ctx,&user_param))
+				return 18;
+		}
+
+		print_report(&user_param);	
+	}
+		
+	if (ctx_close_connection(&user_param,&my_dest,&rem_dest)) {
+		fprintf(stderr," Failed to close connection between server and client\n");
+		return 1;
+	}
+
+	printf(RESULT_LINE);
+	return destroy_ctx_resources(ctx,&user_param,&my_dest,&rem_dest,&mcg_params);
+}
diff -r -u /tmp/perftest-1.3.0/send_lat.c perftest-1.3.0/send_lat.c
--- /tmp/perftest-1.3.0/send_lat.c	Wed Mar  2 16:04:50 2011
+++ perftest-1.3.0/send_lat.c	Fri Aug 26 05:29:53 2011
@@ -61,7 +61,8 @@
 	struct ibv_sge          *sge_list;
 	struct ibv_recv_wr      *rwr;
 	struct ibv_context      *context;
-	struct ibv_comp_channel *channel;
+	struct ibv_comp_channel *rx_channel;
+	struct ibv_comp_channel *tx_channel;
 	struct ibv_pd           *pd;
 	struct ibv_mr           *mr;
 	struct ibv_cq           *rcq;
@@ -259,13 +260,20 @@
 		test_result = 1;
 	}
 
-	if (ctx->channel) {
-		if (ibv_destroy_comp_channel(ctx->channel)) {
-			fprintf(stderr, "failed to destroy channel \n");
+	if (ctx->rx_channel) {
+		if (ibv_destroy_comp_channel(ctx->rx_channel)) {
+			fprintf(stderr, "failed to destroy rx_channel \n");
 			test_result = 1;
 		}
 	}
 	
+	if (ctx->tx_channel) {
+		if (ibv_destroy_comp_channel(ctx->tx_channel)) {
+			fprintf(stderr, "failed to destroy tx_channel \n");
+			test_result = 1;
+		}
+	}
+	
 	if (ibv_close_device(ctx->context)) {
 		fprintf(stderr, "failed to close device context\n");
 		test_result = 1;
@@ -328,13 +336,20 @@
 	memset(ctx->buf, 0,buff_size);
 
     if (user_parm->use_event) {
-		ctx->channel = ibv_create_comp_channel(ctx->context);
-		if (!ctx->channel) {
-			fprintf(stderr, "Couldn't create completion channel\n");
+		ctx->rx_channel = ibv_create_comp_channel(ctx->context);
+		if (!ctx->rx_channel) {
+			fprintf(stderr, "Couldn't create completion rx_channel\n");
 			return NULL;
 		}
-	} else
-		ctx->channel = NULL;
+		ctx->tx_channel = ibv_create_comp_channel(ctx->context);
+		if (!ctx->tx_channel) {	/* check the channel just created, not rx_channel */
+			fprintf(stderr, "Couldn't create completion tx_channel\n");
+			return NULL;
+		}
+	} else {
+		ctx->rx_channel = NULL;
+		ctx->tx_channel = NULL;
+	}
 
 	ctx->pd = ibv_alloc_pd(ctx->context);
 	if (!ctx->pd) {
@@ -348,13 +363,13 @@
 		return NULL;
 	}
 	
-	ctx->scq = ibv_create_cq(ctx->context,user_parm->tx_depth,NULL,ctx->channel,0);
+	ctx->scq = ibv_create_cq(ctx->context,user_parm->tx_depth,NULL,ctx->tx_channel,0);
 	if (!ctx->scq) {
 	    fprintf(stderr, "Couldn't create CQ\n");
 		return NULL;
 	}
 
-	ctx->rcq = ibv_create_cq(ctx->context,user_parm->rx_depth*user_parm->num_of_qps,NULL,ctx->channel,0);
+	ctx->rcq = ibv_create_cq(ctx->context,user_parm->rx_depth*user_parm->num_of_qps,NULL,ctx->rx_channel,0);
 	if (!ctx->rcq) {
 	    fprintf(stderr, "Couldn't create CQ\n");
 		return NULL;
@@ -581,12 +596,20 @@
 	for (i = 0; i < user_param->iters - 1; ++i)
 		delta[i] = tstamp[i + 1] - tstamp[i];
 
-
 	if (user_param->r_flag->cycles) {
+#if !(defined(__sparc))
 		cycles_to_units = 1;
+#else
+		cycles_to_units =
+		        (1/get_cpu_mhz(user_param->cpu_freq_f)) * 1000;
+#endif
 		units = "cycles";
 	} else {
+#if !(defined(__sparc))
 		cycles_to_units = get_cpu_mhz(user_param->cpu_freq_f);
+#else
+		cycles_to_units = 1000;
+#endif
 		units = "usec";
 	}
 
@@ -649,7 +672,7 @@
 		  
 			// Server is polling on recieve first .
 		    if (user_param->use_event) {
-				if (ctx_notify_events(ctx->rcq,ctx->channel)) {
+				if (ctx_notify_events(ctx->rcq, ctx->rx_channel)) {
 					fprintf(stderr , " Failed to notify events to CQ");
 					return 1;
 				}
@@ -701,7 +724,7 @@
 		    int s_ne;
 
 		    if (user_param->use_event) {
-				if (ctx_notify_events(ctx->scq,ctx->channel)) {
+				if (ctx_notify_events(ctx->scq, ctx->tx_channel)) {
 					fprintf(stderr , " Failed to notify events to CQ");
 					return 1;
 				}
diff -r -u /tmp/perftest-1.3.0/multicast_resources.c perftest-1.3.0/multicast_resources.c
--- /tmp/perftest-1.3.0/multicast_resources.c	Thu Dec 16 08:21:05 2010
+++ perftest-1.3.0/multicast_resources.c	Fri Feb 11 04:12:48 2011
@@ -4,7 +4,9 @@
 #include <unistd.h>
 #include <string.h>
 #include <limits.h>
+#if !(defined(__SVR4) && defined(__sun))
 #include <byteswap.h>
+#endif
 #include <arpa/inet.h>
 #include <sys/types.h>
 #include <sys/socket.h>
diff -r -u /tmp/perftest-1.3.0/write_bw.c perftest-1.3.0/write_bw.c
--- /tmp/perftest-1.3.0/write_bw.c	Wed Mar  2 11:48:20 2011
+++ perftest-1.3.0/write_bw.c	Tue Mar 15 12:04:28 2011
@@ -45,6 +45,7 @@
 #include <limits.h>
 #include <malloc.h>
 #include <getopt.h>
+#include <inttypes.h>
 #include <time.h>
 #include <infiniband/verbs.h>
 
@@ -224,7 +225,7 @@
 		return NULL;
 	}
 
-	if (is_dev_hermon(ctx->context) != NOT_HERMON && user_parm->inline_size != 0)
+	if (is_dev_hermon(ctx->context) == NOT_HERMON && user_parm->inline_size != 0)
 		user_parm->inline_size = 0;
 
 	printf(" Inline data is used up to %d bytes message\n", user_parm->inline_size);
@@ -368,7 +369,6 @@
 	cycles_t t;
 	int iters = user_param->iters;
 
-
 	opt_delta = tcompleted[opt_posted] - tposted[opt_completed];
 
 	if (user_param->noPeak == OFF) {
@@ -384,7 +384,11 @@
 		  }
 	}
 	
+#if !(defined(__sparc))
 	cycles_to_units = get_cpu_mhz(user_param->cpu_freq_f) * 1000000;
+#else
+	cycles_to_units = 1000000000;
+#endif
 
 	tsize = user_param->duplex ? 2 : 1;
 	tsize = tsize * user_param->size;
diff -r -u /tmp/perftest-1.3.0/write_bw_postlist.c perftest-1.3.0/write_bw_postlist.c
--- /tmp/perftest-1.3.0/write_bw_postlist.c	Thu Mar  3 17:03:54 2011
+++ perftest-1.3.0/write_bw_postlist.c	Tue Mar 15 11:59:53 2011
@@ -46,6 +46,7 @@
 #include <malloc.h>
 #include <getopt.h>
 #include <time.h>
+#include <inttypes.h>
 #include <infiniband/verbs.h>
 
 #include "get_clock.h"
@@ -323,7 +324,11 @@
             }
 	}
 
+#if !(defined(__SVR4) && defined(__sun))
 	cycles_to_units = get_cpu_mhz(user_param->cpu_freq_f) * 1000000;
+#else
+	cycles_to_units = 1000000000;
+#endif
 
 	tsize = user_param->duplex ? 2 : 1;
 	tsize = tsize * user_param->size;
diff -r -u /tmp/perftest-1.3.0/write_lat.c perftest-1.3.0/write_lat.c
--- /tmp/perftest-1.3.0/write_lat.c	Sat Feb 26 01:02:48 2011
+++ perftest-1.3.0/write_lat.c	Tue Mar 15 12:01:35 2011
@@ -46,6 +46,7 @@
 #include <malloc.h>
 #include <getopt.h>
 #include <time.h>
+#include <inttypes.h>
 #include <infiniband/verbs.h>
 
 #include "get_clock.h"
@@ -330,10 +331,19 @@
 
 
 	if (user_param->r_flag->cycles) {
+#if !(defined(__sparc))
 		cycles_to_units = 1;
+#else
+		cycles_to_units =
+		    (1/get_cpu_mhz(user_param->cpu_freq_f)) * 1000;
+#endif
 		units = "cycles";
 	} else {
+#if !(defined(__sparc))
 		cycles_to_units = get_cpu_mhz(user_param->cpu_freq_f);
+#else
+		cycles_to_units = 1000;
+#endif
 		units = "usec";
 	}