usr/src/uts/common/io/ib/clients/iser/iser_ib.c
author Peter Gill <Peter.Gill@Sun.COM>
Mon, 24 May 2010 18:58:35 -0400
changeset 12485 c8d0d0397145
parent 11093 92e0bcf89283
permissions -rw-r--r--
6940166 iSER assertion panic triggered when iSCSI discovery is activated on IB interface 6828570 setting a headerdigest or datadigest flag to an IB port causes the initiator to not see the LUN

/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
 */

#include <sys/types.h>
#include <sys/ddi.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <sys/sunddi.h>
#include <sys/sysmacros.h>
#include <sys/iscsi_protocol.h>

#include <sys/ib/clients/iser/iser.h>
#include <sys/ib/clients/iser/iser_idm.h>

/*
 * iser_ib.c
 * Routines for InfiniBand transport for iSER
 *
 * This file contains the routines to interface with the IBT API to attach and
 * allocate IB resources, handle async events, and post recv work requests.
 *
 */

/* HCA lookup: find the tracked iser_hca_t for a port GID or an HCA GUID */
static iser_hca_t *iser_ib_gid2hca(ib_gid_t gid);
static iser_hca_t *iser_ib_guid2hca(ib_guid_t guid);

/* HCA allocation/teardown and maintenance of the global HCA list */
static iser_hca_t *iser_ib_alloc_hca(ib_guid_t guid);
static int iser_ib_free_hca(iser_hca_t *hca);
static int iser_ib_update_hcaports(iser_hca_t *hca);
static int iser_ib_init_hcas(void);
static int iser_ib_fini_hcas(void);

/* Per-port service binding helpers */
static iser_sbind_t *iser_ib_get_bind(
    iser_svc_t *iser_svc, ib_guid_t hca_guid, ib_gid_t gid);
static int iser_ib_activate_port(
    idm_svc_t *idm_svc, ib_guid_t guid, ib_gid_t gid);
static void iser_ib_deactivate_port(ib_guid_t hca_guid, ib_gid_t gid);

/* Setup/teardown of the iSER QP bookkeeping embedded in a channel */
static void iser_ib_init_qp(iser_chan_t *chan, uint_t sq_size, uint_t rq_size);
static void iser_ib_fini_qp(iser_qp_t *qp);

/* Completion queue allocation */
static int iser_ib_setup_cq(ibt_hca_hdl_t hca_hdl, uint_t cq_size,
    ibt_cq_hdl_t *cq_hdl);

/* Fill in the RC channel allocation arguments */
static void iser_ib_setup_chanargs(uint8_t hca_port, ibt_cq_hdl_t scq_hdl,
    ibt_cq_hdl_t rcq_hdl, uint_t sq_size, uint_t rq_size,
    ibt_pd_hdl_t hca_pdhdl, ibt_rc_chan_alloc_args_t *cargs);

/* Handlers for the individual asynchronous events */
static void iser_ib_handle_portup_event(ibt_hca_hdl_t hdl,
    ibt_async_event_t *event);
static void iser_ib_handle_portdown_event(ibt_hca_hdl_t hdl,
    ibt_async_event_t *event);
static void iser_ib_handle_hca_detach_event(ibt_hca_hdl_t hdl,
    ibt_async_event_t *event);

/* Taskq entry point used by iser_ib_post_recv_async() */
static void iser_ib_post_recv_task(void *arg);

/*
 * Client registration information handed to ibt_attach() by iser_ib_init().
 * The third member is the asynchronous event handler, iser_ib_async_handler(),
 * and the last is the client name string.
 */
static struct ibt_clnt_modinfo_s iser_ib_modinfo = {
	IBTI_V_CURR,
	IBT_STORAGE_DEV,
	iser_ib_async_handler,
	NULL,
	"iSER"
};

/*
 * iser_ib_init
 *
 * Attach iSER to IBTF as a client, create the global work request cache
 * and build the list of HCAs. Registering and binding iSER as a service
 * is not done here; the target does that when it needs to.
 *
 * Returns DDI_SUCCESS, or DDI_FAILURE if the attach or the HCA
 * initialization failed (anything set up so far is torn down again).
 */
int
iser_ib_init(void)
{
	int	rc;

	/* Attach to the IBT framework */
	rc = ibt_attach(&iser_ib_modinfo, iser_state->is_dip, iser_state,
	    &iser_state->is_ibhdl);
	if (rc != DDI_SUCCESS) {
		ISER_LOG(CE_NOTE, "iser_ib_init: ibt_attach failed (0x%x)",
		    rc);
		return (DDI_FAILURE);
	}

	/* Global kmem cache that work requests are allocated from */
	iser_state->iser_wr_cache = kmem_cache_create("iser_wr_cache",
	    sizeof (iser_wr_t), 0, NULL, NULL, NULL,
	    iser_state, NULL, KM_SLEEP);

	/* Discover the HCAs and populate our list */
	rc = iser_ib_init_hcas();
	if (rc == DDI_SUCCESS) {
		/*
		 * Nothing more to do here: the target registers iSER as
		 * a service with IBTF when required, and binds that
		 * service when it comes online.
		 */
		return (DDI_SUCCESS);
	}

	/* No usable HCA state; undo the cache creation and the attach */
	kmem_cache_destroy(iser_state->iser_wr_cache);
	(void) ibt_detach(iser_state->is_ibhdl);
	iser_state->is_ibhdl = NULL;
	ISER_LOG(CE_NOTE, "iser_ib_init: failed to initialize HCAs");
	return (DDI_FAILURE);
}

/*
 * iser_ib_fini
 *
 * Tear down the HCA list, destroy the global work request cache and
 * detach from IBTF. IDM is expected to have disabled all the services
 * before this is called.
 *
 * Returns DDI_FAILURE (leaving the cache and the IBTF attachment in place)
 * if the HCAs could not be torn down, DDI_SUCCESS otherwise.
 */
int
iser_ib_fini(void)
{
	/* The HCA list and its resources go first */
	if (iser_ib_fini_hcas() != DDI_SUCCESS) {
		return (DDI_FAILURE);
	}

	/* Then the global work request kmem_cache */
	kmem_cache_destroy(iser_state->iser_wr_cache);

	/* Nothing to detach if we never attached (or already detached) */
	if (iser_state->is_ibhdl == NULL) {
		return (DDI_SUCCESS);
	}

	/* Finally, deregister with IBTF */
	(void) ibt_detach(iser_state->is_ibhdl);
	iser_state->is_ibhdl = NULL;

	return (DDI_SUCCESS);
}

/*
 * iser_ib_register_service
 *
 * Register the iSER service of the given IDM service with IBTF, using the
 * service ID stored in its iser_svc_t. The resulting service handle is
 * stored back into the iser_svc_t.
 *
 * Returns the status of ibt_register_service().
 */
int
iser_ib_register_service(idm_svc_t *idm_svc)
{
	iser_svc_t	*isvc = (iser_svc_t *)idm_svc->is_iser_svc;
	ibt_srv_desc_t	desc;

	/* Describe the service: our CM callback handler, no special flags */
	bzero(&desc, sizeof (desc));
	desc.sd_handler = iser_ib_cm_handler;
	desc.sd_flags = IBT_SRV_NO_FLAGS;

	/* Register a single service ID */
	return (ibt_register_service(iser_state->is_ibhdl, &desc,
	    isvc->is_svcid, 1, &isvc->is_srvhdl, NULL));
}

/*
 * iser_ib_bind_service
 *
 * Bind the given iSER service on every active port of every known HCA.
 * The current specification does not allow the user to specify transport
 * bindings per iSCSI target, so the ULP calls this to bind the target to
 * all available iSER ports once it has found an IB HCA. iSER counts as
 * "configured" whenever an IB-capable IP address exists; a lack of active
 * IB ports is a less fatal condition, in which case sockets are used as
 * the transport even though an InfiniBand HCA is configured.
 *
 * Returns ISER_STATUS_SUCCESS if at least one port is bound, or if nothing
 * is bound but at least one port was skipped for being inactive (the async
 * handler binds it once the port comes up). Returns ISER_STATUS_FAIL
 * otherwise.
 */
int
iser_ib_bind_service(idm_svc_t *idm_svc)
{
	iser_hca_t	*hca;
	ib_gid_t	port_gid;
	int		nports = 0;
	int		nbound = 0;
	int		ninactive = 0;	/* ports skipped as not active */
	int		rc;
	int		p;

	ASSERT(idm_svc != NULL);
	ASSERT(idm_svc->is_iser_svc != NULL);

	/* Walk every port of every HCA under the HCA list lock */
	mutex_enter(&iser_state->is_hcalist_lock);

	hca = list_head(&iser_state->is_hcalist);
	while (hca != NULL) {
		for (p = 0; p < hca->hca_num_ports; p++) {
			nports++;

			/*
			 * Inactive ports are skipped; the async handler
			 * attempts the bind if the port comes up later.
			 */
			if (hca->hca_port_info[p].p_linkstate !=
			    IBT_PORT_ACTIVE) {
				ninactive++;
				continue;
			}

			port_gid = hca->hca_port_info[p].p_sgid_tbl[0];

			/* A port that is already bound just gets counted */
			if (iser_ib_get_bind(idm_svc->is_iser_svc,
			    hca->hca_guid, port_gid) != NULL) {
				nbound++;
				continue;
			}

			rc = iser_ib_activate_port(
			    idm_svc, hca->hca_guid, port_gid);
			if (rc == IBT_SUCCESS) {
				nbound++;
				continue;
			}

			ISER_LOG(CE_NOTE,
			    "iser_ib_bind_service: "
			    "iser_ib_activate_port failure "
			    "(0x%x)", rc);
		}
		hca = list_next(&iser_state->is_hcalist, hca);
	}

	mutex_exit(&iser_state->is_hcalist_lock);

	if (nbound != 0) {
		ISER_LOG(CE_NOTE, "iser_ib_bind_service: Service available on "
		    "(%d) of (%d) ports", nbound, nports);
		return (ISER_STATUS_SUCCESS);
	}

	if (ninactive != 0) {
		ISER_LOG(CE_NOTE, "iser_ib_bind_service: Could not bind "
		    "service, HCA ports are not active.");
		/*
		 * Still a success: the async handler binds the service
		 * when the port comes up at a later time.
		 */
		return (ISER_STATUS_SUCCESS);
	}

	ISER_LOG(CE_NOTE, "iser_ib_bind_service: Did not bind service");
	return (ISER_STATUS_FAIL);
}

/*
 * iser_ib_unbind_service
 *
 * Unbind the given service from all HCA ports: every entry on the
 * service's bind list is unbound from IBTF, removed from the list and
 * freed. A NULL idm_svc, or one without an iSER service, is ignored.
 */
void
iser_ib_unbind_service(idm_svc_t *idm_svc)
{
	iser_svc_t	*isvc;
	iser_sbind_t	*sb, *sb_next;

	if (idm_svc == NULL || idm_svc->is_iser_svc == NULL) {
		return;
	}

	isvc = idm_svc->is_iser_svc;

	sb = list_head(&isvc->is_sbindlist);
	while (sb != NULL) {
		/* Fetch the successor first; sb is freed below */
		sb_next = list_next(&isvc->is_sbindlist, sb);

		(void) ibt_unbind_service(isvc->is_srvhdl, sb->is_sbindhdl);
		list_remove(&isvc->is_sbindlist, sb);
		kmem_free(sb, sizeof (iser_sbind_t));

		sb = sb_next;
	}
}

/*
 * iser_ib_deregister_service
 *
 * Deregister the iSER service of the given IDM service from IBTF and
 * release its IP service ID. A NULL idm_svc, or one without an iSER
 * service, is ignored.
 */
/* ARGSUSED */
void
iser_ib_deregister_service(idm_svc_t *idm_svc)
{
	iser_svc_t	*isvc;

	if (idm_svc == NULL || idm_svc->is_iser_svc == NULL) {
		return;
	}

	isvc = (iser_svc_t *)idm_svc->is_iser_svc;

	(void) ibt_deregister_service(iser_state->is_ibhdl, isvc->is_srvhdl);
	(void) ibt_release_ip_sid(isvc->is_svcid);
}

/*
 * iser_ib_get_paths
 *
 * Find an IB path between the local and the remote IP address.
 *
 * local_ip	source address, or NULL to leave the source unspecified
 *		(logged as INADDR_ANY)
 * remote_ip	destination address
 * path		filled in with the path found (zeroed first)
 * path_src_ip	filled in by ibt_get_ip_paths() with the source IP info
 *
 * Returns ISER_STATUS_SUCCESS, or the failing status from
 * ibt_get_ip_paths().
 */
int
iser_ib_get_paths(ibt_ip_addr_t *local_ip, ibt_ip_addr_t *remote_ip,
    ibt_path_info_t *path, ibt_path_ip_src_t *path_src_ip)
{
	ibt_ip_path_attr_t	ipattr;
	int			status;

	(void) bzero(&ipattr, sizeof (ibt_ip_path_attr_t));
	ipattr.ipa_dst_ip	= remote_ip;
	/*
	 * A NULL local_ip is a supported case (see the logging below), so
	 * it must not be dereferenced. The source address then stays
	 * zeroed from the bzero() above.
	 */
	if (local_ip != NULL) {
		ipattr.ipa_src_ip = *local_ip;
	}
	ipattr.ipa_max_paths	= 1;
	ipattr.ipa_ndst		= 1;

	(void) bzero(path, sizeof (ibt_path_info_t));
	status = ibt_get_ip_paths(iser_state->is_ibhdl, IBT_PATH_NO_FLAGS,
	    &ipattr, path, NULL, path_src_ip);
	if (status != IBT_SUCCESS) {
		ISER_LOG(CE_NOTE, "ibt_get_ip_paths: ibt_get_ip_paths "
		    "failure: status (%d)", status);
		return (status);
	}

	if (local_ip != NULL) {
		ISER_LOG(CE_NOTE, "iser_ib_get_paths success: IP[%x to %x]",
		    local_ip->un.ip4addr, remote_ip->un.ip4addr);
	} else {
		ISER_LOG(CE_NOTE, "iser_ib_get_paths success: "
		    "IP[INADDR_ANY to %x]", remote_ip->un.ip4addr);
	}

	return (ISER_STATUS_SUCCESS);
}

/*
 * iser_ib_alloc_channel_nopathlookup
 *
 * Allocate a reliable connected channel on the HCA and port given by the
 * caller. Unlike iser_ib_alloc_channel_pathlookup(), this does not invoke
 * ibt_get_ip_paths() to do a path lookup.
 *
 * Returns the new channel, or NULL if the HCA is unknown or the channel
 * could not be allocated.
 */
iser_chan_t *
iser_ib_alloc_channel_nopathlookup(ib_guid_t hca_guid, uint8_t hca_port)
{
	iser_hca_t	*hca;
	iser_chan_t	*chan;

	/* Lookup the hca using the caller-supplied HCA GUID */
	hca = iser_ib_guid2hca(hca_guid);
	if (hca == NULL) {
		ISER_LOG(CE_NOTE, "iser_ib_alloc_channel_nopathlookup: failed "
		    "to lookup HCA(%llx) handle", (longlong_t)hca_guid);
		return (NULL);
	}

	chan = iser_ib_alloc_rc_channel(hca, hca_port);
	if (chan == NULL) {
		ISER_LOG(CE_NOTE, "iser_ib_alloc_channel_nopathlookup: failed "
		    "to alloc channel on HCA(%llx) %d",
		    (longlong_t)hca_guid, hca_port);
		return (NULL);
	}

	/* Log under this function's own name, not the pathlookup variant */
	ISER_LOG(CE_NOTE, "iser_ib_alloc_channel_nopathlookup success: "
	    "chanhdl (0x%p), HCA(%llx) %d",
	    (void *)chan->ic_chanhdl, (longlong_t)hca_guid, hca_port);

	return (chan);
}

/*
 * iser_ib_alloc_channel_pathlookup
 *
 * Allocate a reliable connected channel, first invoking
 * iser_ib_get_paths() with the given local and remote address to obtain
 * the local GID and the HCA port number to use.
 *
 * On success the path, the primary source IP reported by the lookup and
 * the remote IP are recorded in the channel.
 *
 * Returns the new channel, or NULL if the path lookup, the HCA lookup or
 * the channel allocation failed.
 */
iser_chan_t *
iser_ib_alloc_channel_pathlookup(
    ibt_ip_addr_t *local_ip, ibt_ip_addr_t *remote_ip)
{
	ibt_path_info_t		ibt_path;
	ibt_path_ip_src_t	path_src_ip;
	ib_gid_t		lgid;
	uint8_t			hca_port; /* from path */
	iser_hca_t		*hca;
	iser_chan_t		*chan;
	longlong_t		src4, dst4; /* for logging only */
	int			status;

	/*
	 * iser_ib_get_paths() treats a NULL local_ip as "any", so do not
	 * dereference it blindly just to build log messages.
	 */
	src4 = (local_ip != NULL) ? (longlong_t)local_ip->un.ip4addr : 0;
	dst4 = (longlong_t)remote_ip->un.ip4addr;

	/* Lookup a path to the given destination */
	status = iser_ib_get_paths(
	    local_ip, remote_ip, &ibt_path, &path_src_ip);

	if (status != ISER_STATUS_SUCCESS) {
		ISER_LOG(CE_NOTE, "iser_ib_alloc_channel_pathlookup: faild "
		    "Path lookup IP:[%llx to %llx] failed: status (%d)",
		    src4, dst4, status);
		return (NULL);
	}

	/* get the local gid from the path info */
	lgid = ibt_path.pi_prim_cep_path.cep_adds_vect.av_sgid;

	/* get the hca port from the path info */
	hca_port = ibt_path.pi_prim_cep_path.cep_hca_port_num;

	/* Lookup the hca using the gid in the path info */
	hca = iser_ib_gid2hca(lgid);
	if (hca == NULL) {
		/*
		 * There is no HCA to report here; log the GID that failed
		 * to resolve rather than dereferencing the NULL hca.
		 */
		ISER_LOG(CE_NOTE, "iser_ib_alloc_channel_pathlookup: failed "
		    "to lookup HCA for lgid (%llx:%llx)",
		    (longlong_t)lgid.gid_prefix, (longlong_t)lgid.gid_guid);
		return (NULL);
	}

	chan = iser_ib_alloc_rc_channel(hca, hca_port);
	if (chan == NULL) {
		ISER_LOG(CE_NOTE, "iser_ib_alloc_channel_pathlookup: failed "
		    "to alloc channel from IP:[%llx to %llx] on HCA (%llx) %d",
		    src4, dst4, (longlong_t)hca->hca_guid, hca_port);
		return (NULL);
	}

	ISER_LOG(CE_NOTE, "iser_ib_alloc_channel_pathlookup success: "
	    "chanhdl (0x%p), IP:[%llx to %llx], lgid (%llx:%llx), HCA(%llx) %d",
	    (void *)chan->ic_chanhdl, src4, dst4,
	    (longlong_t)lgid.gid_prefix, (longlong_t)lgid.gid_guid,
	    (longlong_t)hca->hca_guid, hca_port);

	chan->ic_ibt_path	= ibt_path;
	chan->ic_localip	= path_src_ip.ip_primary;
	chan->ic_remoteip	= *remote_ip;

	return (chan);
}

/*
 * iser_ib_alloc_rc_channel
 *
 * This function allocates a reliable communication channel using the specified
 * channel attributes.
 */
iser_chan_t *
iser_ib_alloc_rc_channel(iser_hca_t *hca, uint8_t hca_port)
{

	iser_chan_t			*chan;
	ibt_rc_chan_alloc_args_t	chanargs;
	uint_t				sq_size, rq_size;
	int				status;

	chan = kmem_zalloc(sizeof (iser_chan_t), KM_SLEEP);

	mutex_init(&chan->ic_chan_lock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&chan->ic_sq_post_lock, NULL, MUTEX_DRIVER, NULL);

	/* Set up the iSER channel handle with HCA */
	chan->ic_hca		= hca;

	/*
	 * Determine the queue sizes, based upon the HCA query data.
	 * For our Work Queues, we will use either our default value,
	 * or the HCA's maximum value, whichever is smaller.
	 */
	sq_size = min(hca->hca_attr.hca_max_chan_sz, ISER_IB_SENDQ_SIZE);
	rq_size = min(hca->hca_attr.hca_max_chan_sz, ISER_IB_RECVQ_SIZE);

	/*
	 * For our Completion Queues, we again check the device maximum.
	 * We want to end up with CQs that are the next size up from the
	 * WQs they are servicing so that they have some overhead.
	 */
	if (hca->hca_attr.hca_max_cq_sz >= (sq_size + 1)) {
		chan->ic_sendcq_sz = sq_size + 1;
	} else {
		chan->ic_sendcq_sz = hca->hca_attr.hca_max_cq_sz;
		sq_size = chan->ic_sendcq_sz - 1;
	}

	if (hca->hca_attr.hca_max_cq_sz >= (rq_size + 1)) {
		chan->ic_recvcq_sz = rq_size + 1;
	} else {
		chan->ic_recvcq_sz = hca->hca_attr.hca_max_cq_sz;
		rq_size = chan->ic_recvcq_sz - 1;
	}

	/* Initialize the iSER channel's QP handle */
	iser_ib_init_qp(chan, sq_size, rq_size);

	/* Set up the Send Completion Queue */
	status = iser_ib_setup_cq(hca->hca_hdl, chan->ic_sendcq_sz,
	    &chan->ic_sendcq);
	if (status != ISER_STATUS_SUCCESS) {
		iser_ib_fini_qp(&chan->ic_qp);
		mutex_destroy(&chan->ic_chan_lock);
		mutex_destroy(&chan->ic_sq_post_lock);
		kmem_free(chan, sizeof (iser_chan_t));
		return (NULL);
	}
	ibt_set_cq_handler(chan->ic_sendcq, iser_ib_sendcq_handler, chan);
	(void) ibt_enable_cq_notify(chan->ic_sendcq, IBT_NEXT_COMPLETION);

	/* Set up the Receive Completion Queue */
	status = iser_ib_setup_cq(hca->hca_hdl, chan->ic_recvcq_sz,
	    &chan->ic_recvcq);
	if (status != ISER_STATUS_SUCCESS) {
		(void) ibt_free_cq(chan->ic_sendcq);
		iser_ib_fini_qp(&chan->ic_qp);
		mutex_destroy(&chan->ic_chan_lock);
		mutex_destroy(&chan->ic_sq_post_lock);
		kmem_free(chan, sizeof (iser_chan_t));
		return (NULL);
	}
	ibt_set_cq_handler(chan->ic_recvcq, iser_ib_recvcq_handler, chan);
	(void) ibt_enable_cq_notify(chan->ic_recvcq, IBT_NEXT_COMPLETION);

	/* Setup the channel arguments */
	iser_ib_setup_chanargs(hca_port, chan->ic_sendcq, chan->ic_recvcq,
	    sq_size, rq_size, hca->hca_pdhdl, &chanargs);

	status = ibt_alloc_rc_channel(hca->hca_hdl,
	    IBT_ACHAN_NO_FLAGS, &chanargs, &chan->ic_chanhdl, NULL);
	if (status != IBT_SUCCESS) {
		ISER_LOG(CE_NOTE, "iser_ib_alloc_rc_channel: failed "
		    "ibt_alloc_rc_channel: status (%d)", status);
		(void) ibt_free_cq(chan->ic_sendcq);
		(void) ibt_free_cq(chan->ic_recvcq);
		iser_ib_fini_qp(&chan->ic_qp);
		mutex_destroy(&chan->ic_chan_lock);
		mutex_destroy(&chan->ic_sq_post_lock);
		kmem_free(chan, sizeof (iser_chan_t));
		return (NULL);
	}

	/* Set the 'channel' as the client private data */
	(void) ibt_set_chan_private(chan->ic_chanhdl, chan);

	return (chan);
}

/*
 * iser_ib_open_rc_channel
 *
 * Open an RC connection on the given, already allocated, RC channel. The
 * open is performed in blocking mode with the channel lock held.
 *
 * Returns IDM_STATUS_SUCCESS, or the failing status from
 * ibt_format_ip_private_data() or ibt_open_rc_channel().
 */
int
iser_ib_open_rc_channel(iser_chan_t *chan)
{
	ibt_ip_cm_info_t	ipcm_info;
	iser_private_data_t	iser_priv_data;
	ibt_chan_open_args_t	ocargs;
	ibt_rc_returns_t	ocreturns;
	int			status;

	mutex_enter(&chan->ic_chan_lock);

	/*
	 * For connection establishment, the initiator sends a CM REQ using the
	 * iSER RDMA-Aware Service ID. Included are the source and destination
	 * IP addresses, and the src port.
	 */
	bzero(&ipcm_info, sizeof (ibt_ip_cm_info_t));
	ipcm_info.src_addr = chan->ic_localip;
	ipcm_info.dst_addr = chan->ic_remoteip;
	ipcm_info.src_port = chan->ic_lport;

	/*
	 * The CM Private Data field defines the iSER connection parameters
	 * such as zero based virtual address exception (ZBVAE) and Send with
	 * invalidate Exception (SIE).
	 *
	 * Solaris IBT does not currently support ZBVAE or SIE.
	 *
	 * The structure lives on the stack and is handed to the CM as
	 * private data, so zero all of it first; any member not explicitly
	 * set below would otherwise carry stale stack contents.
	 */
	bzero(&iser_priv_data, sizeof (iser_private_data_t));
	iser_priv_data.rsvd1	= 0;
	iser_priv_data.sie	= 1;
	iser_priv_data.zbvae	= 1;

	status = ibt_format_ip_private_data(&ipcm_info,
	    sizeof (iser_private_data_t), &iser_priv_data);
	if (status != IBT_SUCCESS) {
		ISER_LOG(CE_NOTE, "iser_ib_open_rc_channel failed: %d", status);
		mutex_exit(&chan->ic_chan_lock);
		return (status);
	}

	/*
	 * Set the SID we are attempting to connect to, based upon the
	 * remote port number.
	 */
	chan->ic_ibt_path.pi_sid = ibt_get_ip_sid(IPPROTO_TCP, chan->ic_rport);

	/* Set up the args for the channel open */
	bzero(&ocargs, sizeof (ibt_chan_open_args_t));
	ocargs.oc_path			= &chan->ic_ibt_path;
	ocargs.oc_cm_handler		= iser_ib_cm_handler;
	ocargs.oc_cm_clnt_private	= iser_state;
	ocargs.oc_rdma_ra_out		= 4;
	ocargs.oc_rdma_ra_in		= 4;
	ocargs.oc_path_retry_cnt	= 2;
	ocargs.oc_path_rnr_retry_cnt	= 2;
	ocargs.oc_priv_data_len		= sizeof (iser_private_data_t);
	ocargs.oc_priv_data		= &iser_priv_data;

	bzero(&ocreturns, sizeof (ibt_rc_returns_t));

	status = ibt_open_rc_channel(chan->ic_chanhdl,
	    IBT_OCHAN_NO_FLAGS, IBT_BLOCKING, &ocargs, &ocreturns);

	if (status != IBT_SUCCESS) {
		ISER_LOG(CE_NOTE, "iser_ib_open_rc_channel failed: %d", status);
		mutex_exit(&chan->ic_chan_lock);
		return (status);
	}

	mutex_exit(&chan->ic_chan_lock);
	return (IDM_STATUS_SUCCESS);
}

/*
 * iser_ib_close_rc_channel
 *
 * Close the RC channel belonging to this iser_chan handle. The close is
 * issued with IBT_BLOCKING while the channel lock is held, and passes no
 * private data and no return buffers. A failure is logged only.
 */
void
iser_ib_close_rc_channel(iser_chan_t *chan)
{
	int	rc;

	mutex_enter(&chan->ic_chan_lock);

	rc = ibt_close_rc_channel(chan->ic_chanhdl, IBT_BLOCKING, NULL,
	    0, NULL, NULL, 0);
	if (rc != IBT_SUCCESS) {
		ISER_LOG(CE_NOTE, "iser_ib_close_rc_channel: "
		    "ibt_close_rc_channel failed: status (%d)", rc);
	}

	mutex_exit(&chan->ic_chan_lock);
}

/*
 * iser_ib_free_rc_channel
 *
 * This function tears down an RC channel's QP initialization and frees it.
 * Note that we do not need synchronization here; the channel has been
 * closed already, so we should only have completion polling occurring.  Once
 * complete, we are free to free the IBTF channel, WQ and CQ resources, and
 * our own related resources.
 *
 * NOTE(review): the wait loops below drop and re-take
 * chan->ic_conn->ic_lock around each delay(), so this function appears to
 * expect the caller to hold that lock on entry and returns with it still
 * held -- confirm against the callers.
 */
void
iser_ib_free_rc_channel(iser_chan_t *chan)
{
	iser_qp_t	*iser_qp;

	iser_qp = &chan->ic_qp;

	/*
	 * Ensure the SQ is empty: poll the outstanding send count every
	 * half second, releasing the connection lock while sleeping.
	 */
	while (chan->ic_sq_post_count != 0) {
		mutex_exit(&chan->ic_conn->ic_lock);
		delay(drv_usectohz(ISER_DELAY_HALF_SECOND));
		mutex_enter(&chan->ic_conn->ic_lock);
	}
	mutex_destroy(&chan->ic_sq_post_lock);

	/*
	 * Ensure the RQ is empty: flush the channel, then poll rq_level
	 * under qp_lock. Both qp_lock and the connection lock are dropped
	 * while sleeping and re-taken in the order ic_lock, then qp_lock.
	 */
	(void) ibt_flush_channel(chan->ic_chanhdl);
	mutex_enter(&iser_qp->qp_lock);
	while (iser_qp->rq_level != 0) {
		mutex_exit(&iser_qp->qp_lock);
		mutex_exit(&chan->ic_conn->ic_lock);
		delay(drv_usectohz(ISER_DELAY_HALF_SECOND));
		mutex_enter(&chan->ic_conn->ic_lock);
		mutex_enter(&iser_qp->qp_lock);
	}

	/* Free our QP handle (qp_lock must be released first) */
	mutex_exit(&iser_qp->qp_lock);
	(void) iser_ib_fini_qp(iser_qp);

	/* Free the IBT channel resources */
	(void) ibt_free_channel(chan->ic_chanhdl);
	chan->ic_chanhdl = NULL;

	/* Free the CQs */
	(void) ibt_free_cq(chan->ic_sendcq);
	(void) ibt_free_cq(chan->ic_recvcq);

	/* Free the chan handle */
	mutex_destroy(&chan->ic_chan_lock);
	kmem_free(chan, sizeof (iser_chan_t));
}

/*
 * iser_ib_post_recv_async
 *
 * Schedule iser_ib_post_recv() for the given channel on the iSER taskq.
 * iser_ib_post_recv() is what keeps the RQ full on a channel: it checks
 * the current fill level of the RQ and posts as many WRs as necessary to
 * fill it again, and it is mostly run from the taskq via this routine.
 *
 * A hold is taken on the IDM connection for the lifetime of the queued
 * task. It is dropped again here if the dispatch fails, and by the task
 * itself otherwise.
 *
 * Returns the status of ddi_taskq_dispatch().
 */

int
iser_ib_post_recv_async(ibt_channel_hdl_t chanhdl)
{
	iser_chan_t	*chan = (iser_chan_t *)ibt_get_chan_private(chanhdl);
	int		rc;

	/*
	 * The caller must have verified, via chan->ic_conn->ic_stage, that
	 * the connection is active (neither closing nor closed), and must
	 * hold the connection mutex across both that check and this call.
	 */
	ASSERT(mutex_owned(&chan->ic_conn->ic_lock));
	ASSERT((chan->ic_conn->ic_stage >= ISER_CONN_STAGE_ALLOCATED) &&
	    (chan->ic_conn->ic_stage <= ISER_CONN_STAGE_LOGGED_IN));

	idm_conn_hold(chan->ic_conn->ic_idmc);

	rc = ddi_taskq_dispatch(iser_taskq, iser_ib_post_recv_task,
	    (void *)chanhdl, DDI_NOSLEEP);
	if (rc == DDI_SUCCESS) {
		return (rc);
	}

	/* Nothing was queued, so nobody else will drop the hold */
	idm_conn_rele(chan->ic_conn->ic_idmc);
	return (rc);
}

static void
iser_ib_post_recv_task(void *arg)
{
	ibt_channel_hdl_t	chanhdl = arg;
	iser_chan_t		*chan;

	/* Pull our iSER channel handle from the private data */
	chan = (iser_chan_t *)ibt_get_chan_private(chanhdl);

	iser_ib_post_recv(chanhdl);
	idm_conn_rele(chan->ic_conn->ic_idmc);
}

/*
 * iser_ib_post_recv
 *
 * Refill the receive queue of the channel identified by chanhdl: work out
 * how much room the RQ has, pull that many messages from the HCA's message
 * cache and post them as receive WRs, at most ISER_IB_RQ_POST_MAX per
 * ibt_post_recv() call.
 *
 * The connection lock (ic_conn->ic_lock) is taken for the whole routine.
 * Nothing is posted if the connection is closing or closed. If no messages
 * are available, or the RQ level is still zero after posting, the routine
 * re-dispatches itself through iser_ib_post_recv_async(). On every other
 * exit past the space check, rq_taskqpending is cleared.
 */
void
iser_ib_post_recv(ibt_channel_hdl_t chanhdl)
{
	iser_chan_t	*chan;
	iser_hca_t	*hca;
	iser_msg_t	*msg;
	ibt_recv_wr_t	*wrlist, wr[ISER_IB_RQ_POST_MAX];
	int		rq_space, msg_ret;
	int		total_num, npost;
	uint_t		nposted;
	int		status, i;
	iser_qp_t	*iser_qp;

	/* Pull our iSER channel handle from the private data */
	chan = (iser_chan_t *)ibt_get_chan_private(chanhdl);

	ASSERT(chan != NULL);

	mutex_enter(&chan->ic_conn->ic_lock);

	/* Bail out if the connection is closed; no need for more recv WRs */
	if ((chan->ic_conn->ic_stage == ISER_CONN_STAGE_CLOSING) ||
	    (chan->ic_conn->ic_stage == ISER_CONN_STAGE_CLOSED)) {
		mutex_exit(&chan->ic_conn->ic_lock);
		return;
	}

	/* get the QP handle from the iser_chan */
	iser_qp = &chan->ic_qp;

	hca = chan->ic_hca;

	if (hca == NULL) {
		ISER_LOG(CE_NOTE, "iser_ib_post_recv: unable to retrieve "
		    "HCA handle");
		mutex_exit(&chan->ic_conn->ic_lock);
		return;
	}

	/* check for space to post on the RQ */
	mutex_enter(&iser_qp->qp_lock);
	rq_space = iser_qp->rq_depth - iser_qp->rq_level;
	if (rq_space == 0) {
		/* The RQ is full, clear the pending flag and return */
		iser_qp->rq_taskqpending = B_FALSE;
		mutex_exit(&iser_qp->qp_lock);
		mutex_exit(&chan->ic_conn->ic_lock);
		return;
	}

	/* Keep track of the lowest value for rq_min_post_level */
	if (iser_qp->rq_level < iser_qp->rq_min_post_level)
		iser_qp->rq_min_post_level = iser_qp->rq_level;

	/*
	 * qp_lock is dropped here; rq_space is a snapshot from this point
	 * on, and rq_level is only adjusted again once posting is done.
	 */
	mutex_exit(&iser_qp->qp_lock);

	/* we've room to post, so pull from the msg cache */
	msg = iser_msg_get(hca, rq_space, &msg_ret);
	if (msg == NULL) {
		ISER_LOG(CE_NOTE, "iser_ib_post_recv: no message handles "
		    "available in msg cache currently");
		/*
		 * There are no messages on the cache. Wait a half-
		 * second, then try again.
		 *
		 * NOTE(review): this delay() runs with ic_conn->ic_lock
		 * still held -- confirm that sleeping under it is intended.
		 */
		delay(drv_usectohz(ISER_DELAY_HALF_SECOND));
		status = iser_ib_post_recv_async(chanhdl);
		if (status != DDI_SUCCESS) {
			ISER_LOG(CE_NOTE, "iser_ib_post_recv: failed to "
			    "redispatch routine");
			/* Failed to dispatch, clear pending flag */
			mutex_enter(&iser_qp->qp_lock);
			iser_qp->rq_taskqpending = B_FALSE;
			mutex_exit(&iser_qp->qp_lock);
		}
		mutex_exit(&chan->ic_conn->ic_lock);
		return;
	}

	if (msg_ret != rq_space) {
		ISER_LOG(CE_NOTE, "iser_ib_post_recv: requested number of "
		    "messages not allocated: requested (%d) allocated (%d)",
		    rq_space, msg_ret);
		/* We got some, but not all, of our requested depth */
		rq_space = msg_ret;
	}

	/*
	 * Now, walk through the allocated WRs and post them,
	 * ISER_IB_RQ_POST_MAX (or less) at a time.
	 */
	wrlist = &wr[0];
	total_num = rq_space;

	while (total_num) {
		/* determine the number to post on this iteration */
		npost = (total_num > ISER_IB_RQ_POST_MAX) ?
		    ISER_IB_RQ_POST_MAX : total_num;

		/*
		 * build a list of WRs from the msg list; the msg pointer
		 * itself is the work request id, and msg advances along
		 * the nextp chain as entries are consumed
		 */
		for (i = 0; i < npost; i++) {
			wrlist[i].wr_id		= (ibt_wrid_t)(uintptr_t)msg;
			wrlist[i].wr_nds	= ISER_IB_SGLIST_SIZE;
			wrlist[i].wr_sgl	= &msg->msg_ds;
			msg = msg->nextp;
		}

		/* post the list to the RQ */
		nposted = 0;
		status = ibt_post_recv(chanhdl, wrlist, npost, &nposted);
		if ((status != IBT_SUCCESS) || (nposted != npost)) {
			ISER_LOG(CE_NOTE, "iser_ib_post_recv: ibt_post_recv "
			    "failed: requested (%d) posted (%d) status (%d)",
			    npost, nposted, status);
			/*
			 * NOTE(review): the messages that were not posted
			 * are not released anywhere in this function --
			 * confirm that they are reclaimed elsewhere.
			 */
			total_num -= nposted;
			break;
		}

		/* decrement total number to post by the number posted */
		total_num -= nposted;
	}

	/* Account for what was actually posted; total_num is the shortfall */
	mutex_enter(&iser_qp->qp_lock);
	if (total_num != 0) {
		ISER_LOG(CE_NOTE, "iser_ib_post_recv: unable to fill RQ, "
		    "failed to post (%d) WRs", total_num);
		iser_qp->rq_level += rq_space - total_num;
	} else {
		iser_qp->rq_level += rq_space;
	}

	/*
	 * Now that we've filled the RQ, check that all of the recv WRs
	 * haven't just been immediately consumed. If so, taskqpending is
	 * still B_TRUE, so we need to fire off a taskq thread to post
	 * more WRs.
	 */
	if (iser_qp->rq_level == 0) {
		mutex_exit(&iser_qp->qp_lock);
		status = iser_ib_post_recv_async(chanhdl);
		if (status != DDI_SUCCESS) {
			ISER_LOG(CE_NOTE, "iser_ib_post_recv: failed to "
			    "dispatch followup routine");
			/* Failed to dispatch, clear pending flag */
			mutex_enter(&iser_qp->qp_lock);
			iser_qp->rq_taskqpending = B_FALSE;
			mutex_exit(&iser_qp->qp_lock);
		}
	} else {
		/*
		 * We're done, we've filled the RQ. Clear the taskq
		 * flag so that we can run again.
		 */
		iser_qp->rq_taskqpending = B_FALSE;
		mutex_exit(&iser_qp->qp_lock);
	}

	mutex_exit(&chan->ic_conn->ic_lock);
}

/*
 * iser_ib_handle_portup_event()
 * Handles the IBT_EVENT_PORT_UP unaffiliated asynchronous event.
 *
 * The port information for the HCA is refreshed first; an HCA that is not
 * yet on our list is allocated and added. Then, so that the newly active
 * port can take inbound iSER service requests, every IDM target service is
 * visited. A service that already has an iSER CM service is simply bound
 * to the GID of the new port. A service without one, i.e. socket
 * communication only, first gets a new CM service created from its saved
 * service parameters and is then bound to the new port.
 */
/* ARGSUSED */
static void
iser_ib_handle_portup_event(ibt_hca_hdl_t hdl, ibt_async_event_t *event)
{
	iser_hca_t		*hca;
	idm_svc_t		*svc;
	ib_gid_t		port_gid;
	int			rc;

	ISER_LOG(CE_NOTE, "iser_ib_handle_portup_event: HCA(0x%llx) port(%d)",
	    (longlong_t)event->ev_hca_guid, event->ev_port);

	hca = iser_ib_guid2hca(event->ev_hca_guid);
	if (hca != NULL) {
		/*
		 * Known HCA: query all of its ports and update the port
		 * information maintained in the iser_hca_t structure.
		 */
		rc = iser_ib_update_hcaports(hca);

		if (rc != IBT_SUCCESS) {
			ISER_LOG(CE_NOTE, "iser_ib_handle_portup_event "
			    "status(0x%x): iser_ib_update_hcaports failed: "
			    "HCA(0x%llx) port(%d)", rc,
			    (longlong_t)event->ev_hca_guid, event->ev_port);
			return;
		}
	} else {
		/* The HCA was just made available; this is its first port */
		hca = iser_ib_alloc_hca(event->ev_hca_guid);
		if (hca == NULL) {
			ISER_LOG(CE_NOTE, "iser_ib_handle_portup_event "
			    "iser_ib_alloc_hca failed: HCA(0x%llx) port(%d)",
			    (longlong_t)event->ev_hca_guid, event->ev_port);
			return;
		}

		mutex_enter(&iser_state->is_hcalist_lock);
		list_insert_tail(&iser_state->is_hcalist, hca);
		iser_state->is_num_hcas++;
		mutex_exit(&iser_state->is_hcalist_lock);
	}

	/* Event port numbers are 1-based, the port info array is 0-based */
	port_gid = hca->hca_port_info[event->ev_port - 1].p_sgid_tbl[0];

	/*
	 * Walk the global list of IDM target services, making sure each
	 * has an iSER CM service and binding it to the new port.
	 */
	mutex_enter(&idm.idm_global_mutex);

	for (svc = list_head(&idm.idm_tgt_svc_list);
	    svc != NULL;
	    svc = list_next(&idm.idm_tgt_svc_list, svc)) {

		if (svc->is_iser_svc == NULL) {
			/* Establish a new CM service for iSER requests */
			rc = iser_tgt_svc_create(&svc->is_svc_req, svc);

			if (rc != IBT_SUCCESS) {
				ISER_LOG(CE_NOTE, "iser_ib_handle_portup_event "
				    "status(0x%x): iser_tgt_svc_create failed: "
				    "HCA(0x%llx) port(%d)", rc,
				    (longlong_t)event->ev_hca_guid,
				    event->ev_port);
				continue;
			}
		}

		rc = iser_ib_activate_port(svc, event->ev_hca_guid, port_gid);
		if (rc == IBT_SUCCESS) {
			ISER_LOG(CE_NOTE, "iser_ib_handle_portup_event: "
			    "service bound "
			    "HCA(0x%llx) port(%d)",
			    (longlong_t)event->ev_hca_guid, event->ev_port);
		} else {
			ISER_LOG(CE_NOTE, "iser_ib_handle_portup_event "
			    "status(0x%x): Bind service on port "
			    "(%llx:%llx) failed",
			    rc, (longlong_t)port_gid.gid_prefix,
			    (longlong_t)port_gid.gid_guid);
		}
	}

	mutex_exit(&idm.idm_global_mutex);

	ISER_LOG(CE_NOTE, "iser_ib_handle_portup_event success: "
	    "HCA(0x%llx) port(%d)", (longlong_t)event->ev_hca_guid,
	    event->ev_port);
}

/*
 * iser_ib_handle_portdown_event()
 * This handles the IBT_ERROR_PORT_DOWN unaffiliated asynchronous error.
 *
 * Refresh the port information of the affected HCA, then deactivate the
 * port that went down, identified by its GID, through
 * iser_ib_deactivate_port().
 */
/* ARGSUSED */
static void
iser_ib_handle_portdown_event(ibt_hca_hdl_t hdl, ibt_async_event_t *event)
{
	iser_hca_t		*hca;
	ib_gid_t		gid;
	int			status;

	/*
	 * Query all ports on the HCA and update the port information
	 * maintained in the iser_hca_t structure
	 */
	hca = iser_ib_guid2hca(event->ev_hca_guid);
	ASSERT(hca != NULL);
	if (hca == NULL) {
		/*
		 * ASSERT compiles away on non-DEBUG kernels; do not
		 * dereference an HCA we are not tracking.
		 */
		ISER_LOG(CE_NOTE, "iser_ib_handle_portdown_event: "
		    "unknown HCA(0x%llx) port(%d)",
		    (longlong_t)event->ev_hca_guid, event->ev_port);
		return;
	}

	status = iser_ib_update_hcaports(hca);
	if (status != IBT_SUCCESS) {
		ISER_LOG(CE_NOTE, "iser_ib_handle_portdown_event status(0x%x): "
		    "ibt_ib_update_hcaports failed: HCA(0x%llx) port(%d)",
		    status, (longlong_t)event->ev_hca_guid, event->ev_port);
		return;
	}

	/* get the gid of the port that went down (event ports are 1-based) */
	gid = hca->hca_port_info[event->ev_port - 1].p_sgid_tbl[0];
	iser_ib_deactivate_port(event->ev_hca_guid, gid);

	ISER_LOG(CE_NOTE, "iser_ib_handle_portdown_event success: "
	    "HCA(0x%llx) port(%d)", (longlong_t)event->ev_hca_guid,
	    event->ev_port);
}

/*
 * iser_ib_handle_hca_detach_event()
 * Deactivate every port of the detaching HCA, then remove the HCA from the
 * list kept in iser_state and free its resources. If the HCA cannot be
 * freed it is put back on the list; there is no way to report that
 * failure to IBT.
 */
/* ARGSUSED */
static void
iser_ib_handle_hca_detach_event(ibt_hca_hdl_t hdl, ibt_async_event_t *event)
{
	iser_hca_t	*nexthca, *hca;
	int		i, status;

	ISER_LOG(CE_NOTE, "iser_ib_handle_hca_detach_event: HCA(0x%llx)",
	    (longlong_t)event->ev_hca_guid);

	hca = iser_ib_guid2hca(event->ev_hca_guid);
	if (hca == NULL) {
		/* Not an HCA we are tracking; there is nothing to tear down */
		ISER_LOG(CE_NOTE, "iser_ib_handle_hca_detach_event: "
		    "unknown HCA(0x%llx)", (longlong_t)event->ev_hca_guid);
		return;
	}

	for (i = 0; i < hca->hca_num_ports; i++) {
		iser_ib_deactivate_port(hca->hca_guid,
		    hca->hca_port_info[i].p_sgid_tbl[0]);
	}

	/*
	 * Update the HCA list maintained in the iser_state. Free the
	 * resources allocated to the HCA, i.e. caches, protection domain
	 */
	mutex_enter(&iser_state->is_hcalist_lock);

	for (hca = list_head(&iser_state->is_hcalist);
	    hca != NULL;
	    hca = nexthca) {

		nexthca = list_next(&iser_state->is_hcalist, hca);

		if (hca->hca_guid != event->ev_hca_guid)
			continue;

		list_remove(&iser_state->is_hcalist, hca);
		iser_state->is_num_hcas--;

		status = iser_ib_free_hca(hca);
		if (status != DDI_SUCCESS) {
			ISER_LOG(CE_WARN, "iser_ib_handle_hca_detach: "
			    "Failed to free hca(%p)", (void *)hca);
			list_insert_tail(&iser_state->is_hcalist, hca);
			iser_state->is_num_hcas++;
		}
		/* No way to return status to IBT if this fails */

		/*
		 * Stop at the first match. Without this, an HCA that was
		 * re-inserted at the tail after a failed free would be
		 * visited, and freed, a second time by this same walk.
		 */
		break;
	}
	mutex_exit(&iser_state->is_hcalist_lock);

}

/*
 * iser_ib_async_handler
 * IBT asynchronous event handler, registered with the framework through
 * ibt_attach(). It dispatches on the event code:
 *
 * IBT_EVENT_PORT_UP		-> iser_ib_handle_portup_event()
 * IBT_ERROR_PORT_DOWN		-> iser_ib_handle_portdown_event()
 * IBT_HCA_DETACH_EVENT		-> iser_ib_handle_hca_detach_event()
 * IBT_HCA_ATTACH_EVENT		-> ignored
 *
 * Any other code is ignored as well.
 */
/* ARGSUSED */
void
iser_ib_async_handler(void *clntp, ibt_hca_hdl_t hdl, ibt_async_code_t code,
    ibt_async_event_t *event)
{
	if (code == IBT_EVENT_PORT_UP) {
		iser_ib_handle_portup_event(hdl, event);
	} else if (code == IBT_ERROR_PORT_DOWN) {
		iser_ib_handle_portdown_event(hdl, event);
	} else if (code == IBT_HCA_DETACH_EVENT) {
		iser_ib_handle_hca_detach_event(hdl, event);
	}

	/*
	 * IBT_HCA_ATTACH_EVENT needs no action of its own: a new HCA
	 * becoming available is followed by the corresponding
	 * IBT_EVENT_PORT_UP events, which are handled above.
	 */
}

/*
 * iser_ib_init_hcas
 *
 * This function opens all the HCA devices, gathers the HCA state information
 * and adds the HCA handle for each HCA found in the iser_soft_state.
 */
static int
iser_ib_init_hcas(void)
{
	ib_guid_t	*guid;
	int		num_hcas;
	int		i;
	iser_hca_t	*hca;

	/* Retrieve the HCA list */
	num_hcas = ibt_get_hca_list(&guid);
	if (num_hcas == 0) {
		/*
		 * This shouldn't happen, but might if we have all HCAs
		 * detach prior to initialization.
		 */
		return (DDI_FAILURE);
	}

	/* Initialize the hcalist lock */
	mutex_init(&iser_state->is_hcalist_lock, NULL, MUTEX_DRIVER, NULL);

	/* Create the HCA list */
	list_create(&iser_state->is_hcalist, sizeof (iser_hca_t),
	    offsetof(iser_hca_t, hca_node));

	for (i = 0; i < num_hcas; i++) {

		ISER_LOG(CE_NOTE, "iser_ib_init_hcas: initializing HCA "
		    "(0x%llx)", (longlong_t)guid[i]);

		hca = iser_ib_alloc_hca(guid[i]);
		if (hca == NULL) {
			/* This shouldn't happen, teardown and fail */
			(void) iser_ib_fini_hcas();
			(void) ibt_free_hca_list(guid, num_hcas);
			return (DDI_FAILURE);
		}

		mutex_enter(&iser_state->is_hcalist_lock);
		list_insert_tail(&iser_state->is_hcalist, hca);
		iser_state->is_num_hcas++;
		mutex_exit(&iser_state->is_hcalist_lock);

	}

	/* Free the IBT HCA list */
	(void) ibt_free_hca_list(guid, num_hcas);

	/* Check that we've initialized at least one HCA */
	mutex_enter(&iser_state->is_hcalist_lock);
	if (list_is_empty(&iser_state->is_hcalist)) {
		ISER_LOG(CE_NOTE, "iser_ib_init_hcas: failed to initialize "
		    "any HCAs");

		mutex_exit(&iser_state->is_hcalist_lock);
		(void) iser_ib_fini_hcas();
		return (DDI_FAILURE);
	}
	mutex_exit(&iser_state->is_hcalist_lock);

	return (DDI_SUCCESS);
}

/*
 * iser_ib_fini_hcas
 *
 * Teardown the iSER HCA list built by iser_ib_init_hcas(): unlink and free
 * every HCA, then destroy the list and its lock.
 *
 * Returns DDI_SUCCESS once everything has been released. If an HCA cannot be
 * freed it is put back on the list, the list and lock are left intact, and
 * DDI_FAILURE is returned.
 */
static int
iser_ib_fini_hcas(void)
{
	iser_hca_t	*hca;
	int		status;

	mutex_enter(&iser_state->is_hcalist_lock);
	while ((hca = list_head(&iser_state->is_hcalist)) != NULL) {

		list_remove(&iser_state->is_hcalist, hca);

		/* iser_ib_free_hca() reports DDI_SUCCESS / DDI_FAILURE */
		status = iser_ib_free_hca(hca);
		if (status != DDI_SUCCESS) {
			ISER_LOG(CE_NOTE, "iser_ib_fini_hcas: failed to free "
			    "HCA during fini");
			list_insert_tail(&iser_state->is_hcalist, hca);

			/* Do not return with the list lock still held */
			mutex_exit(&iser_state->is_hcalist_lock);
			return (DDI_FAILURE);
		}

		iser_state->is_num_hcas--;

	}
	mutex_exit(&iser_state->is_hcalist_lock);
	list_destroy(&iser_state->is_hcalist);
	mutex_destroy(&iser_state->is_hcalist_lock);

	return (DDI_SUCCESS);
}

/*
 * iser_ib_alloc_hca
 *
 * Allocate and populate an iser_hca_t for the HCA identified by guid: open
 * the HCA, query its attributes and ports, allocate one protection domain
 * and initialize the HCA's caches.
 *
 * Returns the new handle, or NULL if any IBT call fails. On failure every
 * resource acquired up to that point is released again.
 */
static iser_hca_t *
iser_ib_alloc_hca(ib_guid_t guid)
{
	iser_hca_t	*new_hca;
	int		rc;

	/* KM_SLEEP allocation of the zeroed handle */
	new_hca = (iser_hca_t *)kmem_zalloc(sizeof (iser_hca_t), KM_SLEEP);

	/* Step 1: open the device */
	rc = ibt_open_hca(iser_state->is_ibhdl, guid, &new_hca->hca_hdl);
	if (rc != IBT_SUCCESS) {
		ISER_LOG(CE_NOTE, "iser_ib_alloc_hca: ibt_open_hca failed:"
		    " guid (0x%llx) status (0x%x)", (longlong_t)guid, rc);
		goto fail_free;
	}

	new_hca->hca_clnt_hdl	= iser_state->is_ibhdl;
	new_hca->hca_guid	= guid;

	/* Step 2: HCA attributes */
	rc = ibt_query_hca(new_hca->hca_hdl, &new_hca->hca_attr);
	if (rc != IBT_SUCCESS) {
		ISER_LOG(CE_NOTE, "iser_ib_alloc_hca: ibt_query_hca "
		    "failure: guid (0x%llx) status (0x%x)",
		    (longlong_t)guid, rc);
		goto fail_close;
	}

	/* Step 3: information for all ports (port argument 0) */
	rc = ibt_query_hca_ports(new_hca->hca_hdl, 0,
	    &new_hca->hca_port_info, &new_hca->hca_num_ports,
	    &new_hca->hca_port_info_sz);
	if (rc != IBT_SUCCESS) {
		ISER_LOG(CE_NOTE, "iser_ib_alloc_hca: "
		    "ibt_query_hca_ports failure: guid (0x%llx) "
		    "status (0x%x)", (longlong_t)guid, rc);
		goto fail_close;
	}

	/* Step 4: the single PD used on this HCA */
	rc = ibt_alloc_pd(new_hca->hca_hdl, IBT_PD_NO_FLAGS,
	    &new_hca->hca_pdhdl);
	if (rc != IBT_SUCCESS) {
		ISER_LOG(CE_NOTE, "iser_ib_alloc_hca: ibt_alloc_pd "
		    "failure: guid (0x%llx) status (0x%x)",
		    (longlong_t)guid, rc);
		/* The port information exists by now and must go as well */
		(void) ibt_close_hca(new_hca->hca_hdl);
		ibt_free_portinfo(new_hca->hca_port_info,
		    new_hca->hca_port_info_sz);
		goto fail_free;
	}

	/* Step 5: message and data MR caches */
	iser_init_hca_caches(new_hca);

	return (new_hca);

fail_close:
	(void) ibt_close_hca(new_hca->hca_hdl);
fail_free:
	kmem_free(new_hca, sizeof (iser_hca_t));
	return (NULL);
}

/*
 * iser_ib_free_hca
 *
 * Release everything held by an iser_hca_t: the HCA caches, the protection
 * domain, the open HCA handle, the port information and finally the
 * structure itself.
 *
 * Returns DDI_SUCCESS when the HCA has been freed; hca must not be used
 * afterwards. Returns DDI_FAILURE when
 *  - the HCA is already marked hca_failed (nothing is attempted), or
 *  - the PD cannot be freed or the HCA cannot be closed. In that case the
 *    function tries to restore what it already tore down (PD, caches) so
 *    the HCA stays usable. If the PD cannot be re-allocated the HCA is
 *    marked hca_failed.
 */
static int
iser_ib_free_hca(iser_hca_t *hca)
{
	int			status;
	ibt_hca_portinfo_t	*hca_port_info;
	uint_t			hca_port_info_sz;

	ASSERT(hca != NULL);
	if (hca->hca_failed)
		return (DDI_FAILURE);

	hca_port_info = hca->hca_port_info;
	hca_port_info_sz = hca->hca_port_info_sz;

	/*
	 * Free the memory regions before freeing
	 * the associated protection domain
	 */
	iser_fini_hca_caches(hca);

	status = ibt_free_pd(hca->hca_hdl, hca->hca_pdhdl);
	if (status != IBT_SUCCESS) {
		ISER_LOG(CE_NOTE, "iser_ib_free_hca: failed to free PD "
		    "status=0x%x", status);
		goto out_caches;
	}

	status = ibt_close_hca(hca->hca_hdl);
	if (status != IBT_SUCCESS) {
		ISER_LOG(CE_NOTE, "iser_ib_free_hca: failed to close HCA "
		    "status=0x%x", status);
		goto out_pd;
	}

	ibt_free_portinfo(hca_port_info, hca_port_info_sz);

	kmem_free(hca, sizeof (iser_hca_t));
	return (DDI_SUCCESS);

	/*
	 * We only managed to partially tear down the HCA, try to put it back
	 * like it was before returning.
	 */
out_pd:
	/* The PD was freed above; allocate a replacement */
	status = ibt_alloc_pd(hca->hca_hdl, IBT_PD_NO_FLAGS, &hca->hca_pdhdl);
	if (status != IBT_SUCCESS) {
		/* No PD, so the caches cannot be rebuilt either */
		hca->hca_failed = B_TRUE;
		/* Report error and exit */
		ISER_LOG(CE_NOTE, "iser_ib_free_hca: could not re-alloc PD "
		    "status=0x%x", status);
		return (DDI_FAILURE);
	}

out_caches:
	/* Rebuild the caches that were finalized at the top */
	iser_init_hca_caches(hca);

	return (DDI_FAILURE);
}

/*
 * iser_ib_update_hcaports
 *
 * Re-query the information for all ports of the HCA and replace the copy
 * cached in the iser_hca_t (port array, its size and the port count). The
 * previous port information is freed.
 *
 * Returns IBT_SUCCESS, or the status of ibt_query_hca_ports() on failure,
 * in which case the cached information is left untouched.
 */
static int
iser_ib_update_hcaports(iser_hca_t *hca)
{
	ibt_hca_portinfo_t	*pinfop, *oldpinfop;
	uint_t			size, oldsize, nport;
	int			status;

	ASSERT(hca != NULL);

	status = ibt_query_hca_ports(hca->hca_hdl, 0, &pinfop, &nport, &size);
	if (status != IBT_SUCCESS) {
		ISER_LOG(CE_NOTE, "ibt_query_hca_ports failed: %d", status);
		return (status);
	}

	oldpinfop = hca->hca_port_info;
	oldsize	= hca->hca_port_info_sz;
	hca->hca_port_info = pinfop;
	hca->hca_port_info_sz = size;
	/*
	 * Keep the port count in step with the array it describes; other
	 * code walks hca_port_info using hca_num_ports as the bound.
	 */
	hca->hca_num_ports = nport;

	(void) ibt_free_portinfo(oldpinfop, oldsize);

	return (IBT_SUCCESS);
}

/*
 * iser_ib_gid2hca
 * Search iser_state->is_hcalist for the HCA that has a port whose first
 * source GID (p_sgid_tbl[0]) equals the given gid. The list is walked under
 * is_hcalist_lock.
 *
 * Returns the matching HCA, or NULL if no port of any HCA matches.
 */
iser_hca_t *
iser_ib_gid2hca(ib_gid_t gid)
{
	iser_hca_t	*cur;
	iser_hca_t	*found = NULL;
	ib_gid_t	*port_gid;
	int		port;

	mutex_enter(&iser_state->is_hcalist_lock);

	for (cur = list_head(&iser_state->is_hcalist); cur != NULL;
	    cur = list_next(&iser_state->is_hcalist, cur)) {

		for (port = 0; port < cur->hca_num_ports; port++) {
			port_gid = &cur->hca_port_info[port].p_sgid_tbl[0];

			if (port_gid->gid_prefix != gid.gid_prefix)
				continue;
			if (port_gid->gid_guid != gid.gid_guid)
				continue;

			/* Both halves of the GID match */
			found = cur;
			goto done;
		}
	}

done:
	mutex_exit(&iser_state->is_hcalist_lock);
	return (found);
}

/*
 * iser_ib_guid2hca
 * Search iser_state->is_hcalist, under is_hcalist_lock, for the HCA whose
 * hca_guid equals the given guid.
 *
 * Returns the matching HCA, or NULL if there is none.
 */
iser_hca_t *
iser_ib_guid2hca(ib_guid_t guid)
{
	iser_hca_t	*cur;

	mutex_enter(&iser_state->is_hcalist_lock);

	/* Advance until we hit the wanted guid or run off the list */
	cur = list_head(&iser_state->is_hcalist);
	while (cur != NULL && cur->hca_guid != guid)
		cur = list_next(&iser_state->is_hcalist, cur);

	mutex_exit(&iser_state->is_hcalist_lock);

	return (cur);
}

/*
 * iser_ib_conv_sockaddr2ibtaddr
 * Fill in an IBT IP address from an IDM socket address.
 *
 * saddr    - source address, may be NULL
 * ibt_addr - destination
 *
 * AF_INET and AF_INET6 addresses are copied along with their family. A NULL
 * source yields AF_UNSPEC with a zero IPv4 address. Any other family yields
 * AF_UNSPEC and leaves the address part untouched.
 */
void
iser_ib_conv_sockaddr2ibtaddr(idm_sockaddr_t *saddr, ibt_ip_addr_t *ibt_addr)
{
	if (saddr == NULL) {
		ibt_addr->family = AF_UNSPEC;
		ibt_addr->un.ip4addr = 0;
		return;
	}

	if (saddr->sin.sa_family == AF_INET) {
		ibt_addr->family	= saddr->sin4.sin_family;
		ibt_addr->un.ip4addr	= saddr->sin4.sin_addr.s_addr;
	} else if (saddr->sin.sa_family == AF_INET6) {
		ibt_addr->family	= saddr->sin6.sin6_family;
		ibt_addr->un.ip6addr	= saddr->sin6.sin6_addr;
	} else {
		ibt_addr->family = AF_UNSPEC;
	}
}

/*
 * iser_ib_conv_ibtaddr2sockaddr
 * This function converts an IBT ip address handle to a sockaddr
 *
 * ss       - destination storage
 * ibt_addr - source IBT address; it is only read
 * port     - port number stored in the result
 *
 * AF_INET and AF_UNSPEC produce a sockaddr_in, AF_INET6 a sockaddr_in6. For
 * any other family a note is logged and ss is left untouched.
 *
 * The result is built field by field. ibt_ip_addr_t keeps its IPv4 and IPv6
 * addresses in one union, i.e. at the same offset, whereas sockaddr_in and
 * sockaddr_in6 place their addresses at different offsets. Overlaying the
 * sockaddr types on the IBT address therefore cannot be right for both
 * families, and it would also write the port into the caller's input.
 */
void iser_ib_conv_ibtaddr2sockaddr(struct sockaddr_storage *ss,
    ibt_ip_addr_t *ibt_addr, in_port_t port)
{
	struct sockaddr_in *sin;
	struct sockaddr_in6 *sin6;

	switch (ibt_addr->family) {
	case AF_INET:
	case AF_UNSPEC:

		sin = (struct sockaddr_in *)ss;
		bzero(sin, sizeof (struct sockaddr_in));
		sin->sin_family = ibt_addr->family;
		/*
		 * Byte-order conversion kept as in the original code.
		 * NOTE(review): confirm which byte order callers pass.
		 */
		sin->sin_port = ntohs(port);
		sin->sin_addr.s_addr = ibt_addr->un.ip4addr;
		break;

	case AF_INET6:

		sin6 = (struct sockaddr_in6 *)ss;
		bzero(sin6, sizeof (struct sockaddr_in6));
		sin6->sin6_family = ibt_addr->family;
		sin6->sin6_port = ntohs(port);
		sin6->sin6_addr = ibt_addr->un.ip6addr;
		break;

	default:
		ISER_LOG(CE_NOTE, "iser_ib_conv_ibtaddr2sockaddr: "
		    "unknown family type: 0x%x", ibt_addr->family);
	}
}

/*
 * iser_ib_setup_cq
 * Allocate a Completion Queue of the requested size on the given HCA.
 *
 * hca_hdl - HCA on which to allocate the CQ
 * cq_size - requested size of the CQ
 * cq_hdl  - receives the handle of the new CQ
 *
 * Returns ISER_STATUS_SUCCESS, or the status of ibt_alloc_cq() on failure.
 */
static int
iser_ib_setup_cq(ibt_hca_hdl_t hca_hdl, uint_t cq_size, ibt_cq_hdl_t *cq_hdl)
{
	ibt_cq_attr_t		attr;
	int			rc;

	/* Describe the CQ: caller's size, no scheduling hint, no flags */
	attr.cq_flags	= IBT_CQ_NO_FLAGS;
	attr.cq_sched	= 0;
	attr.cq_size	= cq_size;

	rc = ibt_alloc_cq(hca_hdl, &attr, cq_hdl, NULL);
	if (rc == IBT_SUCCESS)
		return (ISER_STATUS_SUCCESS);

	ISER_LOG(CE_NOTE, "iser_ib_setup_cq: ibt_alloc_cq failure (%d)",
	    rc);

	return (rc);
}

/*
 * iser_ib_setup_chanargs
 *
 * Fill in the RC channel allocation arguments. cargs is zeroed first, then
 * set up from the remaining parameters:
 *
 * hca_port  - local HCA port on which the channel is allocated
 * scq_hdl   - send completion queue
 * rcq_hdl   - receive completion queue
 * sq_size   - send queue size
 * rq_size   - receive queue size
 * hca_pdhdl - protection domain associated with the channel
 * cargs     - structure to initialize
 */
static void
iser_ib_setup_chanargs(uint8_t hca_port, ibt_cq_hdl_t scq_hdl,
    ibt_cq_hdl_t rcq_hdl, uint_t sq_size, uint_t rq_size,
    ibt_pd_hdl_t hca_pdhdl, ibt_rc_chan_alloc_args_t *cargs)
{
	/* Start from a clean slate */
	bzero(cargs, sizeof (ibt_rc_chan_alloc_args_t));

	/* Where the channel lives: HCA port and protection domain */
	cargs->rc_hca_port_num		= hca_port;
	cargs->rc_pd			= hca_pdhdl;

	/* Completion queues for send and receive; no SRQ is used */
	cargs->rc_scq			= scq_hdl;
	cargs->rc_rcq			= rcq_hdl;
	cargs->rc_srq			= NULL;

	/*
	 * Queue depths come from the caller. The scatter/gather list
	 * length of work requests posted to either queue is
	 * ISER_IB_SGLIST_SIZE.
	 */
	cargs->rc_sizes.cs_sq		= sq_size;
	cargs->rc_sizes.cs_sq_sgl	= ISER_IB_SGLIST_SIZE;
	cargs->rc_sizes.cs_rq		= rq_size;
	cargs->rc_sizes.cs_rq_sgl	= ISER_IB_SGLIST_SIZE;

	/* Every work request is signaled and gets a send completion */
	cargs->rc_flags			= IBT_ALL_SIGNALED;

	/* RDMA read and RDMA write are enabled on the channel end points */
	cargs->rc_control		= IBT_CEP_RDMA_RD | IBT_CEP_RDMA_WR;
}

/*
 * iser_ib_init_qp
 * Set up the QP bookkeeping embedded in the channel (chan->ic_qp).
 *
 * chan    - channel whose QP state is initialized
 * sq_size - send queue size to record
 * rq_size - receive queue size to record; also used as the RQ depth
 */
void
iser_ib_init_qp(iser_chan_t *chan, uint_t sq_size, uint_t rq_size)
{
	/* Remember how large the two queues are */
	chan->ic_qp.rq_size = rq_size;
	chan->ic_qp.sq_size = sq_size;

	/*
	 * RQ monitoring: the level starts at zero. The low-water mark is
	 * ISER_IB_RQ_LWM_PCT percent of the receive CQ size.
	 */
	chan->ic_qp.rq_lwm = (chan->ic_recvcq_sz * ISER_IB_RQ_LWM_PCT) / 100;
	chan->ic_qp.rq_level = 0;
	chan->ic_qp.rq_depth = rq_size;

	/* No taskq dispatch is pending yet */
	chan->ic_qp.rq_taskqpending = B_FALSE;

	/* Lock protecting the QP handle */
	mutex_init(&chan->ic_qp.qp_lock, NULL, MUTEX_DRIVER, NULL);
}

/*
 * iser_ib_fini_qp
 * Undo iser_ib_init_qp(). The lock is the only QP resource that needs
 * releasing.
 */
void
iser_ib_fini_qp(iser_qp_t *qp)
{
	mutex_destroy(&qp->qp_lock);
}

/*
 * iser_ib_activate_port
 * Bind the iSER service of idm_svc to the port identified by gid, and record
 * the binding on the service's is_sbindlist so it can be undone later.
 *
 * idm_svc - IDM service; also handed to ibt_bind_service() as its argument
 * guid    - guid of the HCA, stored in the binding record
 * gid     - GID of the port to bind to
 *
 * Returns IBT_SUCCESS, or the status of ibt_bind_service() on failure, in
 * which case no binding record is kept.
 */
static int
iser_ib_activate_port(idm_svc_t *idm_svc, ib_guid_t guid, ib_gid_t gid)
{
	iser_svc_t	*svc = idm_svc->is_iser_svc;
	iser_sbind_t	*bind;
	int		rc;

	/* Record that will hold the bind handle plus the port identity */
	bind = kmem_zalloc(sizeof (iser_sbind_t), KM_SLEEP);
	bind->is_guid	= guid;
	bind->is_gid	= gid;

	rc = ibt_bind_service(svc->is_srvhdl, gid, NULL,
	    idm_svc, &bind->is_sbindhdl);

	if (rc == IBT_SUCCESS) {
		list_insert_tail(&svc->is_sbindlist, bind);
		return (IBT_SUCCESS);
	}

	ISER_LOG(CE_NOTE, "iser_ib_activate_port: status(0x%x): "
	    "Bind service(%llx) on port(%llx:%llx) failed",
	    rc, (longlong_t)svc->is_svcid,
	    (longlong_t)gid.gid_prefix, (longlong_t)gid.gid_guid);

	/* The bind failed, so the record is of no use */
	kmem_free(bind, sizeof (iser_sbind_t));

	return (rc);
}

/*
 * iser_ib_deactivate_port
 * Walk idm.idm_tgt_conn_list under idm.idm_global_mutex. Each iSER
 * connection whose channel path is on the given HCA gets a
 * CE_TRANSPORT_FAIL event. For connections that are not of initiator type,
 * a service binding matching (hca_guid, gid) is then unbound, removed from
 * the service's bind list and freed.
 *
 * hca_guid - guid of the HCA owning the port
 * gid      - GID of the port being deactivated
 */
static void
iser_ib_deactivate_port(ib_guid_t hca_guid, ib_gid_t gid)
{
	idm_conn_t	*conn;
	iser_conn_t	*iconn;
	iser_svc_t	*svc;
	iser_sbind_t	*bind;

	mutex_enter(&idm.idm_global_mutex);

	for (conn = list_head(&idm.idm_tgt_conn_list); conn != NULL;
	    conn = list_next(&idm.idm_tgt_conn_list, conn)) {

		/* Only iSER connections are of interest */
		if (conn->ic_transport_type != IDM_TRANSPORT_TYPE_ISER)
			continue;

		/* Skip connections whose channel is on another HCA */
		iconn = conn->ic_transport_private;
		if (iconn->ic_chan->ic_ibt_path.pi_hca_guid != hca_guid)
			continue;

		/* Tell IDM the transport under this connection failed */
		idm_conn_event(conn, CE_TRANSPORT_FAIL, IDM_STATUS_FAIL);

		/* Initiator connections need nothing further */
		if (conn->ic_conn_type == CONN_TYPE_INI)
			continue;

		/* Is the connection's service still bound to this port? */
		svc = conn->ic_svc_binding->is_iser_svc;
		bind = iser_ib_get_bind(svc, hca_guid, gid);
		if (bind == NULL)
			continue;

		/* Yes: unbind it and drop the binding record */
		(void) ibt_unbind_service(svc->is_srvhdl, bind->is_sbindhdl);
		list_remove(&svc->is_sbindlist, bind);
		kmem_free(bind, sizeof (iser_sbind_t));
	}

	mutex_exit(&idm.idm_global_mutex);
}

/*
 * iser_ib_get_bind
 * Look through the service's is_sbindlist for the binding made on the given
 * HCA guid and port GID.
 *
 * Returns the binding record, or NULL if the service has no such binding.
 * No lock is taken here.
 */
static iser_sbind_t *
iser_ib_get_bind(iser_svc_t *iser_svc, ib_guid_t hca_guid, ib_gid_t gid)
{
	iser_sbind_t	*bind;

	bind = list_head(&iser_svc->is_sbindlist);
	while (bind != NULL) {
		/* A match needs the HCA guid and both halves of the GID */
		if (bind->is_guid == hca_guid &&
		    bind->is_gid.gid_prefix == gid.gid_prefix &&
		    bind->is_gid.gid_guid == gid.gid_guid)
			break;

		bind = list_next(&iser_svc->is_sbindlist, bind);
	}

	return (bind);
}